Tools and RAG are working together!!
parent cf29c85449
commit 5f1f641dba
@@ -318,11 +318,9 @@ const MessageMeta = ({ metadata }: MessageMetaInterface) => {
return <></>
}

-console.log(JSON.stringify(metadata.tools[0].result, null, 2));
-
return (<>
{
-metadata.tools !== undefined &&
+metadata.tools !== undefined && metadata.tools.length !== 0 &&
<Typography sx={{ marginBottom: 2 }}>
<p>Tools queried:</p>
{metadata.tools.map((tool: any, index: number) => <>
@@ -340,7 +338,7 @@ const MessageMeta = ({ metadata }: MessageMetaInterface) => {
{
metadata.rag.name !== undefined &&
<Typography sx={{ marginBottom: 2 }}>
-<p>RAG from '{metadata.rag.name}' collection matches against embedding vector of {metadata.rag.query_embedding.length} dimensions:</p>
+<p>Top RAG {metadata.rag.ids.length} matches from '{metadata.rag.name}' collection against embedding vector of {metadata.rag.query_embedding.length} dimensions:</p>
{metadata.rag.ids.map((id: number, index: number) => <>
<Divider />
<Box sx={{ fontSize: "0.75rem", display: "flex", flexDirection: "row", mb: 0.5, mt: 0.5 }} key={index}>
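Read together with the server hunks below, the `metadata` prop this component renders is just the JSON the backend attaches to each reply. A hedged sketch of its shape, written as the Python dict the server appears to build (the field names come from the diff; the nesting of each tool entry is inferred):

```python
# Inferred shape only -- pieced together from the fields MessageMeta reads
# and the metadata["tools"] / metadata["rag"] assignments further down.
metadata = {
    "tools": [
        {"name": "DateTime", "result": {...}},  # one entry per tool call; "result" is what
                                                # metadata.tools[0].result displayed above
    ],
    "rag": {
        "name": "documents",        # rag["name"]: which Chroma collection was queried
        "ids": [...],               # match ids (Chroma also returns documents/distances)
        "query_embedding": [...],   # the embedding vector whose length is shown in the UI
    },
}
```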
@@ -52,7 +52,10 @@ from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
from fastapi.responses import JSONResponse, StreamingResponse, FileResponse, RedirectResponse
from fastapi.middleware.cors import CORSMiddleware

-from utils import rag as Rag
+from utils import (
+rag as Rag,
+defines
+)

from tools import (
DateTime,
@@ -129,15 +132,14 @@ def system_info(model):
"System RAM": get_installed_ram(),
"Graphics Card": get_graphics_cards(),
"CPU": get_cpu_info(),
-"LLM Model": model
+"LLM Model": model,
+"Context length": defines.max_context
}

# %%
# Defaults
-OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint
-#MODEL_NAME = "deepseek-r1:7b"
-#MODEL_NAME = "llama3.2"
-MODEL_NAME = "qwen2.5:7b"
+OLLAMA_API_URL = defines.ollama_api_url
+MODEL_NAME = defines.model
LOG_LEVEL="info"
USE_TLS=False
WEB_HOST="0.0.0.0"
@@ -145,19 +147,20 @@ WEB_PORT=5000

# %%
# Globals
+context_tag = "INFO"
system_message = f"""
Launched on {DateTime()}.

When answering queries, follow these steps:

1. First analyze the query to determine if real-time information might be helpful
-2. Even when [CONTEXT] is provided, consider whether the tools would provide more current or comprehensive information
+2. Even when [{context_tag}] is provided, consider whether the tools would provide more current or comprehensive information
3. Use the provided tools whenever they would enhance your response, regardless of whether context is also available
-4. When both [CONTEXT] and tool outputs are relevant, synthesize information from both sources to provide the most complete answer
-5. Always prioritize the most up-to-date and relevant information, whether it comes from [CONTEXT] or tools
-6. If [CONTEXT] and tool outputs contain conflicting information, prefer the tool outputs as they likely represent more current data
+4. When both [{context_tag}] and tool outputs are relevant, synthesize information from both sources to provide the most complete answer
+5. Always prioritize the most up-to-date and relevant information, whether it comes from [{context_tag}] or tools
+6. If [{context_tag}] and tool outputs contain conflicting information, prefer the tool outputs as they likely represent more current data

-Always use tools and [CONTEXT] when possible. Be concise, and never make up information. If you do not know the answer, say so.
+Always use tools and [{context_tag}] when possible. Be concise, and never make up information. If you do not know the answer, say so.
""".strip()

tool_log = []
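A side note on the `context_tag` line added above: `system_message` is an f-string, so the tag (and the `DateTime()` launch stamp) are interpolated once, when the module loads, which is why the assignment has to precede it. A minimal standalone illustration, not the project's code:

```python
# f-strings evaluate at assignment time, so the tag value is frozen into the prompt.
context_tag = "INFO"
system_message = f"Even when [{context_tag}] is provided, consider whether the tools help."
context_tag = "SOMETHING_ELSE"  # rebinding later does not change system_message
print(system_message)           # Even when [INFO] is provided, consider whether the tools help.
```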
@@ -719,23 +722,26 @@ class WebServer:
metadata["rag"] = { "name": rag["name"], **chroma_results }
preamble = ""
if len(rag_docs):
-preamble = "In addition to real-time tools, use the following context to answer the question:\n[CONTEXT]:\n"
+preamble = f"""
+1. Respond to this query: {content}
+2. If there is information in this context to enhance the answer, do so:
+[{context_tag}]:\n"""
for doc in rag_docs:
preamble += doc
-preamble += "\n[/CONTEXT]\nHuman: "
+preamble += f"\n[/{context_tag}]\nUse all of that information to respond to: "

# Figure
llm_history.append({"role": "user", "content": preamble + content})
user_history.append({"role": "user", "content": content})

-messages = context["system"] + llm_history[-1:]
+messages = context["system"] + llm_history

try:
yield {"status": "processing", "message": "Processing request..."}

# Use the async generator in an async for loop
-response = self.client.chat(model=self.model, messages=messages, tools=llm_tools(context["tools"]))
+response = self.client.chat(model=self.model, messages=messages, tools=llm_tools(context["tools"]), options={ 'num_ctx': defines.max_context })

tools_used = []

yield {"status": "processing", "message": "Initial response received..."}
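The tool-dispatch code that runs between this first `chat()` call and the final one in the next hunk is not part of this diff. As orientation only, here is a rough sketch of the shape such a loop usually takes with the ollama client; `tools_used` and `messages` come from the diff, while `TOOL_REGISTRY` and the exact message fields are assumptions:

```python
# Rough sketch, not the project's dispatch code.
messages.append(response["message"])                    # keep the assistant turn with its tool calls
for call in response["message"].get("tool_calls", []):  # empty list when no tool was requested
    name = call["function"]["name"]
    args = call["function"]["arguments"]
    result = TOOL_REGISTRY[name](**args)                # hypothetical name -> callable mapping
    tools_used.append({"name": name, "args": args, "result": result})
    messages.append({"role": "tool", "content": str(result)})  # feed the result back to the model
```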
@@ -775,7 +781,7 @@ class WebServer:
metadata["tools"] = tools_used

yield {"status": "processing", "message": "Generating final response..."}
-response = self.client.chat(model=self.model, messages=messages, stream=False)
+response = self.client.chat(model=self.model, messages=messages, stream=False, options={ 'num_ctx': defines.max_context })

reply = response['message']['content']
final_message = {"role": "assistant", "content": reply }
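Both `chat()` calls, and the `embeddings()` call in the last hunk, now pass `options={ 'num_ctx': defines.max_context }`. With the ollama Python client, `options` carries Ollama runtime parameters, and `num_ctx` sets the context-window size for that request. A self-contained sketch; the host URL mirrors the defines below and the prompt is made up:

```python
import ollama

client = ollama.Client(host="http://ollama:11434")      # same endpoint as defines.ollama_api_url
response = client.chat(
    model="qwen2.5:7b",
    messages=[{"role": "user", "content": "Say hello."}],
    options={"num_ctx": 2048 * 8},                       # 16384-token context window
)
print(response["message"]["content"])
```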
@@ -1,4 +1,7 @@
ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
-model="qwen2.5:7b"
+#model = "deepseek-r1:7b"
+model = "llama3.2"
+#model="qwen2.5:7b"
encoding_model="mxbai-embed-large"
-persist_directory="./chromadb"
+persist_directory="./chromadb"
+max_context = 2048*8
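Putting the kept and added lines together, the defines module after this commit presumably reads roughly as below (a reconstruction from this hunk, not a verbatim copy of the file):

```python
# Reconstruction of the defines module (likely utils/defines.py) from the hunk above.
ollama_api_url = "http://ollama:11434"  # Default Ollama local endpoint
#model = "deepseek-r1:7b"
model = "llama3.2"
#model = "qwen2.5:7b"
encoding_model = "mxbai-embed-large"
persist_directory = "./chromadb"
max_context = 2048 * 8                  # num_ctx handed to every chat()/embeddings() call
```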
@@ -59,7 +59,7 @@ def get_vector_collection(path=defines.persist_directory, name="documents"):

# Function to generate embeddings using Ollama
def get_embedding(llm, text):
-response = llm.embeddings(model=defines.model, prompt=text)
+response = llm.embeddings(model=defines.model, prompt=text, options={ 'num_ctx': defines.max_context })
return response["embedding"]

def add_embeddings_to_collection(llm, collection, chunks):
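The `rag` block that MessageMeta renders starts from an embedding produced by this `get_embedding()` and a Chroma query against the collection. A hedged sketch of that flow using the functions visible in this diff; the query text and `n_results` value are illustrative only:

```python
# Hedged sketch: embed a query with Ollama, then match it against the Chroma collection.
import ollama
from utils import rag as Rag, defines

llm = ollama.Client(host=defines.ollama_api_url)
collection = Rag.get_vector_collection(name="documents")
query_embedding = Rag.get_embedding(llm, "What tools do you have?")   # illustrative query
chroma_results = collection.query(query_embeddings=[query_embedding], n_results=5)
# chroma_results holds "ids", "documents", "distances", ... (one inner list per query);
# the server folds these into metadata["rag"] alongside "name" and the query embedding.
```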