Tools and RAG are working together!!

James Ketr 2025-04-01 22:59:14 -07:00
parent cf29c85449
commit 5f1f641dba
4 changed files with 31 additions and 24 deletions

View File

@@ -318,11 +318,9 @@ const MessageMeta = ({ metadata }: MessageMetaInterface) => {
return <></>
}
console.log(JSON.stringify(metadata.tools[0].result, null, 2));
return (<>
{
metadata.tools !== undefined &&
metadata.tools !== undefined && metadata.tools.length !== 0 &&
<Typography sx={{ marginBottom: 2 }}>
<p>Tools queried:</p>
{metadata.tools.map((tool: any, index: number) => <>
@@ -340,7 +338,7 @@ const MessageMeta = ({ metadata }: MessageMetaInterface) => {
{
metadata.rag.name !== undefined &&
<Typography sx={{ marginBottom: 2 }}>
<p>RAG from '{metadata.rag.name}' collection matches against embedding vector of {metadata.rag.query_embedding.length} dimensions:</p>
<p>Top RAG {metadata.rag.ids.length} matches from '{metadata.rag.name}' collection against embedding vector of {metadata.rag.query_embedding.length} dimensions:</p>
{metadata.rag.ids.map((id: number, index: number) => <>
<Divider />
<Box sx={{ fontSize: "0.75rem", display: "flex", flexDirection: "row", mb: 0.5, mt: 0.5 }} key={index}>

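For reference, a minimal sketch of the metadata payload this component reads, written as the Python dict the backend hands back; only the fields used above are shown, and any per-tool key besides "result" (such as "tool") is an assumption:

# Hypothetical shape of the message metadata consumed by MessageMeta (sketch, not the exact backend output)
metadata = {
    "tools": [
        {"tool": "DateTime", "result": "2025-04-01T22:59:14-07:00"},   # "tool" key name is an assumption
    ],
    "rag": {
        "name": "documents",               # Chroma collection name
        "ids": [12, 7, 3],                 # ids of the top matching chunks
        "query_embedding": [0.01] * 1024,  # query embedding; only its length is shown in the UI
    },
}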
View File

@@ -52,7 +52,10 @@ from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
from fastapi.responses import JSONResponse, StreamingResponse, FileResponse, RedirectResponse
from fastapi.middleware.cors import CORSMiddleware
from utils import rag as Rag
from utils import (
rag as Rag,
defines
)
from tools import (
DateTime,
@@ -129,15 +132,14 @@ def system_info(model):
"System RAM": get_installed_ram(),
"Graphics Card": get_graphics_cards(),
"CPU": get_cpu_info(),
"LLM Model": model
"LLM Model": model,
"Context length": defines.max_context
}
# %%
# Defaults
OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint
#MODEL_NAME = "deepseek-r1:7b"
#MODEL_NAME = "llama3.2"
MODEL_NAME = "qwen2.5:7b"
OLLAMA_API_URL = defines.ollama_api_url
MODEL_NAME = defines.model
LOG_LEVEL="info"
USE_TLS=False
WEB_HOST="0.0.0.0"
@@ -145,19 +147,20 @@ WEB_PORT=5000
# %%
# Globals
context_tag = "INFO"
system_message = f"""
Launched on {DateTime()}.
When answering queries, follow these steps:
1. First analyze the query to determine if real-time information might be helpful
2. Even when [CONTEXT] is provided, consider whether the tools would provide more current or comprehensive information
2. Even when [{context_tag}] is provided, consider whether the tools would provide more current or comprehensive information
3. Use the provided tools whenever they would enhance your response, regardless of whether context is also available
4. When both [CONTEXT] and tool outputs are relevant, synthesize information from both sources to provide the most complete answer
5. Always prioritize the most up-to-date and relevant information, whether it comes from [CONTEXT] or tools
6. If [CONTEXT] and tool outputs contain conflicting information, prefer the tool outputs as they likely represent more current data
4. When both [{context_tag}] and tool outputs are relevant, synthesize information from both sources to provide the most complete answer
5. Always prioritize the most up-to-date and relevant information, whether it comes from [{context_tag}] or tools
6. If [{context_tag}] and tool outputs contain conflicting information, prefer the tool outputs as they likely represent more current data
Always use tools and [CONTEXT] when possible. Be concise, and never make up information. If you do not know the answer, say so.
Always use tools and [{context_tag}] when possible. Be concise, and never make up information. If you do not know the answer, say so.
""".strip()
tool_log = []
@@ -719,23 +722,26 @@ class WebServer:
metadata["rag"] = { "name": rag["name"], **chroma_results }
preamble = ""
if len(rag_docs):
preamble = "In addition to real-time tools, use the following context to answer the question:\n[CONTEXT]:\n"
preamble = f"""
1. Respond to this query: {content}
2. If there is information in this context to enhance the answer, do so:
[{context_tag}]:\n"""
for doc in rag_docs:
preamble += doc
preamble += "\n[/CONTEXT]\nHuman: "
preamble += f"\n[/{context_tag}]\nUse all of that information to respond to: "
# Figure
llm_history.append({"role": "user", "content": preamble + content})
user_history.append({"role": "user", "content": content})
messages = context["system"] + llm_history[-1:]
messages = context["system"] + llm_history
try:
yield {"status": "processing", "message": "Processing request..."}
# Use the async generator in an async for loop
response = self.client.chat(model=self.model, messages=messages, tools=llm_tools(context["tools"]))
response = self.client.chat(model=self.model, messages=messages, tools=llm_tools(context["tools"]), options={ 'num_ctx': defines.max_context })
tools_used = []
yield {"status": "processing", "message": "Initial response received..."}
@@ -775,7 +781,7 @@ class WebServer:
metadata["tools"] = tools_used
yield {"status": "processing", "message": "Generating final response..."}
response = self.client.chat(model=self.model, messages=messages, stream=False)
response = self.client.chat(model=self.model, messages=messages, stream=False, options={ 'num_ctx': defines.max_context })
reply = response['message']['content']
final_message = {"role": "assistant", "content": reply }

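Taken together, the server changes route the configured context window and the tool schemas through both chat calls. A condensed sketch of that call pattern with the ollama Python client, as used in the diff (messages, context, and llm_tools come from the surrounding handler):

from ollama import Client
from utils import defines

client = Client(host=defines.ollama_api_url)

# First pass: offer the tool schemas and let the model decide whether to call any.
response = client.chat(
    model=defines.model,
    messages=messages,                          # system prompt + full llm_history
    tools=llm_tools(context["tools"]),
    options={"num_ctx": defines.max_context},   # raise the context window beyond the default
)

# After running any requested tools and appending their results to messages,
# a second pass produces the final reply with the same context window.
response = client.chat(
    model=defines.model,
    messages=messages,
    stream=False,
    options={"num_ctx": defines.max_context},
)
reply = response["message"]["content"]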
View File

@@ -1,4 +1,7 @@
ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
model="qwen2.5:7b"
#model = "deepseek-r1:7b"
model = "llama3.2"
#model="qwen2.5:7b"
encoding_model="mxbai-embed-large"
persist_directory="./chromadb"
max_context = 2048*8
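Read without diff markers, this hunk is easy to misparse; the module it leaves behind (path assumed to be utils/defines.py) presumably ends up as follows, with the alternative model choices kept as comments:

ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
#model = "deepseek-r1:7b"
model = "llama3.2"
#model="qwen2.5:7b"
encoding_model="mxbai-embed-large"
persist_directory="./chromadb"
max_context = 2048*8  # 16384-token context window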

View File

@@ -59,7 +59,7 @@ def get_vector_collection(path=defines.persist_directory, name="documents"):
# Function to generate embeddings using Ollama
def get_embedding(llm, text):
response = llm.embeddings(model=defines.model, prompt=text)
response = llm.embeddings(model=defines.model, prompt=text, options={ 'num_ctx': defines.max_context })
return response["embedding"]
def add_embeddings_to_collection(llm, collection, chunks):
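The embedding helper above feeds the Chroma lookup whose ids and query embedding surface in the UI. A rough sketch of that round trip, assuming get_vector_collection wraps a chromadb PersistentClient over defines.persist_directory and the default "documents" collection; the query text and n_results are illustrative:

import chromadb
from ollama import Client
from utils import defines, rag as Rag

llm = Client(host=defines.ollama_api_url)
collection = chromadb.PersistentClient(path=defines.persist_directory).get_or_create_collection("documents")

emb = Rag.get_embedding(llm, "What is on my schedule today?")      # Ollama embedding for the query
results = collection.query(query_embeddings=[emb], n_results=5)    # top-5 nearest chunks
top_ids = results["ids"][0]          # chromadb nests results per query; take the first query's lists
top_docs = results["documents"][0]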