Tools and RAG are working together!!

James Ketr 2025-04-01 22:59:14 -07:00
parent cf29c85449
commit 5f1f641dba
4 changed files with 31 additions and 24 deletions

View File

@@ -318,11 +318,9 @@ const MessageMeta = ({ metadata }: MessageMetaInterface) => {
         return <></>
     }
-    console.log(JSON.stringify(metadata.tools[0].result, null, 2));
     return (<>
         {
-            metadata.tools !== undefined &&
+            metadata.tools !== undefined && metadata.tools.length !== 0 &&
             <Typography sx={{ marginBottom: 2 }}>
                 <p>Tools queried:</p>
                 {metadata.tools.map((tool: any, index: number) => <>
@@ -340,7 +338,7 @@ const MessageMeta = ({ metadata }: MessageMetaInterface) => {
         {
             metadata.rag.name !== undefined &&
             <Typography sx={{ marginBottom: 2 }}>
-                <p>RAG from '{metadata.rag.name}' collection matches against embedding vector of {metadata.rag.query_embedding.length} dimensions:</p>
+                <p>Top RAG {metadata.rag.ids.length} matches from '{metadata.rag.name}' collection against embedding vector of {metadata.rag.query_embedding.length} dimensions:</p>
                 {metadata.rag.ids.map((id: number, index: number) => <>
                     <Divider />
                     <Box sx={{ fontSize: "0.75rem", display: "flex", flexDirection: "row", mb: 0.5, mt: 0.5 }} key={index}>
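For reference (not part of the commit): the component above reads metadata.tools and metadata.rag, which the backend assembles from tool calls and the Chroma query results. A rough sketch of the payload shape, with field names inferred from what the UI accesses; everything below is illustrative, not the repo's actual schema:

    # Hypothetical payload sketch -- names and values are inferred, not taken from the repo.
    example_metadata = {
        "tools": [                             # one entry per tool the LLM invoked
            {"tool": "DateTime", "result": "2025-04-01T22:59:14-07:00"},
        ],
        "rag": {
            "name": "documents",               # collection name (assumed)
            "ids": [3, 7, 11],                 # matched chunk ids rendered by the map() above
            "query_embedding": [0.12, -0.08],  # truncated; real vectors have hundreds of dimensions
            # remaining keys are spread in from the Chroma query results (see the server diff below)
        },
    }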

View File

@@ -52,7 +52,10 @@ from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
 from fastapi.responses import JSONResponse, StreamingResponse, FileResponse, RedirectResponse
 from fastapi.middleware.cors import CORSMiddleware
-from utils import rag as Rag
+from utils import (
+    rag as Rag,
+    defines
+)
 from tools import (
     DateTime,
@@ -129,15 +132,14 @@ def system_info(model):
         "System RAM": get_installed_ram(),
         "Graphics Card": get_graphics_cards(),
         "CPU": get_cpu_info(),
-        "LLM Model": model
+        "LLM Model": model,
+        "Context length": defines.max_context
     }

 # %%
 # Defaults
-OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint
-#MODEL_NAME = "deepseek-r1:7b"
-#MODEL_NAME = "llama3.2"
-MODEL_NAME = "qwen2.5:7b"
+OLLAMA_API_URL = defines.ollama_api_url
+MODEL_NAME = defines.model
 LOG_LEVEL="info"
 USE_TLS=False
 WEB_HOST="0.0.0.0"
@@ -145,19 +147,20 @@ WEB_PORT=5000

 # %%
 # Globals
+context_tag = "INFO"
 system_message = f"""
 Launched on {DateTime()}.

 When answering queries, follow these steps:
 1. First analyze the query to determine if real-time information might be helpful
-2. Even when [CONTEXT] is provided, consider whether the tools would provide more current or comprehensive information
+2. Even when [{context_tag}] is provided, consider whether the tools would provide more current or comprehensive information
 3. Use the provided tools whenever they would enhance your response, regardless of whether context is also available
-4. When both [CONTEXT] and tool outputs are relevant, synthesize information from both sources to provide the most complete answer
-5. Always prioritize the most up-to-date and relevant information, whether it comes from [CONTEXT] or tools
-6. If [CONTEXT] and tool outputs contain conflicting information, prefer the tool outputs as they likely represent more current data
+4. When both [{context_tag}] and tool outputs are relevant, synthesize information from both sources to provide the most complete answer
+5. Always prioritize the most up-to-date and relevant information, whether it comes from [{context_tag}] or tools
+6. If [{context_tag}] and tool outputs contain conflicting information, prefer the tool outputs as they likely represent more current data

-Always use tools and [CONTEXT] when possible. Be concise, and never make up information. If you do not know the answer, say so.
+Always use tools and [{context_tag}] when possible. Be concise, and never make up information. If you do not know the answer, say so.
 """.strip()

 tool_log = []
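A quick illustration (not part of the commit) of what the new context_tag indirection buys: system_message is an f-string, so every [{context_tag}] placeholder renders as a literal [INFO] marker, keeping the tag used by the system prompt and by the RAG preamble below defined in one place.

    context_tag = "INFO"
    line = f"2. Even when [{context_tag}] is provided, consider whether the tools would provide more current or comprehensive information"
    print(line)  # -> 2. Even when [INFO] is provided, consider whether the tools would ...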
@@ -719,23 +722,26 @@ class WebServer:
             metadata["rag"] = { "name": rag["name"], **chroma_results }

         preamble = ""
         if len(rag_docs):
-            preamble = "In addition to real-time tools, use the following context to answer the question:\n[CONTEXT]:\n"
+            preamble = f"""
+1. Respond to this query: {content}
+2. If there is information in this context to enhance the answer, do so:
+[{context_tag}]:\n"""
             for doc in rag_docs:
                 preamble += doc
-            preamble += "\n[/CONTEXT]\nHuman: "
+            preamble += f"\n[/{context_tag}]\nUse all of that information to respond to: "

         # Figure
         llm_history.append({"role": "user", "content": preamble + content})
         user_history.append({"role": "user", "content": content})
-        messages = context["system"] + llm_history[-1:]
+        messages = context["system"] + llm_history

         try:
             yield {"status": "processing", "message": "Processing request..."}
             # Use the async generator in an async for loop
-            response = self.client.chat(model=self.model, messages=messages, tools=llm_tools(context["tools"]))
+            response = self.client.chat(model=self.model, messages=messages, tools=llm_tools(context["tools"]), options={ 'num_ctx': defines.max_context })
             tools_used = []

             yield {"status": "processing", "message": "Initial response received..."}
@@ -775,7 +781,7 @@ class WebServer:
             metadata["tools"] = tools_used

             yield {"status": "processing", "message": "Generating final response..."}
-            response = self.client.chat(model=self.model, messages=messages, stream=False)
+            response = self.client.chat(model=self.model, messages=messages, stream=False, options={ 'num_ctx': defines.max_context })
             reply = response['message']['content']
             final_message = {"role": "assistant", "content": reply }

View File

@@ -1,4 +1,7 @@
 ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
-model="qwen2.5:7b"
+#model = "deepseek-r1:7b"
+model = "llama3.2"
+#model="qwen2.5:7b"
 encoding_model="mxbai-embed-large"
 persist_directory="./chromadb"
+max_context = 2048*8
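Worth spelling out (not part of the diff): the new max_context constant works out to 16384 tokens, and it is the value the server and RAG helpers pass to Ollama as num_ctx.

    max_context = 2048 * 8              # = 16384 tokens
    options = {"num_ctx": max_context}  # shape used by the chat/embeddings calls above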

View File

@@ -59,7 +59,7 @@ def get_vector_collection(path=defines.persist_directory, name="documents"):

 # Function to generate embeddings using Ollama
 def get_embedding(llm, text):
-    response = llm.embeddings(model=defines.model, prompt=text)
+    response = llm.embeddings(model=defines.model, prompt=text, options={ 'num_ctx': defines.max_context })
     return response["embedding"]

 def add_embeddings_to_collection(llm, collection, chunks):
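The last context line references add_embeddings_to_collection, whose body is not shown in this diff. Purely as an illustration of how embedding calls like the one above are typically paired with a Chroma collection, a hypothetical version might look like the sketch below; the model name, paths, and loop structure are assumptions, not the repo's implementation:

    # Hypothetical sketch only -- not the code from this repository.
    import chromadb
    import ollama

    def add_embeddings_to_collection(llm, collection, chunks):
        # Embed each text chunk with Ollama and store it alongside the raw text.
        for i, chunk in enumerate(chunks):
            embedding = llm.embeddings(model="llama3.2", prompt=chunk,
                                       options={"num_ctx": 2048 * 8})["embedding"]
            collection.add(ids=[str(i)], embeddings=[embedding], documents=[chunk])

    llm = ollama.Client(host="http://ollama:11434")
    client = chromadb.PersistentClient(path="./chromadb")
    collection = client.get_or_create_collection(name="documents")
    # add_embeddings_to_collection(llm, collection, ["chunk one", "chunk two"])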