Tools and RAG are working together!!

commit 5f1f641dba (parent cf29c85449)
@@ -318,11 +318,9 @@ const MessageMeta = ({ metadata }: MessageMetaInterface) => {
     return <></>
   }
 
-  console.log(JSON.stringify(metadata.tools[0].result, null, 2));
-
   return (<>
     {
-      metadata.tools !== undefined &&
+      metadata.tools !== undefined && metadata.tools.length !== 0 &&
       <Typography sx={{ marginBottom: 2 }}>
         <p>Tools queried:</p>
         {metadata.tools.map((tool: any, index: number) => <>
@@ -340,7 +338,7 @@ const MessageMeta = ({ metadata }: MessageMetaInterface) => {
     {
       metadata.rag.name !== undefined &&
       <Typography sx={{ marginBottom: 2 }}>
-        <p>RAG from '{metadata.rag.name}' collection matches against embedding vector of {metadata.rag.query_embedding.length} dimensions:</p>
+        <p>Top RAG {metadata.rag.ids.length} matches from '{metadata.rag.name}' collection against embedding vector of {metadata.rag.query_embedding.length} dimensions:</p>
         {metadata.rag.ids.map((id: number, index: number) => <>
           <Divider />
           <Box sx={{ fontSize: "0.75rem", display: "flex", flexDirection: "row", mb: 0.5, mt: 0.5 }} key={index}>
@@ -52,7 +52,10 @@ from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
 from fastapi.responses import JSONResponse, StreamingResponse, FileResponse, RedirectResponse
 from fastapi.middleware.cors import CORSMiddleware
 
-from utils import rag as Rag
+from utils import (
+    rag as Rag,
+    defines
+)
 
 from tools import (
     DateTime,
@@ -129,15 +132,14 @@ def system_info(model):
         "System RAM": get_installed_ram(),
         "Graphics Card": get_graphics_cards(),
         "CPU": get_cpu_info(),
-        "LLM Model": model
+        "LLM Model": model,
+        "Context length": defines.max_context
     }
 
 # %%
 # Defaults
-OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint
-#MODEL_NAME = "deepseek-r1:7b"
-#MODEL_NAME = "llama3.2"
-MODEL_NAME = "qwen2.5:7b"
+OLLAMA_API_URL = defines.ollama_api_url
+MODEL_NAME = defines.model
 LOG_LEVEL="info"
 USE_TLS=False
 WEB_HOST="0.0.0.0"
@@ -145,19 +147,20 @@ WEB_PORT=5000
 
 # %%
 # Globals
+context_tag = "INFO"
 system_message = f"""
 Launched on {DateTime()}.
 
 When answering queries, follow these steps:
 
 1. First analyze the query to determine if real-time information might be helpful
-2. Even when [CONTEXT] is provided, consider whether the tools would provide more current or comprehensive information
+2. Even when [{context_tag}] is provided, consider whether the tools would provide more current or comprehensive information
 3. Use the provided tools whenever they would enhance your response, regardless of whether context is also available
-4. When both [CONTEXT] and tool outputs are relevant, synthesize information from both sources to provide the most complete answer
-5. Always prioritize the most up-to-date and relevant information, whether it comes from [CONTEXT] or tools
-6. If [CONTEXT] and tool outputs contain conflicting information, prefer the tool outputs as they likely represent more current data
+4. When both [{context_tag}] and tool outputs are relevant, synthesize information from both sources to provide the most complete answer
+5. Always prioritize the most up-to-date and relevant information, whether it comes from [{context_tag}] or tools
+6. If [{context_tag}] and tool outputs contain conflicting information, prefer the tool outputs as they likely represent more current data
 
-Always use tools and [CONTEXT] when possible. Be concise, and never make up information. If you do not know the answer, say so.
+Always use tools and [{context_tag}] when possible. Be concise, and never make up information. If you do not know the answer, say so.
 """.strip()
 
 tool_log = []
@@ -719,23 +722,26 @@ class WebServer:
         metadata["rag"] = { "name": rag["name"], **chroma_results }
         preamble = ""
         if len(rag_docs):
-            preamble = "In addition to real-time tools, use the following context to answer the question:\n[CONTEXT]:\n"
+            preamble = f"""
+1. Respond to this query: {content}
+2. If there is information in this context to enhance the answer, do so:
+[{context_tag}]:\n"""
             for doc in rag_docs:
                 preamble += doc
-            preamble += "\n[/CONTEXT]\nHuman: "
+            preamble += f"\n[/{context_tag}]\nUse all of that information to respond to: "
 
         # Figure
         llm_history.append({"role": "user", "content": preamble + content})
         user_history.append({"role": "user", "content": content})
 
-        messages = context["system"] + llm_history[-1:]
+        messages = context["system"] + llm_history
 
 
         try:
             yield {"status": "processing", "message": "Processing request..."}
 
             # Use the async generator in an async for loop
-            response = self.client.chat(model=self.model, messages=messages, tools=llm_tools(context["tools"]))
+            response = self.client.chat(model=self.model, messages=messages, tools=llm_tools(context["tools"]), options={ 'num_ctx': defines.max_context })
 
             tools_used = []
 
             yield {"status": "processing", "message": "Initial response received..."}
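For reference, the hunk above changes how the RAG preamble is assembled: the user's query and the instructions now come first, followed by the retrieved documents wrapped in [INFO]-style markers driven by the new context_tag. The standalone sketch below mirrors that logic with names taken from the diff; it is a simplified illustration, not the server code itself.

```python
# Simplified illustration of the preamble built in the hunk above.
context_tag = "INFO"

def build_preamble(content: str, rag_docs: list[str]) -> str:
    """Mirror the diff: numbered instructions, then retrieved docs in [INFO] ... [/INFO]."""
    if not rag_docs:
        return ""
    preamble = f"""
1. Respond to this query: {content}
2. If there is information in this context to enhance the answer, do so:
[{context_tag}]:\n"""
    for doc in rag_docs:
        preamble += doc
    preamble += f"\n[/{context_tag}]\nUse all of that information to respond to: "
    return preamble

# The message appended to llm_history is preamble + the original user content.
content = "What is in the knowledge base?"
print(build_preamble(content, ["Doc chunk one.", "Doc chunk two."]) + content)
```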
@@ -775,7 +781,7 @@ class WebServer:
             metadata["tools"] = tools_used
 
             yield {"status": "processing", "message": "Generating final response..."}
-            response = self.client.chat(model=self.model, messages=messages, stream=False)
+            response = self.client.chat(model=self.model, messages=messages, stream=False, options={ 'num_ctx': defines.max_context })
 
             reply = response['message']['content']
             final_message = {"role": "assistant", "content": reply }
@@ -1,4 +1,7 @@
 ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
-model="qwen2.5:7b"
+#model = "deepseek-r1:7b"
+model = "llama3.2"
+#model="qwen2.5:7b"
 encoding_model="mxbai-embed-large"
 persist_directory="./chromadb"
+max_context = 2048*8
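A minimal sketch of how the new defines.max_context value reaches the model as num_ctx, matching the options added to the chat and embeddings calls in this commit. It assumes the ollama Python client used elsewhere in the repo and a reachable Ollama endpoint.

```python
import ollama

from utils import defines  # the module updated above

client = ollama.Client(host=defines.ollama_api_url)

# Raise the model's context window to defines.max_context (2048*8) for this call,
# the same way the server now passes options={'num_ctx': defines.max_context}.
response = client.chat(
    model=defines.model,
    messages=[{"role": "user", "content": "Say hello."}],
    options={"num_ctx": defines.max_context},
)
print(response["message"]["content"])
```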
@@ -59,7 +59,7 @@ def get_vector_collection(path=defines.persist_directory, name="documents"):
 
 # Function to generate embeddings using Ollama
 def get_embedding(llm, text):
-    response = llm.embeddings(model=defines.model, prompt=text)
+    response = llm.embeddings(model=defines.model, prompt=text, options={ 'num_ctx': defines.max_context })
     return response["embedding"]
 
 def add_embeddings_to_collection(llm, collection, chunks):
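Finally, a hypothetical usage sketch of the RAG helpers touched above. get_vector_collection, get_embedding, and add_embeddings_to_collection come from this module; the example chunks and the ChromaDB-style collection.query call are illustrative assumptions, not part of the commit.

```python
import ollama
from utils import rag as Rag, defines

llm = ollama.Client(host=defines.ollama_api_url)
collection = Rag.get_vector_collection(path=defines.persist_directory, name="documents")

# Illustrative chunks; a real ingest would come from a document splitter.
chunks = ["First chunk of a document.", "Second chunk of a document."]
Rag.add_embeddings_to_collection(llm, collection, chunks)

# Embed a query (now with the enlarged num_ctx) and search the collection,
# assuming the collection is a ChromaDB collection as persist_directory suggests.
query_vector = Rag.get_embedding(llm, "What do these documents describe?")
results = collection.query(query_embeddings=[query_vector], n_results=3)
print(results["ids"], results["distances"])
```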