Tools are working and shared context is in use across all agents

James Ketr 2025-05-02 13:57:09 -07:00
parent baaa6e8559
commit 202060f5b5
8 changed files with 156 additions and 136 deletions

View File

@@ -162,7 +162,6 @@ function ChatBubble(props: ChatBubbleProps) {
     );
   }
-  console.log(role);
   return (
     <Box className={className} sx={{ ...(styles[role] !== undefined ? styles[role] : styles["status"]), gap: 1, display: "flex", ...sx, flexDirection: "row" }}>
       {icons[role] !== undefined && icons[role]}

View File

@@ -201,17 +201,13 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
   // isProcessing?: boolean,
   // metadata?: MessageMetaData
   // };
-  setConversation(backstoryMessages.flatMap((message: BackstoryMessage) => [{
+  setConversation(backstoryMessages.flatMap((backstoryMessage: BackstoryMessage) => [{
     role: 'user',
-    content: message.prompt || "",
+    content: backstoryMessage.prompt || "",
   }, {
+    ...backstoryMessage,
     role: 'assistant',
-    prompt: message.prompt || "",
-    preamble: message.preamble || {},
-    full_content: message.full_content || "",
-    content: message.response || "",
-    metadata: message.metadata,
-    actions: message.actions,
+    content: backstoryMessage.response || "",
   }] as MessageList));
   setNoInteractions(false);
 }
@@ -400,17 +396,10 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
   const backstoryMessage: BackstoryMessage = update;
   setConversation([
     ...conversationRef.current, {
-      // role: 'user',
-      // content: backstoryMessage.prompt || "",
-      // }, {
+      ...backstoryMessage,
       role: 'assistant',
       origin: type,
       content: backstoryMessage.response || "",
-      prompt: backstoryMessage.prompt || "",
-      preamble: backstoryMessage.preamble || {},
-      full_content: backstoryMessage.full_content || "",
-      metadata: backstoryMessage.metadata,
-      actions: backstoryMessage.actions,
   }] as MessageList);
   // Add a small delay to ensure React has time to update the UI
   await new Promise(resolve => setTimeout(resolve, 0));

View File

@@ -33,7 +33,6 @@ type MessageRoles = 'info' | 'user' | 'assistant' | 'system' | 'status' | 'error
 type MessageData = {
   role: MessageRoles,
   content: string,
-  full_content?: string,
   disableCopy?: boolean,
   user?: string,
@@ -101,56 +100,46 @@ const MessageMeta = (props: MessageMetaProps) => {
   const message = props.messageProps.message;
   return (<>
-    <Box sx={{ fontSize: "0.8rem", mb: 1 }}>
-      Below is the LLM performance of this query. Note that if tools are called, the
-      entire context is processed for each separate tool request by the LLM. This
-      can dramatically increase the total time for a response.
-    </Box>
-    <TableContainer component={Card} className="PromptStats" sx={{ mb: 1 }}>
-      <Table aria-label="prompt stats" size="small">
-        <TableHead>
-          <TableRow>
-            <TableCell></TableCell>
-            <TableCell align="right" >Tokens</TableCell>
-            <TableCell align="right">Time (s)</TableCell>
-            <TableCell align="right">TPS</TableCell>
-          </TableRow>
-        </TableHead>
-        <TableBody>
-          <TableRow key="prompt" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
-            <TableCell component="th" scope="row">Prompt</TableCell>
-            <TableCell align="right">{prompt_eval_count}</TableCell>
-            <TableCell align="right">{Math.round(prompt_eval_duration / 10 ** 7) / 100}</TableCell>
-            <TableCell align="right">{Math.round(prompt_eval_count * 10 ** 9 / prompt_eval_duration)}</TableCell>
-          </TableRow>
-          <TableRow key="response" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
-            <TableCell component="th" scope="row">Response</TableCell>
-            <TableCell align="right">{eval_count}</TableCell>
-            <TableCell align="right">{Math.round(eval_duration / 10 ** 7) / 100}</TableCell>
-            <TableCell align="right">{Math.round(eval_count * 10 ** 9 / eval_duration)}</TableCell>
-          </TableRow>
-          <TableRow key="total" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
-            <TableCell component="th" scope="row">Total</TableCell>
-            <TableCell align="right">{prompt_eval_count + eval_count}</TableCell>
-            <TableCell align="right">{Math.round((prompt_eval_duration + eval_duration) / 10 ** 7) / 100}</TableCell>
-            <TableCell align="right">{Math.round((prompt_eval_count + eval_count) * 10 ** 9 / (prompt_eval_duration + eval_duration))}</TableCell>
-          </TableRow>
-        </TableBody>
-      </Table>
-    </TableContainer>
     {
-      message.full_content !== undefined &&
-      <Accordion>
-        <AccordionSummary expandIcon={<ExpandMoreIcon />}>
-          <Box sx={{ fontSize: "0.8rem" }}>
-            Full Query
-          </Box>
-        </AccordionSummary>
-        <AccordionDetails>
-          <pre style={{ "display": "block", "position": "relative" }}><CopyBubble content={message.full_content?.trim()} />{message.full_content?.trim()}</pre>
-        </AccordionDetails>
-      </Accordion>
+      prompt_eval_duration !== 0 && eval_duration !== 0 && <>
+        <Box sx={{ fontSize: "0.8rem", mb: 1 }}>
+          Below is the LLM performance of this query. Note that if tools are called, the
+          entire context is processed for each separate tool request by the LLM. This
+          can dramatically increase the total time for a response.
+        </Box>
+        <TableContainer component={Card} className="PromptStats" sx={{ mb: 1 }}>
+          <Table aria-label="prompt stats" size="small">
+            <TableHead>
+              <TableRow>
+                <TableCell></TableCell>
+                <TableCell align="right" >Tokens</TableCell>
+                <TableCell align="right">Time (s)</TableCell>
+                <TableCell align="right">TPS</TableCell>
+              </TableRow>
+            </TableHead>
+            <TableBody>
+              <TableRow key="prompt" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
+                <TableCell component="th" scope="row">Prompt</TableCell>
+                <TableCell align="right">{prompt_eval_count}</TableCell>
+                <TableCell align="right">{Math.round(prompt_eval_duration / 10 ** 7) / 100}</TableCell>
+                <TableCell align="right">{Math.round(prompt_eval_count * 10 ** 9 / prompt_eval_duration)}</TableCell>
+              </TableRow>
+              <TableRow key="response" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
+                <TableCell component="th" scope="row">Response</TableCell>
+                <TableCell align="right">{eval_count}</TableCell>
+                <TableCell align="right">{Math.round(eval_duration / 10 ** 7) / 100}</TableCell>
+                <TableCell align="right">{Math.round(eval_count * 10 ** 9 / eval_duration)}</TableCell>
+              </TableRow>
+              <TableRow key="total" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
+                <TableCell component="th" scope="row">Total</TableCell>
+                <TableCell align="right">{prompt_eval_count + eval_count}</TableCell>
+                <TableCell align="right">{Math.round((prompt_eval_duration + eval_duration) / 10 ** 7) / 100}</TableCell>
+                <TableCell align="right">{Math.round((prompt_eval_count + eval_count) * 10 ** 9 / (prompt_eval_duration + eval_duration))}</TableCell>
+              </TableRow>
+            </TableBody>
+          </Table>
+        </TableContainer>
+      </>
     }
     {
       tools !== undefined && tools.tool_calls && tools.tool_calls.length !== 0 &&
@@ -216,33 +205,19 @@ const MessageMeta = (props: MessageMetaProps) => {
     <Accordion>
       <AccordionSummary expandIcon={<ExpandMoreIcon />}>
         <Box sx={{ fontSize: "0.8rem" }}>
-          All response fields
+          Full Response Details
         </Box>
       </AccordionSummary>
       <AccordionDetails>
-        {Object.entries(message)
-          .filter(([key, value]) => key !== undefined && value !== undefined)
-          .map(([key, value]) => (typeof (value) !== "string" || value?.trim() !== "") &&
-            <Accordion key={key}>
-              <AccordionSummary sx={{ fontSize: "1rem", fontWeight: "bold" }} expandIcon={<ExpandMoreIcon />}>
-                {key}
-              </AccordionSummary>
-              <AccordionDetails>
-                {typeof (value) === "string" ?
-                  <pre style={{ border: "none", margin: 0, padding: 0 }}>{value}</pre> :
-                  <JsonView displayDataTypes={false} objectSortKeys={true} collapsed={2} value={value as any} style={{ fontSize: "0.8rem", maxHeight: "20rem", overflow: "auto" }}>
-                    <JsonView.String
-                      render={({ children, ...reset }) => {
-                        if (typeof (children) === "string" && children.match("\n")) {
-                          return <pre {...reset} style={{ display: "flex", border: "none", ...reset.style }}>{children}</pre>
-                        }
-                      }}
-                    />
-                  </JsonView>
-                }
-              </AccordionDetails>
-            </Accordion>
-          )}
+        <JsonView displayDataTypes={false} objectSortKeys={true} collapsed={2} value={message} style={{ fontSize: "0.8rem", maxHeight: "20rem", overflow: "auto" }}>
+          <JsonView.String
+            render={({ children, ...reset }) => {
+              if (typeof (children) === "string" && children.match("\n")) {
+                return <pre {...reset} style={{ display: "inline", border: "none", ...reset.style }}>{children.trim()}</pre>
+              }
+            }}
+          />
+        </JsonView>
       </AccordionDetails>
     </Accordion>
   </>);
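
The TPS table above assumes Ollama-style counters: the *_count fields are token counts and the *_duration fields are nanoseconds, so Math.round(d / 10 ** 7) / 100 is simply seconds rounded to two decimals and count * 10 ** 9 / duration is tokens per second. A minimal Python sketch of the same arithmetic (field names mirror the metadata keys used above; this is an illustration, not code from the commit):

    def llm_performance(prompt_eval_count, prompt_eval_duration, eval_count, eval_duration):
        """Return {row: (tokens, seconds, tokens-per-second)} matching the PromptStats table."""
        rows = {}
        for name, count, duration in (
            ("prompt", prompt_eval_count, prompt_eval_duration),
            ("response", eval_count, eval_duration),
            ("total", prompt_eval_count + eval_count, prompt_eval_duration + eval_duration),
        ):
            seconds = round(duration / 10**9, 2)               # nanoseconds -> seconds
            tps = round(count * 10**9 / duration) if duration else 0
            rows[name] = (count, seconds, tps)
        return rows
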

View File

@@ -17,6 +17,7 @@ import re
 import math
 import warnings
 from typing import Any
+from collections import deque
 from uuid import uuid4
@@ -66,12 +67,6 @@ rags = [
 system_message = f"""
 Launched on {Tools.DateTime()}.
-You have access to tools to get real time access to:
-- AnalyzeSite: Allows you to look up information on the Internet
-- TickerValue: Allows you to find stock price values
-- DateTime: Allows you to get the current date and time
-- WeatherForecast: Allows you to get the weather forecast for a given location
 When answering queries, follow these steps:
 - First analyze the query to determine if real-time information from the tools might be helpful
@@ -87,6 +82,22 @@ When answering queries, follow these steps:
 Always use tools and <|context|> when possible. Be concise, and never make up information. If you do not know the answer, say so.
 """

+system_message_old = f"""
+Launched on {Tools.DateTime()}.
+When answering queries, follow these steps:
+1. First analyze the query to determine if real-time information might be helpful
+2. Even when <|context|> is provided, consider whether the tools would provide more current or comprehensive information
+3. Use the provided tools whenever they would enhance your response, regardless of whether context is also available
+4. When presenting weather forecasts, include relevant emojis immediately before the corresponding text. For example, for a sunny day, say \"☀️ Sunny\" or if the forecast says there will be \"rain showers, say \"🌧️ Rain showers\". Use this mapping for weather emojis: Sunny: ☀️, Cloudy: ☁️, Rainy: 🌧️, Snowy: ❄️
+4. When both <|context|> and tool outputs are relevant, synthesize information from both sources to provide the most complete answer
+5. Always prioritize the most up-to-date and relevant information, whether it comes from <|context|> or tools
+6. If <|context|> and tool outputs contain conflicting information, prefer the tool outputs as they likely represent more current data
+Always use tools and <|context|> when possible. Be concise, and never make up information. If you do not know the answer, say so.
+""".strip()
+
 system_generate_resume = f"""
 Launched on {Tools.DateTime()}.
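
With the tool list dropped from the prompt, the tools are only advertised to the model through Tools.llm_tools() when the chat API is called. The exact schema that helper emits is not shown in this commit, but Ollama accepts OpenAI-style function definitions; a hedged sketch of what one of the tools named above might look like:

    weather_forecast_tool = {
        "type": "function",
        "function": {
            "name": "WeatherForecast",
            "description": "Get the weather forecast for a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City, state or country"},
                },
                "required": ["location"],
            },
        },
    }
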
@@ -585,13 +596,25 @@ class WebServer:
         # Create a custom generator that ensures flushing
         async def flush_generator():
+            logging.info(f"Message starting. Streaming partial results.")
             async for message in self.generate_response(context=context, agent=agent, content=data["content"]):
+                if message.status != "done":
+                    result = {
+                        "status": message.status,
+                        "response": message.response
+                    }
+                else:
+                    logging.info(f"Message complete. Providing full response.")
+                    result = message.model_dump(mode='json')
+                result = json.dumps(result) + "\n"
+                message.network_packets += 1
+                message.network_bytes += len(result)
                 # Convert to JSON and add newline
-                yield json.dumps(message.model_dump(mode='json')) + "\n"
+                yield result
+                # Save the history as its generated
+                self.save_context(context_id)
                 # Explicitly flush after each yield
                 await asyncio.sleep(0) # Allow the event loop to process the write
-            # Save the history once completed
-            self.save_context(context_id)

         # Return StreamingResponse with appropriate headers
         return StreamingResponse(
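
flush_generator now emits newline-delimited JSON: intermediate packets carry only status and response text, the final packet (status == "done") is the full Message dump, and history is saved after every yield. A minimal client sketch for consuming that stream (the endpoint URL and payload shape are assumptions, not taken from this hunk):

    import json
    import requests

    def stream_chat(url: str, content: str):
        """Print streaming status packets and return the final full message."""
        with requests.post(url, json={"content": content}, stream=True) as r:
            r.raise_for_status()
            for line in r.iter_lines():
                if not line:
                    continue
                packet = json.loads(line)
                if packet.get("status") == "done":
                    return packet        # full model_dump of the Message
                print(packet["status"], packet["response"])
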
@@ -914,7 +937,7 @@ class WebServer:
                 }
             else:
                 yield {"status": "complete", "message": "RAG processing complete"}

     async def generate_response(self, context : Context, agent : Agent, content : str) -> AsyncGenerator[Message, None]:
         if not self.file_watcher:
             raise Exception("File watcher not initialized")

View File

@@ -4,6 +4,7 @@ from typing import Literal, TypeAlias, get_args, List, Generator, Iterator, Asyn
 from abc import ABC, abstractmethod
 from typing_extensions import Annotated
 from .. setup_logging import setup_logging
+from .. import defines

 logger = setup_logging()
@@ -22,6 +23,16 @@ class Agent(BaseModel, ABC):
     This class defines the common attributes and methods for all agent types.
     """
+    # context_size is shared across all subclasses
+    _context_size: ClassVar[int] = int(defines.max_context * 0.5)
+
+    @property
+    def context_size(self) -> int:
+        return Agent._context_size
+
+    @context_size.setter
+    def context_size(self, value: int):
+        Agent._context_size = value
+
     # Agent management with pydantic
     agent_type: Literal["base"] = "base"
     _agent_type: ClassVar[str] = agent_type # Add this for registration
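
This is the "shared context" from the commit message: _context_size is a ClassVar and the context_size property always reads and writes Agent._context_size, so growing the window for one agent grows it for every agent and subclass. A self-contained sketch of the same pattern (plain classes, no pydantic, purely illustrative):

    from typing import ClassVar

    class SharedContext:
        _context_size: ClassVar[int] = 2048    # one value for the whole hierarchy

        @property
        def context_size(self) -> int:
            return SharedContext._context_size

        @context_size.setter
        def context_size(self, value: int) -> None:
            SharedContext._context_size = value

    class ChatLike(SharedContext): pass
    class ResumeLike(SharedContext): pass

    a, b = ChatLike(), ResumeLike()
    a.context_size = 16384
    assert b.context_size == 16384             # every "agent" sees the new size
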
@@ -34,15 +45,39 @@ class Agent(BaseModel, ABC):
     _content_seed: str = PrivateAttr(default="")

+    def set_optimal_context_size(self, llm: Any, model: str, prompt: str, ctx_buffer=2048) -> int:
+        # Get more accurate token count estimate using tiktoken or similar
+        response = llm.generate(
+            model=model,
+            prompt=prompt,
+            options={
+                "num_ctx": self.context_size,
+                "num_predict": 0,
+            } # Don't generate any tokens, just tokenize
+        )
+
+        # The prompt_eval_count gives you the token count of your input
+        tokens = response.get("prompt_eval_count", 0)
+
+        # Add buffer for safety
+        total_ctx = tokens + ctx_buffer
+
+        if total_ctx > self.context_size:
+            logger.info(f"Increasing context size from {self.context_size} to {total_ctx}")
+
+        # Grow the context size if necessary
+        self.context_size = max(self.context_size, total_ctx)
+
+        # Use actual model maximum context size
+        return self.context_size
+
     # Class and pydantic model management
-    def __init_subclass__(cls, **kwargs):
+    def __init_subclass__(cls, **kwargs) -> None:
         """Auto-register subclasses"""
         super().__init_subclass__(**kwargs)
         # Register this class if it has an agent_type
         if hasattr(cls, 'agent_type') and cls.agent_type != Agent._agent_type:
             registry.register(cls.agent_type, cls)

-    def model_dump(self, *args, **kwargs):
+    def model_dump(self, *args, **kwargs) -> Any:
         # Ensure context is always excluded, even with exclude_unset=True
         kwargs.setdefault("exclude", set())
         if isinstance(kwargs["exclude"], set):
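
set_optimal_context_size never keeps any generated text; the num_predict: 0 call is purely a tokenization probe, and prompt_eval_count reports how many tokens the prompt occupies so the shared context can be grown to cover it plus ctx_buffer. A rough standalone sketch of that probe, mirroring the dict-style access used above (model name and default local server are assumptions):

    import ollama

    client = ollama.Client()                               # local Ollama server
    probe = client.generate(
        model="llama3.2",
        prompt="<|context|>\n...retrieved documents...\n\nRespond to: what changed?",
        options={"num_ctx": 2048, "num_predict": 0},       # tokenize only, emit nothing
    )
    prompt_tokens = probe.get("prompt_eval_count", 0)      # dict-style access, as in the method above
    needed_ctx = prompt_tokens + 2048                      # same default ctx_buffer as above
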

View File

@@ -62,13 +62,11 @@ class Chat(Agent, ABC):
             preamble_types_OR = " or ".join(preamble_types)
             message.preamble["rules"] = f"""\
 - Answer the question based on the information provided in the {preamble_types_AND} sections by incorporate it seamlessly and refer to it using natural language instead of mentioning {preamble_types_OR} or quoting it directly.
-- If there is no information in these sections, answer based on your knowledge.
+- If there is no information in these sections, answer based on your knowledge, or use any available tools.
 - Avoid phrases like 'According to the {preamble_types[0]}' or similar references to the {preamble_types_OR}.
 """
-            message.preamble["question"] = "Use that information to respond to:"
-        else:
-            message.preamble["question"] = "Respond to:"
+            message.preamble["question"] = "Respond to:"

         message.system_prompt = self.system_prompt
         message.status = "done"
         yield message
@@ -80,7 +78,6 @@ class Chat(Agent, ABC):
             raise ValueError("Context is not set for this agent.")
         if not message.metadata["tools"]:
             raise ValueError("tools field not initialized")
-        logging.info(f"LLM - tool processing - {tool_message}")

         tool_metadata = message.metadata["tools"]
         tool_metadata["messages"] = messages
@@ -95,6 +92,7 @@ class Chat(Agent, ABC):
             # Yield status update before processing each tool
             message.response = f"Processing tool {i+1}/{len(tool_message.tool_calls)}: {tool}..."
             yield message
+            logging.info(f"LLM - {message.response}")

             # Process the tool based on its type
             match tool:
@@ -186,10 +184,10 @@ class Chat(Agent, ABC):
                 message.metadata["prompt_eval_duration"] += response.prompt_eval_duration
                 self.context_tokens = response.prompt_eval_count + response.eval_count
                 message.status = "done"
-                yield message

         end_time = time.perf_counter()
         message.metadata["timers"]["llm_with_tools"] = f"{(end_time - start_time):.4f}"
+        message.status = "done"
+        yield message
         return

     async def generate_llm_response(self, llm: Any, model: str, message: Message) -> AsyncGenerator[Message, None]:
@@ -197,22 +195,23 @@ class Chat(Agent, ABC):
         if not self.context:
             raise ValueError("Context is not set for this agent.")

-        messages = [
+        messages = [ { "role": "system", "content": message.system_prompt } ]
+        messages.extend([
             item for m in self.conversation.messages
             for item in [
-                {"role": "user", "content": m.prompt},
-                {"role": "assistant", "content": m.response}
+                {"role": "user", "content": m.prompt.strip()},
+                {"role": "assistant", "content": m.response.strip()}
             ]
-        ]
+        ])
         messages.append({
             "role": "user",
-            "content": message.full_content,
+            "content": message.context_prompt.strip(),
         })
+        message.metadata["messages"] = messages

         message.metadata["options"]={
             "seed": 8911,
-            "num_ctx": message.metadata["ctx_size"] if message.metadata["ctx_size"] else defines.max_context,
-            "temperature": 0.9, # Higher temperature to encourage tool usage
+            "num_ctx": self.context_size,
+            #"temperature": 0.9, # Higher temperature to encourage tool usage
         }

         message.metadata["timers"] = {}
@@ -222,6 +221,7 @@ class Chat(Agent, ABC):
             "available": Tools.llm_tools(self.context.tools),
             "used": False
         }
+        tool_metadata = message.metadata["tools"]

         if use_tools:
             message.status = "thinking"
@@ -232,10 +232,11 @@ class Chat(Agent, ABC):
             start_time = time.perf_counter()
             # Tools are enabled and available, so query the LLM with a short token target to see if it will
             # use the tools
+            tool_metadata["messages"] = [{ "role": "system", "content": self.system_prompt}, {"role": "user", "content": message.prompt}]
             response = llm.chat(
                 model=model,
-                messages=messages, #[{ "role": "system", "content": self.system_prompt}, {"role": "user", "content": message.prompt}],
-                tools=message.metadata["tools"]["available"],
+                messages=tool_metadata["messages"],
+                tools=tool_metadata["available"],
                 options={
                     **message.metadata["options"],
                     #"num_predict": 1024, # "Low" token limit to cut off after tool call
@@ -253,7 +254,7 @@ class Chat(Agent, ABC):
                 logging.info("LLM indicates tools will be used")

                 # Tools are enabled and available and the LLM indicated it will use them
-                message.metadata["tools"]["attempted"] = response.message.tool_calls
+                tool_metadata["attempted"] = response.message.tool_calls

                 message.response = f"Performing tool analysis step 2/2 (tool use suspected)..."
                 yield message
@@ -261,8 +262,8 @@ class Chat(Agent, ABC):
                 start_time = time.perf_counter()
                 response = llm.chat(
                     model=model,
-                    messages=messages,
-                    tools=message.metadata["tools"]["available"],
+                    messages=tool_metadata["messages"], # messages,
+                    tools=tool_metadata["available"],
                     options={
                         **message.metadata["options"],
                     },
@@ -278,7 +279,7 @@ class Chat(Agent, ABC):
             return

         if response.message.tool_calls:
-            message.metadata["tools"]["used"] = response.message.tool_calls
+            tool_metadata["used"] = response.message.tool_calls
             # Process all yielded items from the handler
             start_time = time.perf_counter()
             async for message in self.process_tool_calls(llm=llm, model=model, message=message, tool_message=response.message, messages=messages):
@@ -345,13 +346,14 @@ class Chat(Agent, ABC):
         self.context.processing = True

-        message.metadata["system_prompt"] = f"<|system|>{self.system_prompt.strip()}\n"
+        message.metadata["system_prompt"] = f"<|system|>\n{self.system_prompt.strip()}\n"
+        message.context_prompt = ""
         for p in message.preamble.keys():
-            message.full_content += f"\n<|{p}|>\n{message.preamble[p].strip()}\n"
-        message.full_content += f"{message.prompt}"
+            message.context_prompt += f"\n<|{p}|>\n{message.preamble[p].strip()}\n"
+        message.context_prompt += f"{message.prompt}"

         # Estimate token length of new messages
-        message.metadata["ctx_size"] = self.context.get_optimal_ctx_size(self.context_tokens, messages=message.full_content)
+        message.metadata["context_size"] = self.set_optimal_context_size(llm, model, prompt=message.context_prompt)

         message.response = f"Processing {'RAG augmented ' if message.metadata['rag'] else ''}query..."
         message.status = "thinking"
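
context_prompt (the renamed full_content) is what set_optimal_context_size measures: each preamble section wrapped in <|section|> markers, followed by the raw prompt. Roughly, with illustrative section contents ('rules' and 'question' are keys set earlier in this file; 'context' is an assumption):

    preamble = {
        "context": "Top RAG matches for the query...",
        "rules": "- Answer based on the provided sections...",
        "question": "Respond to:",
    }
    prompt = "What did this commit change?"

    context_prompt = ""
    for key, text in preamble.items():
        context_prompt += f"\n<|{key}|>\n{text.strip()}\n"
    context_prompt += prompt
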

View File

@@ -33,7 +33,6 @@ class Context(BaseModel):
     tools: List[dict] = Tools.default_tools(Tools.tools)
     rags: List[dict] = []
     message_history_length: int = 5
-    context_tokens: int = 0

     # Class managed fields
     agents: List[Annotated[Union[*Agent.__subclasses__()], Field(discriminator="agent_type")]] = Field(
         default_factory=list
@@ -58,10 +57,6 @@ class Context(BaseModel):
             agent.set_context(self)
         return self

-    def get_optimal_ctx_size(self, context, messages, ctx_buffer = 4096):
-        ctx = round(context + len(str(messages)) * 3 / 4)
-        return max(defines.max_context, min(2048, ctx + ctx_buffer))
-
     def generate_rag_results(self, message: Message) -> Generator[Message, None, None]:
         """
         Generate RAG results for the given query.

View File

@@ -14,7 +14,7 @@ class Message(BaseModel):
     status: str = "" # Status of the message
     preamble: dict[str,str] = {} # Preamble to be prepended to the prompt
     system_prompt: str = "" # System prompt provided to the LLM
-    full_content: str = "" # Full content of the message (preamble + prompt)
+    context_prompt: str = "" # Full content of the message (preamble + prompt)
     response: str = "" # LLM response to the preamble + query
     metadata: dict[str, Any] = {
         "rag": List[dict[str, Any]],
@@ -22,8 +22,10 @@ class Message(BaseModel):
         "eval_duration": 0,
         "prompt_eval_count": 0,
         "prompt_eval_duration": 0,
-        "ctx_size": 0,
+        "context_size": 0,
     }
+    network_packets: int = 0 # Total number of streaming packets
+    network_bytes: int = 0 # Total bytes sent while streaming packets
     actions: List[str] = [] # Other session modifying actions performed while processing the message
     timestamp: datetime = datetime.now(timezone.utc)