Tools are working and shared context is in use across all agents

James Ketr 2025-05-02 14:09:12 -07:00
parent 202060f5b5
commit cc0f6974ff
2 changed files with 34 additions and 28 deletions

File 1 of 2:

@@ -46,17 +46,21 @@ class Agent(BaseModel, ABC):
     _content_seed: str = PrivateAttr(default="")
 
     def set_optimal_context_size(self, llm: Any, model: str, prompt: str, ctx_buffer=2048) -> int:
-        # Get more accurate token count estimate using tiktoken or similar
-        response = llm.generate(
-            model=model,
-            prompt=prompt,
-            options={
-                "num_ctx": self.context_size,
-                "num_predict": 0,
-            } # Don't generate any tokens, just tokenize
-        )
-        # The prompt_eval_count gives you the token count of your input
-        tokens = response.get("prompt_eval_count", 0)
+        # # Get more accurate token count estimate using tiktoken or similar
+        # response = llm.generate(
+        #     model=model,
+        #     prompt=prompt,
+        #     options={
+        #         "num_ctx": self.context_size,
+        #         "num_predict": 0,
+        #     } # Don't generate any tokens, just tokenize
+        # )
+        # # The prompt_eval_count gives you the token count of your input
+        # tokens = response.get("prompt_eval_count", 0)
+        # Most models average 1.3-1.5 tokens per word
+        word_count = len(prompt.split())
+        tokens = int(word_count * 1.4)
 
         # Add buffer for safety
         total_ctx = tokens + ctx_buffer
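The hunk above replaces an exact token count (the commented-out llm.generate(...) call with "num_predict": 0, which tokenizes the prompt and reports the count via prompt_eval_count) with a cheap word-count heuristic. A minimal standalone sketch of the new estimator, using an illustrative function name not taken from the diff:

    def estimate_context_size(prompt: str, ctx_buffer: int = 2048) -> int:
        # Most models average 1.3-1.5 tokens per word; 1.4 is a middle
        # estimate that avoids a round-trip to the model for tokenization.
        word_count = len(prompt.split())
        tokens = int(word_count * 1.4)
        # Add a buffer so the context has headroom beyond the prompt itself.
        return tokens + ctx_buffer

    # Example: a 1,000-word prompt reserves int(1000 * 1.4) + 2048 = 3448 tokens.

The trade-off is accuracy for latency: the heuristic never blocks on the LLM, but it can under-provision for token-dense inputs such as code.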
@@ -91,10 +95,6 @@ class Agent(BaseModel, ABC):
         """Return the set of valid agent_type values."""
         return set(get_args(cls.__annotations__["agent_type"]))
 
-    def agent_function_display(self):
-        import inspect
-        logger.info(f"{self.agent_type} - {inspect.stack()[1].function}")
-
     def set_context(self, context):
         object.__setattr__(self, "context", context)
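With the helper removed, each call site in the Chat subclass (second file below) inlines the equivalent logging.info(f"{self.agent_type} - {inspect.stack()[1].function}"). One subtlety: inspect.stack() indexes frames relative to where it runs, so the removed helper logged the method that called it, while the inlined form logs that method's own caller. A small sketch of the difference, with hypothetical function names:

    import inspect
    import logging

    logging.basicConfig(level=logging.INFO)

    def display_current_function() -> None:
        # Frame 0 is this helper; frame 1 is the method that called it.
        logging.info(inspect.stack()[1].function)

    def prepare_message() -> None:
        display_current_function()                 # logs "prepare_message"
        logging.info(inspect.stack()[1].function)  # logs prepare_message's caller

    def handler() -> None:
        prepare_message()                          # the inlined line logs "handler"

    handler()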

File 2 of 2:

@@ -13,6 +13,7 @@ from .. import tools as Tools
 from ollama import ChatResponse
 import json
 import time
+import inspect
 
 class Chat(Agent, ABC):
     """
@@ -26,7 +27,8 @@ class Chat(Agent, ABC):
         """
         Prepare message with context information in message.preamble
         """
-        self.agent_function_display()
+        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")
 
         if not self.context:
             raise ValueError("Context is not set for this agent.")
@@ -73,7 +75,8 @@ class Chat(Agent, ABC):
         return
 
     async def process_tool_calls(self, llm: Any, model: str, message: Message, tool_message: Any, messages: List[Any]) -> AsyncGenerator[Message, None]:
-        self.agent_function_display()
+        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")
         if not self.context:
             raise ValueError("Context is not set for this agent.")
         if not message.metadata["tools"]:
@@ -191,7 +194,8 @@ class Chat(Agent, ABC):
         return
 
     async def generate_llm_response(self, llm: Any, model: str, message: Message) -> AsyncGenerator[Message, None]:
-        self.agent_function_display()
+        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")
         if not self.context:
             raise ValueError("Context is not set for this agent.")
@@ -299,6 +303,7 @@ class Chat(Agent, ABC):
             return
 
         # not use_tools
+        yield message
         # Reset the response for streaming
         message.response = ""
         start_time = time.perf_counter()
@@ -333,7 +338,8 @@ class Chat(Agent, ABC):
         return
 
     async def process_message(self, llm: Any, model: str, message:Message) -> AsyncGenerator[Message, None]:
-        self.agent_function_display()
+        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")
         if not self.context:
             raise ValueError("Context is not set for this agent.")
@@ -353,6 +359,9 @@ class Chat(Agent, ABC):
         message.context_prompt += f"{message.prompt}"
 
         # Estimate token length of new messages
+        message.response = f"Optimizing context..."
+        message.status = "thinking"
+        yield message
         message.metadata["context_size"] = self.set_optimal_context_size(llm, model, prompt=message.context_prompt)
 
         message.response = f"Processing {'RAG augmented ' if message.metadata['rag'] else ''}query..."
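The three added lines follow the progress-streaming pattern this class already uses: update the shared Message, yield it so the caller can render interim status, then run the slow step. A minimal sketch of the pattern, with a stubbed Message standing in for the real class:

    import asyncio
    from typing import AsyncGenerator

    class Message:
        # Stub; the real Message also carries prompt, metadata, etc.
        status: str = "init"
        response: str = ""

    async def process_message(message: Message) -> AsyncGenerator[Message, None]:
        message.status = "thinking"
        message.response = "Optimizing context..."
        yield message                # interim update the UI can render
        await asyncio.sleep(0.1)     # stand-in for set_optimal_context_size()
        message.status = "done"
        message.response = "Context sized."
        yield message                # final result

    async def main() -> None:
        async for update in process_message(Message()):
            print(update.status, "-", update.response)

    asyncio.run(main())

Because the same Message object is yielded each time, consumers should read its fields immediately rather than holding references across iterations.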
@@ -360,15 +369,12 @@ class Chat(Agent, ABC):
         yield message
 
         async for message in self.generate_llm_response(llm, model, message):
             # logging.info(f"LLM: {message.status} - {f'...{message.response[-20:]}' if len(message.response) > 20 else message.response}")
             if message.status == "error":
                 yield message
                 self.context.processing = False
                 return
-            if message.status != "done":
-                yield message
+            yield message
         yield message
 
         # Done processing, add message to conversation
         message.status = "done"