From cc0f6974ffa8bfb2ac8b44f3d3276974536f17ac Mon Sep 17 00:00:00 2001
From: James Ketrenos
Date: Fri, 2 May 2025 14:09:12 -0700
Subject: [PATCH] Tools are working and shared context is in use across all agents

---
 src/utils/agents/base.py | 30 +++++++++++++++---------------
 src/utils/agents/chat.py | 32 +++++++++++++++++++-------------
 2 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/src/utils/agents/base.py b/src/utils/agents/base.py
index 23f49ff..c9e0c2b 100644
--- a/src/utils/agents/base.py
+++ b/src/utils/agents/base.py
@@ -46,17 +46,21 @@ class Agent(BaseModel, ABC):
     _content_seed: str = PrivateAttr(default="")
 
     def set_optimal_context_size(self, llm: Any, model: str, prompt: str, ctx_buffer=2048) -> int:
-        # Get more accurate token count estimate using tiktoken or similar
-        response = llm.generate(
-            model=model,
-            prompt=prompt,
-            options={
-                "num_ctx": self.context_size,
-                "num_predict": 0,
-            } # Don't generate any tokens, just tokenize
-        )
-        # The prompt_eval_count gives you the token count of your input
-        tokens = response.get("prompt_eval_count", 0)
+        # # Get more accurate token count estimate using tiktoken or similar
+        # response = llm.generate(
+        #     model=model,
+        #     prompt=prompt,
+        #     options={
+        #         "num_ctx": self.context_size,
+        #         "num_predict": 0,
+        #     } # Don't generate any tokens, just tokenize
+        # )
+        # # The prompt_eval_count gives you the token count of your input
+        # tokens = response.get("prompt_eval_count", 0)
+
+        # Most models average 1.3-1.5 tokens per word
+        word_count = len(prompt.split())
+        tokens = int(word_count * 1.4)
 
         # Add buffer for safety
         total_ctx = tokens + ctx_buffer
@@ -91,10 +95,6 @@ class Agent(BaseModel, ABC):
         """Return the set of valid agent_type values."""
         return set(get_args(cls.__annotations__["agent_type"]))
 
-    def agent_function_display(self):
-        import inspect
-        logger.info(f"{self.agent_type} - {inspect.stack()[1].function}")
-
     def set_context(self, context):
         object.__setattr__(self, "context", context)
 
diff --git a/src/utils/agents/chat.py b/src/utils/agents/chat.py
index 62a44e0..e4c33c1 100644
--- a/src/utils/agents/chat.py
+++ b/src/utils/agents/chat.py
@@ -13,6 +13,7 @@ from .. import tools as Tools
 from ollama import ChatResponse
 import json
 import time
+import inspect
 
 class Chat(Agent, ABC):
     """
@@ -26,7 +27,8 @@ class Chat(Agent, ABC):
         """
         Prepare message with context information in message.preamble
         """
-        self.agent_function_display()
+        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")
+
         if not self.context:
             raise ValueError("Context is not set for this agent.")
 
@@ -73,7 +75,8 @@ class Chat(Agent, ABC):
         return
 
     async def process_tool_calls(self, llm: Any, model: str, message: Message, tool_message: Any, messages: List[Any]) -> AsyncGenerator[Message, None]:
-        self.agent_function_display()
+        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")
+
         if not self.context:
             raise ValueError("Context is not set for this agent.")
         if not message.metadata["tools"]:
@@ -191,7 +194,8 @@ class Chat(Agent, ABC):
         return
 
     async def generate_llm_response(self, llm: Any, model: str, message: Message) -> AsyncGenerator[Message, None]:
-        self.agent_function_display()
+        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")
+
         if not self.context:
             raise ValueError("Context is not set for this agent.")
 
@@ -299,6 +303,7 @@ class Chat(Agent, ABC):
             return
 
         # not use_tools
+        yield message
         # Reset the response for streaming
         message.response = ""
         start_time = time.perf_counter()
@@ -333,7 +338,8 @@ class Chat(Agent, ABC):
         return
 
     async def process_message(self, llm: Any, model: str, message:Message) -> AsyncGenerator[Message, None]:
-        self.agent_function_display()
+        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")
+
         if not self.context:
             raise ValueError("Context is not set for this agent.")
 
@@ -353,6 +359,9 @@ class Chat(Agent, ABC):
         message.context_prompt += f"{message.prompt}"
 
         # Estimate token length of new messages
+        message.response = f"Optimizing context..."
+        message.status = "thinking"
+        yield message
         message.metadata["context_size"] = self.set_optimal_context_size(llm, model, prompt=message.context_prompt)
 
         message.response = f"Processing {'RAG augmented ' if message.metadata['rag'] else ''}query..."
@@ -360,16 +369,13 @@
         yield message
 
         async for message in self.generate_llm_response(llm, model, message):
-            # logging.info(f"LLM: {message.status} - {f'...{message.response[-20:]}' if len(message.response) > 20 else message.response}")
-            if message.status == "error":
-                yield message
-                self.context.processing = False
-                return
-            if message.status != "done":
-                yield message
+            # logging.info(f"LLM: {message.status} - {f'...{message.response[-20:]}' if len(message.response) > 20 else message.response}")
+            if message.status == "error":
+                yield message
+                self.context.processing = False
+                return
+            yield message
 
-        yield message
-
         # Done processing, add message to conversation
         message.status = "done"
         self.conversation.add_message(message)
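
The context-size change above amounts to a words-to-tokens heuristic plus a fixed buffer; a minimal standalone sketch of that arithmetic follows (the function name and example prompt are illustrative, not part of the patch):

# Illustrative sketch only -- mirrors the heuristic added in set_optimal_context_size(), outside the Agent class.
def estimate_context(prompt: str, ctx_buffer: int = 2048) -> int:
    word_count = len(prompt.split())   # crude whitespace word count
    tokens = int(word_count * 1.4)     # patch's assumption: most models average 1.3-1.5 tokens per word
    return tokens + ctx_buffer         # leave headroom for the generated response

print(estimate_context("Summarize the conversation so far."))  # 5 words -> 7 tokens -> 2055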