Tools are working and shared context is in use across all agents
This commit is contained in:
parent 202060f5b5
commit cc0f6974ff
@@ -46,17 +46,21 @@ class Agent(BaseModel, ABC):
    _content_seed: str = PrivateAttr(default="")

    def set_optimal_context_size(self, llm: Any, model: str, prompt: str, ctx_buffer=2048) -> int:
        # Get more accurate token count estimate using tiktoken or similar
        response = llm.generate(
            model=model,
            prompt=prompt,
            options={
                "num_ctx": self.context_size,
                "num_predict": 0,
            }  # Don't generate any tokens, just tokenize
        )
        # The prompt_eval_count gives you the token count of your input
        tokens = response.get("prompt_eval_count", 0)
        # # Get more accurate token count estimate using tiktoken or similar
        # response = llm.generate(
        #     model=model,
        #     prompt=prompt,
        #     options={
        #         "num_ctx": self.context_size,
        #         "num_predict": 0,
        #     }  # Don't generate any tokens, just tokenize
        # )
        # # The prompt_eval_count gives you the token count of your input
        # tokens = response.get("prompt_eval_count", 0)

        # Most models average 1.3-1.5 tokens per word
        word_count = len(prompt.split())
        tokens = int(word_count * 1.4)

        # Add buffer for safety
        total_ctx = tokens + ctx_buffer
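As displayed, the old tokenize round-trip (live and as a commented-out copy) sits next to the new word-count heuristic, and the heuristic's estimate overwrites `tokens`, so it is what actually takes effect. A minimal standalone sketch of that heuristic; the function name is illustrative, not from the repo:

```python
# Sketch of the word-count heuristic above, isolated from the class.
def estimate_context_size(prompt: str, ctx_buffer: int = 2048) -> int:
    word_count = len(prompt.split())
    tokens = int(word_count * 1.4)  # most models average 1.3-1.5 tokens per word
    return tokens + ctx_buffer      # add buffer for safety
```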
@@ -91,10 +95,6 @@ class Agent(BaseModel, ABC):
        """Return the set of valid agent_type values."""
        return set(get_args(cls.__annotations__["agent_type"]))

    def agent_function_display(self):
        import inspect
        logger.info(f"{self.agent_type} - {inspect.stack()[1].function}")

    def set_context(self, context):
        object.__setattr__(self, "context", context)
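Two idioms in this hunk deserve a note: `inspect.stack()[1].function` resolves to the name of the calling function, and `object.__setattr__` bypasses `pydantic.BaseModel`'s overridden `__setattr__`, which is presumably why `set_context` uses it (the assignment works even if the model is frozen or validates assignments). A self-contained sketch of both; the `Holder` class and its `frozen` config are illustrative assumptions:

```python
import inspect
from pydantic import BaseModel

def caller_name() -> str:
    # stack()[0] is this frame; stack()[1] is whoever called us
    return inspect.stack()[1].function

class Holder(BaseModel):
    model_config = {"frozen": True}  # plain attribute assignment raises
    context: dict | None = None

h = Holder()
object.__setattr__(h, "context", {"shared": True})  # bypasses pydantic's checks
```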
@@ -13,6 +13,7 @@ from .. import tools as Tools
from ollama import ChatResponse
import json
import time
import inspect

class Chat(Agent, ABC):
    """
@@ -26,7 +27,8 @@ class Chat(Agent, ABC):
        """
        Prepare message with context information in message.preamble
        """
        self.agent_function_display()
        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")

        if not self.context:
            raise ValueError("Context is not set for this agent.")
@@ -73,7 +75,8 @@ class Chat(Agent, ABC):
        return

    async def process_tool_calls(self, llm: Any, model: str, message: Message, tool_message: Any, messages: List[Any]) -> AsyncGenerator[Message, None]:
        self.agent_function_display()
        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")

        if not self.context:
            raise ValueError("Context is not set for this agent.")
        if not message.metadata["tools"]:
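`process_tool_calls` receives the model's tool-requesting reply (`tool_message`) and the running `messages` list, but its body falls outside this hunk. With the `ollama` client imported above, the dispatch conventionally looks like the sketch below; the lookup-and-append logic is an assumption, not the repo's actual code:

```python
# Assumed shape of an Ollama-style tool-call dispatch loop; `Tools` is
# the module imported earlier as `from .. import tools as Tools`.
for call in tool_message.message.tool_calls or []:
    fn = getattr(Tools, call.function.name, None)
    if fn is None:
        continue  # model requested a tool we don't expose
    result = fn(**call.function.arguments)  # arguments arrive as a dict
    messages.append({"role": "tool", "content": json.dumps(result)})
```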
@@ -191,7 +194,8 @@ class Chat(Agent, ABC):
        return

    async def generate_llm_response(self, llm: Any, model: str, message: Message) -> AsyncGenerator[Message, None]:
        self.agent_function_display()
        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")

        if not self.context:
            raise ValueError("Context is not set for this agent.")
@@ -299,6 +303,7 @@ class Chat(Agent, ABC):
            return

        # not use_tools
        yield message
        # Reset the response for streaming
        message.response = ""
        start_time = time.perf_counter()
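The pattern here, yield the message once, blank `message.response`, then start a timer, implies that subsequent yields carry progressively accumulated text. A hedged sketch of the continuation, with the chunk source (`stream`) left as a stand-in:

```python
# Illustrative continuation of the streaming reset above.
start_time = time.perf_counter()
async for chunk in stream:        # `stream` is a stand-in chunk source
    message.response += chunk     # accumulate freshly streamed text
    yield message                 # caller sees progressive updates
elapsed = time.perf_counter() - start_time
```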
@@ -333,7 +338,8 @@ class Chat(Agent, ABC):
        return

    async def process_message(self, llm: Any, model: str, message: Message) -> AsyncGenerator[Message, None]:
        self.agent_function_display()
        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")

        if not self.context:
            raise ValueError("Context is not set for this agent.")
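All of these entry points are `AsyncGenerator[Message, None]`, so a caller drains them with `async for`, treating each yielded `Message` as a progress snapshot. A minimal driver, assuming an `Agent` subclass instance named `agent` (the loop body is illustrative):

```python
import asyncio

async def drive(agent, llm, model, message):
    # Each snapshot carries a status ("thinking", "done", ...) and text.
    async for update in agent.process_message(llm, model, message):
        print(update.status, update.response)

# asyncio.run(drive(agent, llm, model, message))
```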
@@ -353,6 +359,9 @@ class Chat(Agent, ABC):
        message.context_prompt += f"{message.prompt}"

        # Estimate token length of new messages
        message.response = f"Optimizing context..."
        message.status = "thinking"
        yield message
        message.metadata["context_size"] = self.set_optimal_context_size(llm, model, prompt=message.context_prompt)

        message.response = f"Processing {'RAG augmented ' if message.metadata['rag'] else ''}query..."
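The estimate is stashed in `message.metadata["context_size"]`; its natural consumer is the `num_ctx` option of the eventual Ollama request. That downstream call is not shown in this diff, so the sketch below is an assumption about where the value ends up:

```python
# Assumed downstream use of the estimated context size (not in this diff).
response = llm.chat(
    model=model,
    messages=messages,
    options={"num_ctx": message.metadata["context_size"]},
)
```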
@@ -365,9 +374,6 @@ class Chat(Agent, ABC):
            yield message
            self.context.processing = False
            return
        if message.status != "done":
            yield message

        yield message

        # Done processing, add message to conversation