From cc0f6974ffa8bfb2ac8b44f3d3276974536f17ac Mon Sep 17 00:00:00 2001
From: James Ketrenos
Date: Fri, 2 May 2025 14:09:12 -0700
Subject: [PATCH] Tools are working and shared context is in use across all agents

---
 src/utils/agents/base.py | 30 +++++++++++++++---------------
 src/utils/agents/chat.py | 32 +++++++++++++++++++-------------
 2 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/src/utils/agents/base.py b/src/utils/agents/base.py
index 23f49ff..c9e0c2b 100644
--- a/src/utils/agents/base.py
+++ b/src/utils/agents/base.py
@@ -46,17 +46,21 @@ class Agent(BaseModel, ABC):
     _content_seed: str = PrivateAttr(default="")
 
     def set_optimal_context_size(self, llm: Any, model: str, prompt: str, ctx_buffer=2048) -> int:
-        # Get more accurate token count estimate using tiktoken or similar
-        response = llm.generate(
-            model=model,
-            prompt=prompt,
-            options={
-                "num_ctx": self.context_size,
-                "num_predict": 0,
-            } # Don't generate any tokens, just tokenize
-        )
-        # The prompt_eval_count gives you the token count of your input
-        tokens = response.get("prompt_eval_count", 0)
+        # # Get more accurate token count estimate using tiktoken or similar
+        # response = llm.generate(
+        #     model=model,
+        #     prompt=prompt,
+        #     options={
+        #         "num_ctx": self.context_size,
+        #         "num_predict": 0,
+        #     } # Don't generate any tokens, just tokenize
+        # )
+        # # The prompt_eval_count gives you the token count of your input
+        # tokens = response.get("prompt_eval_count", 0)
+
+        # Most models average 1.3-1.5 tokens per word
+        word_count = len(prompt.split())
+        tokens = int(word_count * 1.4)
 
         # Add buffer for safety
         total_ctx = tokens + ctx_buffer
@@ -91,10 +95,6 @@ class Agent(BaseModel, ABC):
         """Return the set of valid agent_type values."""
         return set(get_args(cls.__annotations__["agent_type"]))
 
-    def agent_function_display(self):
-        import inspect
-        logger.info(f"{self.agent_type} - {inspect.stack()[1].function}")
-
     def set_context(self, context):
         object.__setattr__(self, "context", context)
 
diff --git a/src/utils/agents/chat.py b/src/utils/agents/chat.py
index 62a44e0..e4c33c1 100644
--- a/src/utils/agents/chat.py
+++ b/src/utils/agents/chat.py
@@ -13,6 +13,7 @@ from .. import tools as Tools
 from ollama import ChatResponse
 import json
 import time
+import inspect
 
 class Chat(Agent, ABC):
     """
@@ -26,7 +27,8 @@ class Chat(Agent, ABC):
         """
         Prepare message with context information in message.preamble
         """
-        self.agent_function_display()
+        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")
+
         if not self.context:
             raise ValueError("Context is not set for this agent.")
 
@@ -73,7 +75,8 @@ class Chat(Agent, ABC):
         return
 
     async def process_tool_calls(self, llm: Any, model: str, message: Message, tool_message: Any, messages: List[Any]) -> AsyncGenerator[Message, None]:
-        self.agent_function_display()
+        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")
+
         if not self.context:
             raise ValueError("Context is not set for this agent.")
         if not message.metadata["tools"]:
@@ -191,7 +194,8 @@ class Chat(Agent, ABC):
         return
 
     async def generate_llm_response(self, llm: Any, model: str, message: Message) -> AsyncGenerator[Message, None]:
-        self.agent_function_display()
+        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")
+
         if not self.context:
             raise ValueError("Context is not set for this agent.")
 
@@ -299,6 +303,7 @@ class Chat(Agent, ABC):
             return
 
         # not use_tools
+        yield message
         # Reset the response for streaming
         message.response = ""
         start_time = time.perf_counter()
@@ -333,7 +338,8 @@ class Chat(Agent, ABC):
         return
 
     async def process_message(self, llm: Any, model: str, message:Message) -> AsyncGenerator[Message, None]:
-        self.agent_function_display()
+        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")
+
         if not self.context:
             raise ValueError("Context is not set for this agent.")
 
@@ -353,6 +359,9 @@ class Chat(Agent, ABC):
         message.context_prompt += f"{message.prompt}"
 
         # Estimate token length of new messages
+        message.response = f"Optimizing context..."
+        message.status = "thinking"
+        yield message
         message.metadata["context_size"] = self.set_optimal_context_size(llm, model, prompt=message.context_prompt)
 
         message.response = f"Processing {'RAG augmented ' if message.metadata['rag'] else ''}query..."
@@ -360,16 +369,13 @@
         yield message
 
         async for message in self.generate_llm_response(llm, model, message):
-            # logging.info(f"LLM: {message.status} - {f'...{message.response[-20:]}' if len(message.response) > 20 else message.response}")
-            if message.status == "error":
-                yield message
-                self.context.processing = False
-                return
-            if message.status != "done":
-                yield message
+            # logging.info(f"LLM: {message.status} - {f'...{message.response[-20:]}' if len(message.response) > 20 else message.response}")
+            if message.status == "error":
+                yield message
+                self.context.processing = False
+                return
+            yield message
 
-        yield message
-
         # Done processing, add message to conversation
         message.status = "done"
         self.conversation.add_message(message)
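
The context-size change above amounts to a words-to-tokens heuristic plus a fixed buffer; a minimal standalone sketch of that arithmetic follows (the function name and example prompt are illustrative, not part of the patch):

# Illustrative sketch only -- mirrors the heuristic added in set_optimal_context_size(), outside the Agent class.
def estimate_context(prompt: str, ctx_buffer: int = 2048) -> int:
    word_count = len(prompt.split())   # crude whitespace word count
    tokens = int(word_count * 1.4)     # patch's assumption: most models average 1.3-1.5 tokens per word
    return tokens + ctx_buffer         # leave headroom for the generated response

print(estimate_context("Summarize the conversation so far."))  # 5 words -> 7 tokens -> 2055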