Tools are working and shared context is in use across all agents
This commit is contained in:
parent 202060f5b5
commit cc0f6974ff
@@ -46,17 +46,21 @@ class Agent(BaseModel, ABC):
    _content_seed: str = PrivateAttr(default="")

    def set_optimal_context_size(self, llm: Any, model: str, prompt: str, ctx_buffer=2048) -> int:
        # Get more accurate token count estimate using tiktoken or similar
        response = llm.generate(
            model=model,
            prompt=prompt,
            options={
                "num_ctx": self.context_size,
                "num_predict": 0,
            }  # Don't generate any tokens, just tokenize
        )
        # The prompt_eval_count gives you the token count of your input
        tokens = response.get("prompt_eval_count", 0)
        # # Get more accurate token count estimate using tiktoken or similar
        # response = llm.generate(
        #     model=model,
        #     prompt=prompt,
        #     options={
        #         "num_ctx": self.context_size,
        #         "num_predict": 0,
        #     }  # Don't generate any tokens, just tokenize
        # )
        # # The prompt_eval_count gives you the token count of your input
        # tokens = response.get("prompt_eval_count", 0)

        # Most models average 1.3-1.5 tokens per word
        word_count = len(prompt.split())
        tokens = int(word_count * 1.4)

        # Add buffer for safety
        total_ctx = tokens + ctx_buffer
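As displayed, the old tokenize round-trip (live and as a commented-out copy) sits next to the new word-count heuristic, and the heuristic's estimate overwrites `tokens`, so it is what actually takes effect. A minimal standalone sketch of that heuristic; the function name is illustrative, not from the repo:

```python
# Sketch of the word-count heuristic above, isolated from the class.
def estimate_context_size(prompt: str, ctx_buffer: int = 2048) -> int:
    word_count = len(prompt.split())
    tokens = int(word_count * 1.4)  # most models average 1.3-1.5 tokens per word
    return tokens + ctx_buffer      # add buffer for safety
```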
@@ -91,10 +95,6 @@ class Agent(BaseModel, ABC):
        """Return the set of valid agent_type values."""
        return set(get_args(cls.__annotations__["agent_type"]))

    def agent_function_display(self):
        import inspect
        logger.info(f"{self.agent_type} - {inspect.stack()[1].function}")

    def set_context(self, context):
        object.__setattr__(self, "context", context)
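Two idioms in this hunk deserve a note: `inspect.stack()[1].function` resolves to the name of the calling function, and `object.__setattr__` bypasses `pydantic.BaseModel`'s overridden `__setattr__`, which is presumably why `set_context` uses it (the assignment works even if the model is frozen or validates assignments). A self-contained sketch of both; the `Holder` class and its `frozen` config are illustrative assumptions:

```python
import inspect
from pydantic import BaseModel

def caller_name() -> str:
    # stack()[0] is this frame; stack()[1] is whoever called us
    return inspect.stack()[1].function

class Holder(BaseModel):
    model_config = {"frozen": True}  # plain attribute assignment raises
    context: dict | None = None

h = Holder()
object.__setattr__(h, "context", {"shared": True})  # bypasses pydantic's checks
```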
@@ -13,6 +13,7 @@ from .. import tools as Tools
from ollama import ChatResponse
import json
import time
import inspect

class Chat(Agent, ABC):
    """
@@ -26,7 +27,8 @@ class Chat(Agent, ABC):
        """
        Prepare message with context information in message.preamble
        """
        self.agent_function_display()
        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")

        if not self.context:
            raise ValueError("Context is not set for this agent.")
@@ -73,7 +75,8 @@ class Chat(Agent, ABC):
        return

    async def process_tool_calls(self, llm: Any, model: str, message: Message, tool_message: Any, messages: List[Any]) -> AsyncGenerator[Message, None]:
        self.agent_function_display()
        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")

        if not self.context:
            raise ValueError("Context is not set for this agent.")
        if not message.metadata["tools"]:
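`process_tool_calls` receives the model's tool-requesting reply (`tool_message`) and the running `messages` list, but its body falls outside this hunk. With the `ollama` client imported above, the dispatch conventionally looks like the sketch below; the lookup-and-append logic is an assumption, not the repo's actual code:

```python
# Assumed shape of an Ollama-style tool-call dispatch loop; `Tools` is
# the module imported earlier as `from .. import tools as Tools`.
for call in tool_message.message.tool_calls or []:
    fn = getattr(Tools, call.function.name, None)
    if fn is None:
        continue  # model requested a tool we don't expose
    result = fn(**call.function.arguments)  # arguments arrive as a dict
    messages.append({"role": "tool", "content": json.dumps(result)})
```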
@@ -191,7 +194,8 @@ class Chat(Agent, ABC):
        return

    async def generate_llm_response(self, llm: Any, model: str, message: Message) -> AsyncGenerator[Message, None]:
        self.agent_function_display()
        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")

        if not self.context:
            raise ValueError("Context is not set for this agent.")
@@ -299,6 +303,7 @@ class Chat(Agent, ABC):
            return

        # not use_tools
        yield message
        # Reset the response for streaming
        message.response = ""
        start_time = time.perf_counter()
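The pattern here, yield the message once, blank `message.response`, then start a timer, implies that subsequent yields carry progressively accumulated text. A hedged sketch of the continuation, with the chunk source (`stream`) left as a stand-in:

```python
# Illustrative continuation of the streaming reset above.
start_time = time.perf_counter()
async for chunk in stream:        # `stream` is a stand-in chunk source
    message.response += chunk     # accumulate freshly streamed text
    yield message                 # caller sees progressive updates
elapsed = time.perf_counter() - start_time
```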
@@ -333,7 +338,8 @@ class Chat(Agent, ABC):
        return

    async def process_message(self, llm: Any, model: str, message: Message) -> AsyncGenerator[Message, None]:
        self.agent_function_display()
        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")

        if not self.context:
            raise ValueError("Context is not set for this agent.")
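All of these entry points are `AsyncGenerator[Message, None]`, so a caller drains them with `async for`, treating each yielded `Message` as a progress snapshot. A minimal driver, assuming an `Agent` subclass instance named `agent` (the loop body is illustrative):

```python
import asyncio

async def drive(agent, llm, model, message):
    # Each snapshot carries a status ("thinking", "done", ...) and text.
    async for update in agent.process_message(llm, model, message):
        print(update.status, update.response)

# asyncio.run(drive(agent, llm, model, message))
```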
@@ -353,6 +359,9 @@ class Chat(Agent, ABC):
        message.context_prompt += f"{message.prompt}"

        # Estimate token length of new messages
        message.response = f"Optimizing context..."
        message.status = "thinking"
        yield message
        message.metadata["context_size"] = self.set_optimal_context_size(llm, model, prompt=message.context_prompt)

        message.response = f"Processing {'RAG augmented ' if message.metadata['rag'] else ''}query..."
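The estimate is stashed in `message.metadata["context_size"]`; its natural consumer is the `num_ctx` option of the eventual Ollama request. That downstream call is not shown in this diff, so the sketch below is an assumption about where the value ends up:

```python
# Assumed downstream use of the estimated context size (not in this diff).
response = llm.chat(
    model=model,
    messages=messages,
    options={"num_ctx": message.metadata["context_size"]},
)
```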
@@ -365,9 +374,6 @@ class Chat(Agent, ABC):
            yield message
            self.context.processing = False
            return
        if message.status != "done":
            yield message

        yield message

        # Done processing, add message to conversation