Tools are working and shared context is in use across all agents
This commit is contained in:
parent 202060f5b5
commit cc0f6974ff
@@ -46,17 +46,21 @@ class Agent(BaseModel, ABC):
     _content_seed: str = PrivateAttr(default="")
 
     def set_optimal_context_size(self, llm: Any, model: str, prompt: str, ctx_buffer=2048) -> int:
-        # Get more accurate token count estimate using tiktoken or similar
-        response = llm.generate(
-            model=model,
-            prompt=prompt,
-            options={
-                "num_ctx": self.context_size,
-                "num_predict": 0,
-            } # Don't generate any tokens, just tokenize
-        )
-        # The prompt_eval_count gives you the token count of your input
-        tokens = response.get("prompt_eval_count", 0)
+        # # Get more accurate token count estimate using tiktoken or similar
+        # response = llm.generate(
+        #     model=model,
+        #     prompt=prompt,
+        #     options={
+        #         "num_ctx": self.context_size,
+        #         "num_predict": 0,
+        #     } # Don't generate any tokens, just tokenize
+        # )
+        # # The prompt_eval_count gives you the token count of your input
+        # tokens = response.get("prompt_eval_count", 0)
+
+        # Most models average 1.3-1.5 tokens per word
+        word_count = len(prompt.split())
+        tokens = int(word_count * 1.4)
 
         # Add buffer for safety
         total_ctx = tokens + ctx_buffer
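The commented-out path asked Ollama to evaluate the prompt with num_predict=0 and read prompt_eval_count; the active path now approximates instead. For reference, a minimal sketch of what the word-count estimate amounts to after this change; the assignment to self.context_size and the return value are assumptions, since the rest of the method falls outside the hunk:

    def set_optimal_context_size(self, llm, model, prompt: str, ctx_buffer: int = 2048) -> int:
        # llm and model are unused by the heuristic path; they served the
        # commented-out llm.generate() token count.
        # Heuristic from the diff: most models average roughly 1.3-1.5 tokens
        # per word, so word count * 1.4 approximates the prompt's token length.
        word_count = len(prompt.split())
        tokens = int(word_count * 1.4)

        # Add a safety buffer so the window is not sized exactly to the prompt.
        total_ctx = tokens + ctx_buffer

        # Assumption: code outside this hunk records and returns the value.
        self.context_size = total_ctx
        return total_ctx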
@@ -91,10 +95,6 @@ class Agent(BaseModel, ABC):
         """Return the set of valid agent_type values."""
         return set(get_args(cls.__annotations__["agent_type"]))
 
-    def agent_function_display(self):
-        import inspect
-        logger.info(f"{self.agent_type} - {inspect.stack()[1].function}")
-
     def set_context(self, context):
         object.__setattr__(self, "context", context)
 
@@ -13,6 +13,7 @@ from .. import tools as Tools
 from ollama import ChatResponse
 import json
 import time
+import inspect
 
 class Chat(Agent, ABC):
     """
@@ -26,7 +27,8 @@ class Chat(Agent, ABC):
         """
         Prepare message with context information in message.preamble
         """
-        self.agent_function_display()
+        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")
 
         if not self.context:
             raise ValueError("Context is not set for this agent.")
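The removed agent_function_display helper is replaced by inlining the same call-site logging in each method, as this and the following hunks show. A small self-contained sketch of what the inlined line reports (the class and function names here are hypothetical, not this repository's code):

    import inspect
    import logging

    logging.basicConfig(level=logging.INFO)

    class DemoAgent:
        agent_type = "chat"

        def prepare_message(self):
            # inspect.stack()[1] is the caller's frame, so this records which
            # function invoked the current method.
            logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")

    def driver():
        DemoAgent().prepare_message()  # log message reads: "chat - driver"

    driver()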
@@ -73,7 +75,8 @@ class Chat(Agent, ABC):
             return
 
     async def process_tool_calls(self, llm: Any, model: str, message: Message, tool_message: Any, messages: List[Any]) -> AsyncGenerator[Message, None]:
-        self.agent_function_display()
+        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")
 
         if not self.context:
             raise ValueError("Context is not set for this agent.")
         if not message.metadata["tools"]:
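The body of process_tool_calls sits outside this hunk. As a point of reference only, here is a generic sketch of dispatching Ollama-style tool calls to plain Python callables; the dispatch_tool_calls name, the available_tools mapping, and the result format are assumptions, not this repository's code:

    from typing import Any, Callable, Dict, List

    def dispatch_tool_calls(tool_message: Any, available_tools: Dict[str, Callable]) -> List[dict]:
        # tool_message is expected to carry a tool_calls list (as an ollama chat
        # response message does); each entry names a function and its arguments.
        results = []
        for call in getattr(tool_message, "tool_calls", None) or []:
            fn = available_tools.get(call.function.name)
            if fn is None:
                results.append({"role": "tool", "content": f"Unknown tool: {call.function.name}"})
                continue
            output = fn(**call.function.arguments)
            results.append({"role": "tool", "content": str(output)})
        return results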
@@ -191,7 +194,8 @@
             return
 
     async def generate_llm_response(self, llm: Any, model: str, message: Message) -> AsyncGenerator[Message, None]:
-        self.agent_function_display()
+        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")
 
         if not self.context:
             raise ValueError("Context is not set for this agent.")
 
@@ -299,6 +303,7 @@
                 return
 
         # not use_tools
+        yield message
         # Reset the response for streaming
         message.response = ""
         start_time = time.perf_counter()
@@ -333,7 +338,8 @@
             return
 
     async def process_message(self, llm: Any, model: str, message:Message) -> AsyncGenerator[Message, None]:
-        self.agent_function_display()
+        logging.info(f"{self.agent_type} - {inspect.stack()[1].function}")
 
         if not self.context:
             raise ValueError("Context is not set for this agent.")
 
@@ -353,6 +359,9 @@
         message.context_prompt += f"{message.prompt}"
 
         # Estimate token length of new messages
+        message.response = f"Optimizing context..."
+        message.status = "thinking"
+        yield message
         message.metadata["context_size"] = self.set_optimal_context_size(llm, model, prompt=message.context_prompt)
 
         message.response = f"Processing {'RAG augmented ' if message.metadata['rag'] else ''}query..."
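With the extra "Optimizing context..." update yielded before the context-size estimate, a caller sees each phase as it happens. A minimal consumer-side sketch, where the driver function and the model name are hypothetical stand-ins:

    import asyncio

    async def stream_agent(agent, llm, model, message):
        # Drain the async generator and surface each intermediate status
        # ("thinking", "error", "done") and its response text
        # ("Optimizing context...", "Processing query...", streamed tokens).
        async for update in agent.process_message(llm, model, message):
            print(f"[{update.status}] {update.response}")

    # Example invocation (objects assumed to exist elsewhere):
    # asyncio.run(stream_agent(chat_agent, llm, "qwen2.5", message))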
@@ -360,15 +369,12 @@
         yield message
 
         async for message in self.generate_llm_response(llm, model, message):
             # logging.info(f"LLM: {message.status} - {f'...{message.response[-20:]}' if len(message.response) > 20 else message.response}")
             if message.status == "error":
                 yield message
                 self.context.processing = False
                 return
-            if message.status != "done":
-                yield message
-
-            yield message
+            yield message
 
         # Done processing, add message to conversation
         message.status = "done"