From 695bf5f58c6e44ff14bf261607bd9f2f24661012 Mon Sep 17 00:00:00 2001
From: James Ketrenos
Date: Fri, 9 May 2025 16:06:45 -0700
Subject: [PATCH] Restructure metrics collection so it occurs at run-time

---
 src/server.py                       |  3 ---
 src/utils/agents/base.py            | 12 ++++++++++--
 src/utils/agents/job_description.py |  1 +
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/server.py b/src/server.py
index f1fb273..ab73693 100644
--- a/src/server.py
+++ b/src/server.py
@@ -881,9 +881,6 @@ class WebServer:
                 return
             logger.info(f"{agent_type}.process_message: {message.status} {f'...{message.response[-20:]}' if len(message.response) > 20 else message.response}")

-            if message.metadata["eval_count"]:
-                agent.metrics.tokens_prompt.labels(agent=agent.agent_type).inc(message.metadata["prompt_eval_count"])
-                agent.metrics.tokens_eval.labels(agent=agent.agent_type).inc(message.metadata["eval_count"])
             message.status = "done"
             yield message
             return
diff --git a/src/utils/agents/base.py b/src/utils/agents/base.py
index 6e1001a..352da48 100644
--- a/src/utils/agents/base.py
+++ b/src/utils/agents/base.py
@@ -263,11 +263,11 @@ class Agent(BaseModel, ABC):
         for response in llm.chat(
             model=model,
             messages=messages,
-            stream=True,
             options={
                 **message.metadata["options"],
                 # "temperature": 0.5,
-            }
+            },
+            stream=True,
         ):
             # logger.info(f"LLM::Tools: {'done' if response.done else 'processing'} - {response.message}")
             message.status = "streaming"
@@ -278,6 +278,7 @@ class Agent(BaseModel, ABC):
             yield message

             if response.done:
+                self.collect_metrics(response)
                 message.metadata["eval_count"] += response.eval_count
                 message.metadata["eval_duration"] += response.eval_duration
                 message.metadata["prompt_eval_count"] += response.prompt_eval_count
@@ -290,6 +291,10 @@ class Agent(BaseModel, ABC):
         message.metadata["timers"]["llm_with_tools"] = f"{(end_time - start_time):.4f}"
         return

+    def collect_metrics(self, response):
+        self.metrics.tokens_prompt.labels(agent=self.agent_type).inc(response.prompt_eval_count)
+        self.metrics.tokens_eval.labels(agent=self.agent_type).inc(response.eval_count)
+
     async def generate_llm_response(self, llm: Any, model: str, message: Message, temperature = 0.7) -> AsyncGenerator[Message, None]:
         logger.info(f"{self.agent_type} - {inspect.stack()[0].function}")

@@ -354,6 +359,7 @@ class Agent(BaseModel, ABC):
             },
             stream=False # No need to stream the probe
         )
+        self.collect_metrics(response)
         end_time = time.perf_counter()
         message.metadata["timers"]["tool_check"] = f"{(end_time - start_time):.4f}"

@@ -382,6 +388,7 @@ class Agent(BaseModel, ABC):
             },
             stream=False
         )
+        self.collect_metrics(response)
         end_time = time.perf_counter()
         message.metadata["timers"]["non_streaming"] = f"{(end_time - start_time):.4f}"

@@ -441,6 +448,7 @@ class Agent(BaseModel, ABC):
             yield message

             if response.done:
+                self.collect_metrics(response)
                 message.metadata["eval_count"] += response.eval_count
                 message.metadata["eval_duration"] += response.eval_duration
                 message.metadata["prompt_eval_count"] += response.prompt_eval_count
diff --git a/src/utils/agents/job_description.py b/src/utils/agents/job_description.py
index 0dea133..966b684 100644
--- a/src/utils/agents/job_description.py
+++ b/src/utils/agents/job_description.py
@@ -544,6 +544,7 @@ class JobDescription(Agent):
                     message.chunk = ""

                 if response.done:
+                    self.collect_metrics(response)
                     message.metadata["eval_count"] += response.eval_count
                     message.metadata["eval_duration"] += response.eval_duration
                     message.metadata["prompt_eval_count"] += response.prompt_eval_count
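
A minimal sketch (not from this repository) of the counters that collect_metrics() assumes on self.metrics, inferred from the labels(agent=...).inc(...) calls in the diff. The Metrics class, metric names, and prometheus_client wiring below are illustrative assumptions; only the tokens_prompt / tokens_eval attribute names and the agent label come from the patch.

# Assumed sketch of the metrics container used by Agent.collect_metrics().
# Metric names and the use of prometheus_client are assumptions for illustration.
from prometheus_client import Counter

class Metrics:
    def __init__(self):
        # One counter per token type, labelled by agent so each Agent subclass
        # (e.g. JobDescription) gets its own series.
        self.tokens_prompt = Counter(
            "llm_tokens_prompt_total",
            "Prompt tokens consumed, per agent",
            labelnames=["agent"],
        )
        self.tokens_eval = Counter(
            "llm_tokens_eval_total",
            "Generated (eval) tokens, per agent",
            labelnames=["agent"],
        )

# With counters of that shape, the helper added in base.py increments both once per
# completed LLM response (streaming completion, tool-check probe, non-streaming call)
# rather than once per chat message as the removed server.py code did:
#
#     def collect_metrics(self, response):
#         self.metrics.tokens_prompt.labels(agent=self.agent_type).inc(response.prompt_eval_count)
#         self.metrics.tokens_eval.labels(agent=self.agent_type).inc(response.eval_count)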