Restructure metrics collection so it occurs at run time

James Ketr 2025-05-09 16:06:45 -07:00
parent aa071f38aa
commit 695bf5f58c
3 changed files with 11 additions and 5 deletions
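
The diff below assumes Prometheus-style token counters living on each agent as self.metrics.tokens_prompt and self.metrics.tokens_eval, each partitioned by an "agent" label. A minimal sketch of that assumed setup follows; the Metrics wrapper, metric names, and help strings are illustrative, not part of this commit:

from prometheus_client import Counter

class Metrics:
    # Hypothetical container for the counters the diff increments via
    # self.metrics.tokens_prompt.labels(agent=...).inc(...)
    def __init__(self):
        self.tokens_prompt = Counter("tokens_prompt", "Prompt tokens consumed, per agent", ["agent"])
        self.tokens_eval = Counter("tokens_eval", "Generated (eval) tokens, per agent", ["agent"])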


@@ -881,9 +881,6 @@ class WebServer:
 return
 logger.info(f"{agent_type}.process_message: {message.status} {f'...{message.response[-20:]}' if len(message.response) > 20 else message.response}")
-if message.metadata["eval_count"]:
-    agent.metrics.tokens_prompt.labels(agent=agent.agent_type).inc(message.metadata["prompt_eval_count"])
-    agent.metrics.tokens_eval.labels(agent=agent.agent_type).inc(message.metadata["eval_count"])
 message.status = "done"
 yield message
 return


@@ -263,11 +263,11 @@ class Agent(BaseModel, ABC):
 for response in llm.chat(
     model=model,
     messages=messages,
-    stream=True,
     options={
         **message.metadata["options"],
         # "temperature": 0.5,
-    }
+    },
+    stream=True,
 ):
     # logger.info(f"LLM::Tools: {'done' if response.done else 'processing'} - {response.message}")
     message.status = "streaming"
@@ -278,6 +278,7 @@ class Agent(BaseModel, ABC):
 yield message
 if response.done:
+    self.collect_metrics(response)
     message.metadata["eval_count"] += response.eval_count
     message.metadata["eval_duration"] += response.eval_duration
     message.metadata["prompt_eval_count"] += response.prompt_eval_count
@@ -290,6 +291,10 @@ class Agent(BaseModel, ABC):
 message.metadata["timers"]["llm_with_tools"] = f"{(end_time - start_time):.4f}"
 return

+def collect_metrics(self, response):
+    self.metrics.tokens_prompt.labels(agent=self.agent_type).inc(response.prompt_eval_count)
+    self.metrics.tokens_eval.labels(agent=self.agent_type).inc(response.eval_count)
+
 async def generate_llm_response(self, llm: Any, model: str, message: Message, temperature = 0.7) -> AsyncGenerator[Message, None]:
     logger.info(f"{self.agent_type} - {inspect.stack()[0].function}")
@@ -354,6 +359,7 @@ class Agent(BaseModel, ABC):
     },
     stream=False  # No need to stream the probe
 )
+self.collect_metrics(response)
 end_time = time.perf_counter()
 message.metadata["timers"]["tool_check"] = f"{(end_time - start_time):.4f}"
@@ -382,6 +388,7 @@ class Agent(BaseModel, ABC):
     },
     stream=False
 )
+self.collect_metrics(response)
 end_time = time.perf_counter()
 message.metadata["timers"]["non_streaming"] = f"{(end_time - start_time):.4f}"
@@ -441,6 +448,7 @@ class Agent(BaseModel, ABC):
 yield message
 if response.done:
+    self.collect_metrics(response)
     message.metadata["eval_count"] += response.eval_count
     message.metadata["eval_duration"] += response.eval_duration
     message.metadata["prompt_eval_count"] += response.prompt_eval_count


@@ -544,6 +544,7 @@ class JobDescription(Agent):
 message.chunk = ""
 if response.done:
+    self.collect_metrics(response)
     message.metadata["eval_count"] += response.eval_count
     message.metadata["eval_duration"] += response.eval_duration
     message.metadata["prompt_eval_count"] += response.prompt_eval_count
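
With collection now happening on each completed response, the counters can be scraped independently of the message pipeline. A hedged usage sketch, assuming the prometheus_client setup sketched earlier; the exporter style and port are illustrative, not part of this commit:

from prometheus_client import start_http_server

# Expose the default registry (including the assumed tokens_prompt / tokens_eval
# counters) on a scrape port; Prometheus then observes the per-agent token counts
# as collect_metrics increments them at run time.
start_http_server(8000)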