Restructure metrics collection so it occurs at run-time
parent aa071f38aa
commit 695bf5f58c
@@ -881,9 +881,6 @@ class WebServer:
                 return

             logger.info(f"{agent_type}.process_message: {message.status} {f'...{message.response[-20:]}' if len(message.response) > 20 else message.response}")
-            if message.metadata["eval_count"]:
-                agent.metrics.tokens_prompt.labels(agent=agent.agent_type).inc(message.metadata["prompt_eval_count"])
-                agent.metrics.tokens_eval.labels(agent=agent.agent_type).inc(message.metadata["eval_count"])
             message.status = "done"
             yield message
             return
@@ -263,11 +263,11 @@ class Agent(BaseModel, ABC):
         for response in llm.chat(
             model=model,
             messages=messages,
-            stream=True,
             options={
                 **message.metadata["options"],
                 # "temperature": 0.5,
-            }
+            },
+            stream=True,
         ):
             # logger.info(f"LLM::Tools: {'done' if response.done else 'processing'} - {response.message}")
             message.status = "streaming"
@@ -278,6 +278,7 @@ class Agent(BaseModel, ABC):
             yield message

             if response.done:
+                self.collect_metrics(response)
                 message.metadata["eval_count"] += response.eval_count
                 message.metadata["eval_duration"] += response.eval_duration
                 message.metadata["prompt_eval_count"] += response.prompt_eval_count
@@ -290,6 +291,10 @@ class Agent(BaseModel, ABC):
         message.metadata["timers"]["llm_with_tools"] = f"{(end_time - start_time):.4f}"
         return

+    def collect_metrics(self, response):
+        self.metrics.tokens_prompt.labels(agent=self.agent_type).inc(response.prompt_eval_count)
+        self.metrics.tokens_eval.labels(agent=self.agent_type).inc(response.eval_count)
+
     async def generate_llm_response(self, llm: Any, model: str, message: Message, temperature = 0.7) -> AsyncGenerator[Message, None]:
         logger.info(f"{self.agent_type} - {inspect.stack()[0].function}")

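The collect_metrics helper added above assumes that self.metrics exposes Prometheus-style counters with an agent label; the Metrics container itself is not part of this diff. A minimal sketch of how it might be declared with prometheus_client, assuming a single shared instance (the metric names below are illustrative, not taken from the repository):

# Illustrative sketch only - the actual Metrics class is not shown in this commit.
from prometheus_client import Counter

class Metrics:
    def __init__(self) -> None:
        # The "agent" label mirrors the .labels(agent=...) calls in collect_metrics();
        # the metric names are assumptions.
        self.tokens_prompt = Counter(
            "llm_tokens_prompt_total", "Prompt tokens evaluated, per agent", ["agent"]
        )
        self.tokens_eval = Counter(
            "llm_tokens_eval_total", "Completion tokens generated, per agent", ["agent"]
        )

# A single shared instance is assumed: prometheus_client raises a ValueError
# if the same metric name is registered twice in the default registry.
metrics = Metrics()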
@@ -354,6 +359,7 @@ class Agent(BaseModel, ABC):
             },
             stream=False  # No need to stream the probe
         )
+        self.collect_metrics(response)

         end_time = time.perf_counter()
         message.metadata["timers"]["tool_check"] = f"{(end_time - start_time):.4f}"
@@ -382,6 +388,7 @@ class Agent(BaseModel, ABC):
             },
             stream=False
         )
+        self.collect_metrics(response)

         end_time = time.perf_counter()
         message.metadata["timers"]["non_streaming"] = f"{(end_time - start_time):.4f}"
@@ -441,6 +448,7 @@ class Agent(BaseModel, ABC):
             yield message

             if response.done:
+                self.collect_metrics(response)
                 message.metadata["eval_count"] += response.eval_count
                 message.metadata["eval_duration"] += response.eval_duration
                 message.metadata["prompt_eval_count"] += response.prompt_eval_count
@@ -544,6 +544,7 @@ class JobDescription(Agent):
             message.chunk = ""

             if response.done:
+                self.collect_metrics(response)
                 message.metadata["eval_count"] += response.eval_count
                 message.metadata["eval_duration"] += response.eval_duration
                 message.metadata["prompt_eval_count"] += response.prompt_eval_count
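Taken together, the hunks move token accounting out of WebServer.process_message and into the agents: wherever a response completes, the agent now calls self.collect_metrics(response) immediately. A condensed illustration of the run-time pattern after this commit, using only names that appear in the diff (not verbatim source):

# Condensed illustration, not verbatim source.
for response in llm.chat(
    model=model,
    messages=messages,
    options={**message.metadata["options"]},
    stream=True,
):
    message.status = "streaming"
    yield message

    if response.done:
        # Prometheus counters are incremented as soon as the response finishes...
        self.collect_metrics(response)
        # ...while the per-message metadata keeps accumulating raw counts.
        message.metadata["eval_count"] += response.eval_count
        message.metadata["prompt_eval_count"] += response.prompt_eval_count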