Restructure metrics collection so they occur at run-time
parent aa071f38aa
commit 695bf5f58c
@@ -881,9 +881,6 @@ class WebServer:
                 return

             logger.info(f"{agent_type}.process_message: {message.status} {f'...{message.response[-20:]}' if len(message.response) > 20 else message.response}")
-            if message.metadata["eval_count"]:
-                agent.metrics.tokens_prompt.labels(agent=agent.agent_type).inc(message.metadata["prompt_eval_count"])
-                agent.metrics.tokens_eval.labels(agent=agent.agent_type).inc(message.metadata["eval_count"])
             message.status = "done"
             yield message
             return
@@ -263,11 +263,11 @@ class Agent(BaseModel, ABC):
         for response in llm.chat(
             model=model,
             messages=messages,
-            stream=True,
             options={
                 **message.metadata["options"],
                 # "temperature": 0.5,
-            }
+            },
+            stream=True,
         ):
             # logger.info(f"LLM::Tools: {'done' if response.done else 'processing'} - {response.message}")
             message.status = "streaming"
@@ -278,6 +278,7 @@ class Agent(BaseModel, ABC):
             yield message

             if response.done:
+                self.collect_metrics(response)
                 message.metadata["eval_count"] += response.eval_count
                 message.metadata["eval_duration"] += response.eval_duration
                 message.metadata["prompt_eval_count"] += response.prompt_eval_count
@@ -290,6 +291,10 @@ class Agent(BaseModel, ABC):
         message.metadata["timers"]["llm_with_tools"] = f"{(end_time - start_time):.4f}"
         return

+    def collect_metrics(self, response):
+        self.metrics.tokens_prompt.labels(agent=self.agent_type).inc(response.prompt_eval_count)
+        self.metrics.tokens_eval.labels(agent=self.agent_type).inc(response.eval_count)
+
     async def generate_llm_response(self, llm: Any, model: str, message: Message, temperature = 0.7) -> AsyncGenerator[Message, None]:
         logger.info(f"{self.agent_type} - {inspect.stack()[0].function}")

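The new collect_metrics helper assumes each agent carries a metrics object exposing tokens_prompt and tokens_eval counters with an agent label. That object is not part of this diff; below is a minimal sketch of what it could look like, assuming prometheus_client Counters (the class name, metric names, and registry wiring are illustrative, not taken from the repository).

# Illustrative sketch only -- the repository's actual metrics container may differ.
from typing import Optional
from prometheus_client import CollectorRegistry, Counter

class Metrics:
    def __init__(self, registry: Optional[CollectorRegistry] = None):
        self.registry = registry or CollectorRegistry()
        # Per-agent token counters, matching the .labels(agent=...).inc(...) calls in the diff.
        self.tokens_prompt = Counter(
            "tokens_prompt_total",
            "Prompt tokens reported by the LLM (prompt_eval_count)",
            ["agent"],
            registry=self.registry,
        )
        self.tokens_eval = Counter(
            "tokens_eval_total",
            "Generated tokens reported by the LLM (eval_count)",
            ["agent"],
            registry=self.registry,
        )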
@@ -354,6 +359,7 @@ class Agent(BaseModel, ABC):
             },
             stream=False # No need to stream the probe
         )
+        self.collect_metrics(response)

         end_time = time.perf_counter()
         message.metadata["timers"]["tool_check"] = f"{(end_time - start_time):.4f}"
@@ -382,6 +388,7 @@ class Agent(BaseModel, ABC):
             },
             stream=False
         )
+        self.collect_metrics(response)

         end_time = time.perf_counter()
         message.metadata["timers"]["non_streaming"] = f"{(end_time - start_time):.4f}"
@@ -441,6 +448,7 @@ class Agent(BaseModel, ABC):
             yield message

             if response.done:
+                self.collect_metrics(response)
                 message.metadata["eval_count"] += response.eval_count
                 message.metadata["eval_duration"] += response.eval_duration
                 message.metadata["prompt_eval_count"] += response.prompt_eval_count
@@ -544,6 +544,7 @@ class JobDescription(Agent):
             message.chunk = ""

             if response.done:
+                self.collect_metrics(response)
                 message.metadata["eval_count"] += response.eval_count
                 message.metadata["eval_duration"] += response.eval_duration
                 message.metadata["prompt_eval_count"] += response.prompt_eval_count
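With the increments moved into the agent, the counters advance each time a completed response arrives (the streaming call sites guard on response.done, the non-streaming ones call collect_metrics right after the chat call returns), so token usage is visible at run-time rather than only after the web layer finishes a message. If the counters are prometheus_client Counters as sketched above, exposing them for scraping is a one-liner; again an illustrative sketch, not code from this commit.

# Illustrative sketch only; names and wiring are assumptions.
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest

def render_metrics(metrics: "Metrics") -> tuple[bytes, str]:
    # Serialise all registered counters in Prometheus text exposition format.
    return generate_latest(metrics.registry), CONTENT_TYPE_LATEST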