commit b8fbe145c9
parent 2cfd6e8f28
Author: James Ketr
Date:   2025-04-30 23:24:09 -07:00


@@ -100,14 +100,14 @@ class Chat(Agent, ABC):
                 options={ "num_ctx": message.metadata["ctx_size"] if message.metadata["ctx_size"] else defines.max_context },
                 stream=True,
             ):
                 logging.info(f"LLM: {'done' if response.done else 'thinking'} - {response.message.content}")
                 message.response += response.message.content
-                message.metadata["eval_count"] += response["eval_count"]
-                message.metadata["eval_duration"] += response["eval_duration"]
-                message.metadata["prompt_eval_count"] += response["prompt_eval_count"]
-                message.metadata["prompt_eval_duration"] += response["prompt_eval_duration"]
                 yield message
                 if response.done:
+                    message.metadata["eval_count"] += response.eval_count
+                    message.metadata["eval_duration"] += response.eval_duration
+                    message.metadata["prompt_eval_count"] += response.prompt_eval_count
+                    message.metadata["prompt_eval_duration"] += response.prompt_eval_duration
                     self.context_tokens = response.prompt_eval_count + response.eval_count
                     message.status = "done"
             if not response:
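
The pattern this hunk adopts, shown in isolation: with Ollama's streaming chat, the token-count statistics (eval_count, eval_duration, prompt_eval_count, prompt_eval_duration) are only populated on the final chunk, so they should be read under if response.done: rather than on every iteration. A minimal sketch, assuming the ollama Python AsyncClient; the model name and prompt are placeholders:

    import asyncio
    from ollama import AsyncClient

    async def main() -> None:
        stats = {"eval_count": 0, "prompt_eval_count": 0}
        # stream=True yields incremental chunks; the counters ride on the final one
        async for chunk in await AsyncClient().chat(
            model="llama3",  # placeholder model name
            messages=[{"role": "user", "content": "Hello"}],
            stream=True,
        ):
            print(chunk.message.content, end="", flush=True)
            if chunk.done:  # only the final chunk carries the counters
                stats["eval_count"] += chunk.eval_count
                stats["prompt_eval_count"] += chunk.prompt_eval_count
        print(f"\ncontext tokens: {stats['eval_count'] + stats['prompt_eval_count']}")

    asyncio.run(main())
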
@@ -116,8 +116,6 @@ class Chat(Agent, ABC):
                 yield message
             self.context.processing = False
             return
-        self.context_tokens = response["prompt_eval_count"] + response["eval_count"]
-        yield message
         self.context.processing = False
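
With the counters and self.context_tokens now set inside the loop on the done chunk, and the loop already yielding each chunk, the dict-style assignment and trailing yield after the loop became redundant, which is presumably why this hunk drops them. Note the two access styles: the removed lines index the response like a dict (response["eval_count"]), while the surviving code uses attribute access (response.eval_count), matching the typed response objects returned by newer ollama clients.
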
@@ -216,7 +214,7 @@ class Chat(Agent, ABC):
             yield message
         async for message in self.generate_llm_response(llm, model, message):
-            logging.info(f"LLM: {message.status} - {message.response}")
+            logging.info(f"LLM: {message.status} - {f'...{message.response[-20:]}' if len(message.response) > 20 else message.response}")
             if message.status == "error":
                 return
             if message.status != "done":
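
The replacement log line trims long streamed responses to their last 20 characters, so the log stays readable while still showing the freshest output. The same truncation, factored into a small helper for illustration (the tail name is mine, not from the commit):

    def tail(text: str, n: int = 20) -> str:
        """Return the last n characters of text, prefixed with '...' when truncated."""
        return f"...{text[-n:]}" if len(text) > n else text

    print(tail("short"))   # -> short
    print(tail("x" * 30))  # -> ...xxxxxxxxxxxxxxxxxxxx  (the last 20 x's)
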