commit b8fbe145c9
parent 2cfd6e8f28

    Working!
@@ -100,14 +100,14 @@ class Chat(Agent, ABC):
                 options={ "num_ctx": message.metadata["ctx_size"] if message.metadata["ctx_size"] else defines.max_context },
                 stream=True,
             ):
-                logging.info(f"LLM: {'done' if response.done else 'thinking'} - {response.message.content}")
                 message.response += response.message.content
-                message.metadata["eval_count"] += response["eval_count"]
-                message.metadata["eval_duration"] += response["eval_duration"]
-                message.metadata["prompt_eval_count"] += response["prompt_eval_count"]
-                message.metadata["prompt_eval_duration"] += response["prompt_eval_duration"]
                 yield message
                 if response.done:
+                    message.metadata["eval_count"] += response.eval_count
+                    message.metadata["eval_duration"] += response.eval_duration
+                    message.metadata["prompt_eval_count"] += response.prompt_eval_count
+                    message.metadata["prompt_eval_duration"] += response.prompt_eval_duration
+                    self.context_tokens = response.prompt_eval_count + response.eval_count
                     message.status = "done"
 
             if not response:
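The hunk above stops reading the evaluation counters on every streamed chunk and instead reads them only on the final chunk, where response.done is true and the counters are actually populated; it also assigns self.context_tokens there, which is what lets the post-loop assignment removed in the next hunk go away. A minimal sketch of that pattern, assuming the ollama Python AsyncClient (the model name, prompt, and stats dict here are placeholders, not taken from this commit):

    import asyncio
    from ollama import AsyncClient

    async def stream_with_stats():
        llm = AsyncClient()
        stats = {"eval_count": 0, "eval_duration": 0,
                 "prompt_eval_count": 0, "prompt_eval_duration": 0}
        text = ""
        # Intermediate chunks carry message content; the final chunk
        # (done == True) carries the evaluation counters.
        async for response in await llm.chat(
            model="llama3.2",  # placeholder model
            messages=[{"role": "user", "content": "Hello"}],
            options={"num_ctx": 2048},
            stream=True,
        ):
            text += response.message.content or ""
            if response.done:
                stats["eval_count"] += response.eval_count
                stats["eval_duration"] += response.eval_duration
                stats["prompt_eval_count"] += response.prompt_eval_count
                stats["prompt_eval_duration"] += response.prompt_eval_duration
                context_tokens = response.prompt_eval_count + response.eval_count
                print(f"context tokens: {context_tokens}")
        return text, stats

    asyncio.run(stream_with_stats())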
@@ -117,8 +117,6 @@ class Chat(Agent, ABC):
                 self.context.processing = False
                 return
 
-            self.context_tokens = response["prompt_eval_count"] + response["eval_count"]
-
             yield message
             self.context.processing = False
             return
@@ -216,7 +214,7 @@ class Chat(Agent, ABC):
         yield message
 
         async for message in self.generate_llm_response(llm, model, message):
-            logging.info(f"LLM: {message.status} - {message.response}")
+            logging.info(f"LLM: {message.status} - {f'...{message.response[-20:]}' if len(message.response) > 20 else message.response}")
             if message.status == "error":
                 return
             if message.status != "done":
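The logging change keeps the per-chunk log lines readable by printing only the tail of the accumulated response once it grows past 20 characters. A standalone sketch of the same nested f-string expression (the sample status and text are made up):

    import logging

    logging.basicConfig(level=logging.INFO)

    status = "streaming"
    response_text = "a streamed answer that keeps growing as chunks arrive"

    # Log only the last 20 characters of long responses, prefixing "..."
    # so it is obvious the text was truncated.
    logging.info(
        f"LLM: {status} - "
        f"{f'...{response_text[-20:]}' if len(response_text) > 20 else response_text}"
    )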