James Ketr 2025-04-30 23:07:06 -07:00
parent 8a4f94817a
commit 2cfd6e8f28
2 changed files with 72 additions and 109 deletions


@@ -386,6 +386,66 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
const decoder = new TextDecoder();
let buffer = '';
const process_line = async (line: string) => {
const update = JSON.parse(line);
switch (update.status) {
case 'searching':
case 'processing':
case 'thinking':
// Force an immediate state update based on the message type
// Update processing message with immediate re-render
setProcessingMessage({ role: 'status', content: update.response });
// Add a small delay to ensure React has time to update the UI
await new Promise(resolve => setTimeout(resolve, 0));
break;
case 'done':
console.log('Done processing:', update);
// Replace processing message with final result
if (onResponse) {
update.message = onResponse(update);
}
setProcessingMessage(undefined);
const backstoryMessage: BackstoryMessage = update;
setConversation([
...conversationRef.current, {
// role: 'user',
// content: backstoryMessage.prompt || "",
// }, {
role: 'assistant',
origin: type,
content: backstoryMessage.response || "",
prompt: backstoryMessage.prompt || "",
preamble: backstoryMessage.preamble || {},
full_content: backstoryMessage.full_content || "",
metadata: backstoryMessage.metadata,
actions: backstoryMessage.actions,
}] as MessageList);
// Add a small delay to ensure React has time to update the UI
await new Promise(resolve => setTimeout(resolve, 0));
const metadata = update.metadata;
if (metadata) {
const evalTPS = metadata.eval_count * 10 ** 9 / metadata.eval_duration;
const promptTPS = metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration;
setLastEvalTPS(evalTPS ? evalTPS : 35);
setLastPromptTPS(promptTPS ? promptTPS : 35);
updateContextStatus();
}
break;
case 'error':
// Show error
setProcessingMessage({ role: 'error', content: update.response });
setTimeout(() => {
setProcessingMessage(undefined);
}, 5000);
// Add a small delay to ensure React has time to update the UI
await new Promise(resolve => setTimeout(resolve, 0));
break;
}
}
while (true) {
const { done, value } = await reader.read();
if (done) {
@@ -400,67 +460,10 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
buffer = lines.pop() || ''; // Keep incomplete line in buffer
for (const line of lines) {
if (!line.trim()) continue;
try {
const update = JSON.parse(line);
switch (update.status) {
case 'processing':
case 'thinking':
// Force an immediate state update based on the message type
// Update processing message with immediate re-render
setProcessingMessage({ role: 'status', content: update.response });
// Add a small delay to ensure React has time to update the UI
await new Promise(resolve => setTimeout(resolve, 0));
break;
case 'done':
console.log('Done processing:', update);
// Replace processing message with final result
if (onResponse) {
update.message = onResponse(update);
}
setProcessingMessage(undefined);
const backstoryMessage: BackstoryMessage = update;
setConversation([
...conversationRef.current, {
// role: 'user',
// content: backstoryMessage.prompt || "",
// }, {
role: 'assistant',
origin: type,
content: backstoryMessage.response || "",
prompt: backstoryMessage.prompt || "",
preamble: backstoryMessage.preamble || {},
full_content: backstoryMessage.full_content || "",
metadata: backstoryMessage.metadata,
actions: backstoryMessage.actions,
}] as MessageList);
// Add a small delay to ensure React has time to update the UI
await new Promise(resolve => setTimeout(resolve, 0));
const metadata = update.metadata;
if (metadata) {
const evalTPS = metadata.eval_count * 10 ** 9 / metadata.eval_duration;
const promptTPS = metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration;
setLastEvalTPS(evalTPS ? evalTPS : 35);
setLastPromptTPS(promptTPS ? promptTPS : 35);
updateContextStatus();
}
break;
case 'error':
// Show error
setProcessingMessage({ role: 'error', content: update.response });
setTimeout(() => {
setProcessingMessage(undefined);
}, 5000);
// Add a small delay to ensure React has time to update the UI
await new Promise(resolve => setTimeout(resolve, 0));
break;
}
await process_line(line);
} catch (e) {
setSnack("Error processing query", "error")
console.error('Error parsing JSON:', e, line);
}
}
}
@@ -468,47 +471,7 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
// Process any remaining buffer content
if (buffer.trim()) {
try {
const update = JSON.parse(buffer);
switch (update.status) {
case 'processing':
case 'thinking':
// Force an immediate state update based on the message type
// Update processing message with immediate re-render
setProcessingMessage({ role: 'status', content: update.response });
// Add a small delay to ensure React has time to update the UI
await new Promise(resolve => setTimeout(resolve, 0));
break;
case 'error':
// Show error
setProcessingMessage({ role: 'error', content: update.response });
setTimeout(() => {
setProcessingMessage(undefined);
}, 5000);
break;
case 'done':
console.log('Done processing:', update);
if (onResponse) {
update.message = onResponse(update);
}
setProcessingMessage(undefined);
const backstoryMessage: BackstoryMessage = update;
setConversation([
...conversationRef.current, {
// role: 'user',
// content: backstoryMessage.prompt || "",
// }, {
role: 'assistant',
origin: type,
prompt: backstoryMessage.prompt || "",
content: backstoryMessage.response || "",
preamble: backstoryMessage.preamble || {},
full_content: backstoryMessage.full_content || "",
metadata: backstoryMessage.metadata,
actions: backstoryMessage.actions,
}] as MessageList);
break;
}
await process_line(buffer);
} catch (e) {
setSnack("Error processing query", "error")
}
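
The net effect of the changes to the first file: the status-handling switch that previously appeared in both the read loop and the trailing-buffer flush is consolidated into the single process_line helper added at the top of the first hunk (and it gains a 'searching' case). A minimal standalone sketch of that pattern, assuming a ReadableStreamDefaultReader and a caller-supplied line handler; the names and types here are illustrative, not the component's actual API:

    // Sketch only: consume a newline-delimited JSON stream, routing every complete
    // line through one shared handler, then flush whatever remains in the buffer.
    async function consumeNdjson(
        reader: ReadableStreamDefaultReader<Uint8Array>,
        processLine: (line: string) => Promise<void>
    ): Promise<void> {
        const decoder = new TextDecoder();
        let buffer = '';
        while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            buffer += decoder.decode(value, { stream: true });
            const lines = buffer.split('\n');
            buffer = lines.pop() || ''; // keep the incomplete line for the next chunk
            for (const line of lines) {
                if (!line.trim()) continue;
                await processLine(line);
            }
        }
        // The stream can end mid-line; flush the remainder through the same handler.
        if (buffer.trim()) {
            await processLine(buffer);
        }
    }

With one handler, the 'searching'/'processing'/'thinking'/'done'/'error' cases only need to be maintained in one place.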


@@ -92,18 +92,22 @@ class Chat(Agent, ABC):
]
]
for value in llm.chat(
message.status = "thinking"
for response in llm.chat(
model=model,
messages=messages,
#tools=llm_tools(context.tools) if message.enable_tools else None,
options={ "num_ctx": message.metadata["ctx_size"] if message.metadata["ctx_size"] else defines.max_context },
stream=True,
):
logging.info(f"LLM: {'done' if value.done else 'thinking'} - {value.message.content}")
message.response += value.message.content
logging.info(f"LLM: {'done' if response.done else 'thinking'} - {response.message.content}")
message.response += response.message.content
message.metadata["eval_count"] += response["eval_count"]
message.metadata["eval_duration"] += response["eval_duration"]
message.metadata["prompt_eval_count"] += response["prompt_eval_count"]
message.metadata["prompt_eval_duration"] += response["prompt_eval_duration"]
yield message
if value.done:
response = value
if response.done:
message.status = "done"
if not response:
@@ -113,10 +117,6 @@ class Chat(Agent, ABC):
self.context.processing = False
return
message.metadata["eval_count"] += response["eval_count"]
message.metadata["eval_duration"] += response["eval_duration"]
message.metadata["prompt_eval_count"] += response["prompt_eval_count"]
message.metadata["prompt_eval_duration"] += response["prompt_eval_duration"]
self.context_tokens = response["prompt_eval_count"] + response["eval_count"]
yield message
@@ -212,7 +212,7 @@ class Chat(Agent, ABC):
message.metadata["ctx_size"] = self.context.get_optimal_ctx_size(self.context_tokens, messages=message.full_content)
message.response = f"Processing {'RAG augmented ' if message.metadata['rag'] else ''}query..."
message.status = "thinking"
message.status = "searching"
yield message
async for message in self.generate_llm_response(llm, model, message):
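
The changes to the second file rename the streaming loop variable from value to response, set the message status to "thinking" before the loop (and to "searching" at the point where the RAG-augmented query is prepared, per the last hunk), and accumulate the eval/prompt counters on every chunk instead of once after the loop. A minimal sketch of that per-chunk accumulation, assuming dict-like chunks; the helper name and the treat-missing-as-zero behavior are illustrative assumptions, since intermediate streaming chunks may not carry the counters:

    from typing import Any, Dict, Iterable

    COUNTER_KEYS = ("eval_count", "eval_duration", "prompt_eval_count", "prompt_eval_duration")

    def accumulate_metadata(metadata: Dict[str, int], chunks: Iterable[Dict[str, Any]]) -> Dict[str, int]:
        """Add per-chunk token and duration counters into a running metadata dict.

        Missing counters are treated as zero so partial chunks do not raise.
        """
        for chunk in chunks:
            for key in COUNTER_KEYS:
                metadata[key] = metadata.get(key, 0) + (chunk.get(key) or 0)
        return metadata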