diff --git a/frontend/src/Conversation.tsx b/frontend/src/Conversation.tsx
index 2b21947..b29093f 100644
--- a/frontend/src/Conversation.tsx
+++ b/frontend/src/Conversation.tsx
@@ -386,6 +386,66 @@ const Conversation = forwardRef(({
     const decoder = new TextDecoder();
     let buffer = '';
 
+    const process_line = async (line: string) => {
+        const update = JSON.parse(line);
+
+        switch (update.status) {
+            case 'searching':
+            case 'processing':
+            case 'thinking':
+                // Force an immediate state update based on the message type
+                // Update processing message with immediate re-render
+                setProcessingMessage({ role: 'status', content: update.response });
+                // Add a small delay to ensure React has time to update the UI
+                await new Promise(resolve => setTimeout(resolve, 0));
+                break;
+            case 'done':
+                console.log('Done processing:', update);
+                // Replace processing message with final result
+                if (onResponse) {
+                    update.message = onResponse(update);
+                }
+                setProcessingMessage(undefined);
+                const backstoryMessage: BackstoryMessage = update;
+                setConversation([
+                    ...conversationRef.current, {
+                        // role: 'user',
+                        // content: backstoryMessage.prompt || "",
+                        // }, {
+                        role: 'assistant',
+                        origin: type,
+                        content: backstoryMessage.response || "",
+                        prompt: backstoryMessage.prompt || "",
+                        preamble: backstoryMessage.preamble || {},
+                        full_content: backstoryMessage.full_content || "",
+                        metadata: backstoryMessage.metadata,
+                        actions: backstoryMessage.actions,
+                    }] as MessageList);
+                // Add a small delay to ensure React has time to update the UI
+                await new Promise(resolve => setTimeout(resolve, 0));
+
+                const metadata = update.metadata;
+                if (metadata) {
+                    const evalTPS = metadata.eval_count * 10 ** 9 / metadata.eval_duration;
+                    const promptTPS = metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration;
+                    setLastEvalTPS(evalTPS ? evalTPS : 35);
+                    setLastPromptTPS(promptTPS ? promptTPS : 35);
+                    updateContextStatus();
+                }
+                break;
+            case 'error':
+                // Show error
+                setProcessingMessage({ role: 'error', content: update.response });
+                setTimeout(() => {
+                    setProcessingMessage(undefined);
+                }, 5000);
+
+                // Add a small delay to ensure React has time to update the UI
+                await new Promise(resolve => setTimeout(resolve, 0));
+                break;
+        }
+    };
+
     while (true) {
         const { done, value } = await reader.read();
         if (done) {
@@ -400,67 +460,11 @@ const Conversation = forwardRef(({
         buffer = lines.pop() || ''; // Keep incomplete line in buffer
 
         for (const line of lines) {
             if (!line.trim()) continue;
             try {
-                const update = JSON.parse(line);
-
-                switch (update.status) {
-                    case 'processing':
-                    case 'thinking':
-                        // Force an immediate state update based on the message type
-                        // Update processing message with immediate re-render
-                        setProcessingMessage({ role: 'status', content: update.response });
-                        // Add a small delay to ensure React has time to update the UI
-                        await new Promise(resolve => setTimeout(resolve, 0));
-                        break;
-                    case 'done':
-                        console.log('Done processing:', update);
-                        // Replace processing message with final result
-                        if (onResponse) {
-                            update.message = onResponse(update);
-                        }
-                        setProcessingMessage(undefined);
-                        const backstoryMessage: BackstoryMessage = update;
-                        setConversation([
-                            ...conversationRef.current, {
-                                // role: 'user',
-                                // content: backstoryMessage.prompt || "",
-                                // }, {
-                                role: 'assistant',
-                                origin: type,
-                                content: backstoryMessage.response || "",
-                                prompt: backstoryMessage.prompt || "",
-                                preamble: backstoryMessage.preamble || {},
-                                full_content: backstoryMessage.full_content || "",
-                                metadata: backstoryMessage.metadata,
-                                actions: backstoryMessage.actions,
-                            }] as MessageList);
-                        // Add a small delay to ensure React has time to update the UI
-                        await new Promise(resolve => setTimeout(resolve, 0));
-
-                        const metadata = update.metadata;
-                        if (metadata) {
-                            const evalTPS = metadata.eval_count * 10 ** 9 / metadata.eval_duration;
-                            const promptTPS = metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration;
-                            setLastEvalTPS(evalTPS ? evalTPS : 35);
-                            setLastPromptTPS(promptTPS ? promptTPS : 35);
-                            updateContextStatus();
-                        }
-                        break;
-                    case 'error':
-                        // Show error
-                        setProcessingMessage({ role: 'error', content: update.response });
-                        setTimeout(() => {
-                            setProcessingMessage(undefined);
-                        }, 5000);
-
-                        // Add a small delay to ensure React has time to update the UI
-                        await new Promise(resolve => setTimeout(resolve, 0));
-                        break;
-                }
+                await process_line(line);
             } catch (e) {
                 setSnack("Error processing query", "error")
-                console.error('Error parsing JSON:', e, line);
             }
         }
     }
@@ -468,47 +472,7 @@ const Conversation = forwardRef(({
     // Process any remaining buffer content
     if (buffer.trim()) {
         try {
-            const update = JSON.parse(buffer);
-
-            switch (update.status) {
-                case 'processing':
-                case 'thinking':
-                    // Force an immediate state update based on the message type
-                    // Update processing message with immediate re-render
-                    setProcessingMessage({ role: 'status', content: update.response });
-                    // Add a small delay to ensure React has time to update the UI
-                    await new Promise(resolve => setTimeout(resolve, 0));
-                    break;
-                case 'error':
-                    // Show error
-                    setProcessingMessage({ role: 'error', content: update.response });
-                    setTimeout(() => {
-                        setProcessingMessage(undefined);
-                    }, 5000);
-                    break;
-                case 'done':
-                    console.log('Done processing:', update);
-                    if (onResponse) {
-                        update.message = onResponse(update);
-                    }
-                    setProcessingMessage(undefined);
-                    const backstoryMessage: BackstoryMessage = update;
-                    setConversation([
-                        ...conversationRef.current, {
-                            // role: 'user',
-                            // content: backstoryMessage.prompt || "",
-                            // }, {
-                            role: 'assistant',
-                            origin: type,
-                            prompt: backstoryMessage.prompt || "",
-                            content: backstoryMessage.response || "",
-                            preamble: backstoryMessage.preamble || {},
-                            full_content: backstoryMessage.full_content || "",
-                            metadata: backstoryMessage.metadata,
-                            actions: backstoryMessage.actions,
-                        }] as MessageList);
-                    break;
-            }
+            await process_line(buffer);
         } catch (e) {
             setSnack("Error processing query", "error")
         }
diff --git a/src/utils/agents/chat.py b/src/utils/agents/chat.py
index 43a563c..0c6a2a0 100644
--- a/src/utils/agents/chat.py
+++ b/src/utils/agents/chat.py
@@ -92,18 +92,22 @@ class Chat(Agent, ABC):
             ]
         ]
 
-        for value in llm.chat(
+        message.status = "thinking"
+        for response in llm.chat(
             model=model,
             messages=messages,
             #tools=llm_tools(context.tools) if message.enable_tools else None,
             options={ "num_ctx": message.metadata["ctx_size"] if message.metadata["ctx_size"] else defines.max_context },
             stream=True,
         ):
-            logging.info(f"LLM: {'done' if value.done else 'thinking'} - {value.message.content}")
-            message.response += value.message.content
+            logging.info(f"LLM: {'done' if response.done else 'thinking'} - {response.message.content}")
+            message.response += response.message.content
+            message.metadata["eval_count"] += response["eval_count"]
+            message.metadata["eval_duration"] += response["eval_duration"]
+            message.metadata["prompt_eval_count"] += response["prompt_eval_count"]
+            message.metadata["prompt_eval_duration"] += response["prompt_eval_duration"]
             yield message
-            if value.done:
-                response = value
+            if response.done:
                 message.status = "done"
 
         if not response:
@@ -113,10 +117,6 @@ class Chat(Agent, ABC):
             self.context.processing = False
             return
 
-        message.metadata["eval_count"] += response["eval_count"]
-        message.metadata["eval_duration"] += response["eval_duration"]
-        message.metadata["prompt_eval_count"] += response["prompt_eval_count"]
-        message.metadata["prompt_eval_duration"] += response["prompt_eval_duration"]
         self.context_tokens = response["prompt_eval_count"] + response["eval_count"]
 
         yield message
@@ -212,7 +212,7 @@ class Chat(Agent, ABC):
             message.metadata["ctx_size"] = self.context.get_optimal_ctx_size(self.context_tokens, messages=message.full_content)
 
         message.response = f"Processing {'RAG augmented ' if message.metadata['rag'] else ''}query..."
-        message.status = "thinking"
+        message.status = "searching"
         yield message
 
         async for message in self.generate_llm_response(llm, model, message):