Working!
parent 8a4f94817a
commit 2cfd6e8f28
@@ -386,25 +386,11 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
const decoder = new TextDecoder();
let buffer = '';

while (true) {
  const { done, value } = await reader.read();
  if (done) {
    break;
  }

  const chunk = decoder.decode(value, { stream: true });

  // Process each complete line immediately
  buffer += chunk;
  let lines = buffer.split('\n');
  buffer = lines.pop() || ''; // Keep incomplete line in buffer
  for (const line of lines) {
    if (!line.trim()) continue;

    try {
const process_line = async (line: string) => {
  const update = JSON.parse(line);

  switch (update.status) {
    case 'searching':
    case 'processing':
    case 'thinking':
      // Force an immediate state update based on the message type
@@ -458,9 +444,26 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
      await new Promise(resolve => setTimeout(resolve, 0));
      break;
  }
}

while (true) {
  const { done, value } = await reader.read();
  if (done) {
    break;
  }

  const chunk = decoder.decode(value, { stream: true });

  // Process each complete line immediately
  buffer += chunk;
  let lines = buffer.split('\n');
  buffer = lines.pop() || ''; // Keep incomplete line in buffer
  for (const line of lines) {
    if (!line.trim()) continue;
    try {
      await process_line(line);
    } catch (e) {
      setSnack("Error processing query", "error")
      console.error('Error parsing JSON:', e, line);
    }
  }
}
@@ -468,47 +471,7 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
// Process any remaining buffer content
if (buffer.trim()) {
  try {
    const update = JSON.parse(buffer);

    switch (update.status) {
      case 'processing':
      case 'thinking':
        // Force an immediate state update based on the message type
        // Update processing message with immediate re-render
        setProcessingMessage({ role: 'status', content: update.response });
        // Add a small delay to ensure React has time to update the UI
        await new Promise(resolve => setTimeout(resolve, 0));
        break;
      case 'error':
        // Show error
        setProcessingMessage({ role: 'error', content: update.response });
        setTimeout(() => {
          setProcessingMessage(undefined);
        }, 5000);
        break;
      case 'done':
        console.log('Done processing:', update);
        if (onResponse) {
          update.message = onResponse(update);
        }
        setProcessingMessage(undefined);
        const backstoryMessage: BackstoryMessage = update;
        setConversation([
          ...conversationRef.current, {
            // role: 'user',
            // content: backstoryMessage.prompt || "",
            // }, {
            role: 'assistant',
            origin: type,
            prompt: backstoryMessage.prompt || "",
            content: backstoryMessage.response || "",
            preamble: backstoryMessage.preamble || {},
            full_content: backstoryMessage.full_content || "",
            metadata: backstoryMessage.metadata,
            actions: backstoryMessage.actions,
          }] as MessageList);
        break;
    }
    await process_line(buffer);
  } catch (e) {
    setSnack("Error processing query", "error")
  }
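Taken together, the hunks above converge on a single line-buffered NDJSON reader that hands each complete line to process_line. A condensed, standalone sketch of that pattern follows; readNdjsonStream and the onUpdate callback are illustrative names, not identifiers from this commit.

// Sketch only: line-buffered NDJSON reading over a Fetch ReadableStream.
async function readNdjsonStream(
  body: ReadableStream<Uint8Array>,
  onUpdate: (update: unknown) => Promise<void>,
): Promise<void> {
  const reader = body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    // Append the decoded chunk, split on newlines, and keep the trailing
    // partial line in the buffer until the next chunk completes it.
    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split('\n');
    buffer = lines.pop() || '';

    for (const line of lines) {
      if (!line.trim()) continue;
      try {
        await onUpdate(JSON.parse(line));
      } catch (e) {
        console.error('Error parsing JSON:', e, line);
      }
    }
  }

  // Flush any remaining buffered content once the stream ends.
  if (buffer.trim()) {
    await onUpdate(JSON.parse(buffer));
  }
}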
@@ -92,18 +92,22 @@ class Chat(Agent, ABC):
            ]
        ]

        for value in llm.chat(
        message.status = "thinking"
        for response in llm.chat(
            model=model,
            messages=messages,
            #tools=llm_tools(context.tools) if message.enable_tools else None,
            options={ "num_ctx": message.metadata["ctx_size"] if message.metadata["ctx_size"] else defines.max_context },
            stream=True,
        ):
            logging.info(f"LLM: {'done' if value.done else 'thinking'} - {value.message.content}")
            message.response += value.message.content
            logging.info(f"LLM: {'done' if response.done else 'thinking'} - {response.message.content}")
            message.response += response.message.content
            message.metadata["eval_count"] += response["eval_count"]
            message.metadata["eval_duration"] += response["eval_duration"]
            message.metadata["prompt_eval_count"] += response["prompt_eval_count"]
            message.metadata["prompt_eval_duration"] += response["prompt_eval_duration"]
            yield message
            if value.done:
                response = value
            if response.done:
                message.status = "done"

        if not response:
@@ -113,10 +117,6 @@ class Chat(Agent, ABC):
            self.context.processing = False
            return

        message.metadata["eval_count"] += response["eval_count"]
        message.metadata["eval_duration"] += response["eval_duration"]
        message.metadata["prompt_eval_count"] += response["prompt_eval_count"]
        message.metadata["prompt_eval_duration"] += response["prompt_eval_duration"]
        self.context_tokens = response["prompt_eval_count"] + response["eval_count"]

        yield message
@@ -212,7 +212,7 @@ class Chat(Agent, ABC):
        message.metadata["ctx_size"] = self.context.get_optimal_ctx_size(self.context_tokens, messages=message.full_content)

        message.response = f"Processing {'RAG augmented ' if message.metadata['rag'] else ''}query..."
        message.status = "thinking"
        message.status = "searching"
        yield message

        async for message in self.generate_llm_response(llm, model, message):
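On the Python side, these hunks move the eval counter accumulation into the streaming loop so the metadata grows chunk by chunk instead of only once at the end. A rough sketch of that accumulation, assuming an Ollama-style streaming chat client; stream_chat and the counters dict are illustrative names, not the actual agent code.

# Sketch only: accumulate streamed content and eval counters per chunk,
# assuming an Ollama-style client whose chunks expose message.content and
# done. Some clients only attach the counters on the final chunk, so they
# are read defensively here rather than indexed directly.
def stream_chat(llm, model, messages, options):
    counters = {
        "eval_count": 0,
        "eval_duration": 0,
        "prompt_eval_count": 0,
        "prompt_eval_duration": 0,
    }
    content = ""
    for chunk in llm.chat(model=model, messages=messages,
                          options=options, stream=True):
        content += chunk.message.content
        for key in counters:
            counters[key] += getattr(chunk, key, None) or 0
        status = "done" if chunk.done else "thinking"
        yield {"status": status, "response": content, "metadata": counters}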