commit 2cfd6e8f28
parent 8a4f94817a

Working!
@@ -386,25 +386,11 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
     const decoder = new TextDecoder();
     let buffer = '';
 
-    while (true) {
-      const { done, value } = await reader.read();
-      if (done) {
-        break;
-      }
-
-      const chunk = decoder.decode(value, { stream: true });
-
-      // Process each complete line immediately
-      buffer += chunk;
-      let lines = buffer.split('\n');
-      buffer = lines.pop() || ''; // Keep incomplete line in buffer
-      for (const line of lines) {
-        if (!line.trim()) continue;
-
-        try {
-          const update = JSON.parse(line);
+    const process_line = async (line: string) => {
+      const update = JSON.parse(line);
 
-          switch (update.status) {
-            case 'processing':
-            case 'thinking':
-              // Force an immediate state update based on the message type
+      switch (update.status) {
+        case 'searching':
+        case 'processing':
+        case 'thinking':
+          // Force an immediate state update based on the message type
@@ -458,9 +444,26 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
           await new Promise(resolve => setTimeout(resolve, 0));
           break;
       }
+    }
+
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) {
+        break;
+      }
+
+      const chunk = decoder.decode(value, { stream: true });
+
+      // Process each complete line immediately
+      buffer += chunk;
+      let lines = buffer.split('\n');
+      buffer = lines.pop() || ''; // Keep incomplete line in buffer
+      for (const line of lines) {
+        if (!line.trim()) continue;
+        try {
+          await process_line(line);
         } catch (e) {
           setSnack("Error processing query", "error")
-          console.error('Error parsing JSON:', e, line);
         }
       }
     }
@@ -468,47 +471,7 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
     // Process any remaining buffer content
     if (buffer.trim()) {
       try {
-        const update = JSON.parse(buffer);
-
-        switch (update.status) {
-          case 'processing':
-          case 'thinking':
-            // Force an immediate state update based on the message type
-            // Update processing message with immediate re-render
-            setProcessingMessage({ role: 'status', content: update.response });
-            // Add a small delay to ensure React has time to update the UI
-            await new Promise(resolve => setTimeout(resolve, 0));
-            break;
-          case 'error':
-            // Show error
-            setProcessingMessage({ role: 'error', content: update.response });
-            setTimeout(() => {
-              setProcessingMessage(undefined);
-            }, 5000);
-            break;
-          case 'done':
-            console.log('Done processing:', update);
-            if (onResponse) {
-              update.message = onResponse(update);
-            }
-            setProcessingMessage(undefined);
-            const backstoryMessage: BackstoryMessage = update;
-            setConversation([
-              ...conversationRef.current, {
-                // role: 'user',
-                // content: backstoryMessage.prompt || "",
-                // }, {
-                role: 'assistant',
-                origin: type,
-                prompt: backstoryMessage.prompt || "",
-                content: backstoryMessage.response || "",
-                preamble: backstoryMessage.preamble || {},
-                full_content: backstoryMessage.full_content || "",
-                metadata: backstoryMessage.metadata,
-                actions: backstoryMessage.actions,
-              }] as MessageList);
-            break;
-        }
+        await process_line(buffer);
       } catch (e) {
         setSnack("Error processing query", "error")
       }
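Taken together, the three hunks above pull the per-line JSON handling out of the read loop and into a single process_line helper, so both complete lines pulled off the stream and the final buffered fragment are parsed and dispatched through one code path (the trailing buffer previously carried its own copy of the status switch). A minimal sketch of that newline-delimited-JSON streaming pattern follows; the StatusUpdate shape, the streamNdjson name, and the handleUpdate callback are illustrative stand-ins, not part of the commit.

// Sketch only: stream a fetch() Response body as newline-delimited JSON,
// routing each complete line (and the final partial line) through one helper.
type StatusUpdate = { status: string; response?: string };

async function streamNdjson(
  response: Response,
  handleUpdate: (update: StatusUpdate) => Promise<void>,
): Promise<void> {
  if (!response.body) throw new Error('Response has no body');
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';

  // Single code path for parsing and dispatching one line.
  const processLine = async (line: string) => {
    const update: StatusUpdate = JSON.parse(line);
    await handleUpdate(update);
  };

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split('\n');
    buffer = lines.pop() || ''; // keep the trailing incomplete line
    for (const line of lines) {
      if (!line.trim()) continue;
      await processLine(line);
    }
  }

  // Flush whatever is left once the stream closes.
  if (buffer.trim()) {
    await processLine(buffer);
  }
}

Funnelling the final flush through the same helper means a trailing partial line gets the same 'done'/'error' handling as any other line, rather than a second copy of the switch that can drift out of sync.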
@@ -92,18 +92,22 @@ class Chat(Agent, ABC):
             ]
         ]
 
-        for value in llm.chat(
+        message.status = "thinking"
+        for response in llm.chat(
             model=model,
             messages=messages,
             #tools=llm_tools(context.tools) if message.enable_tools else None,
             options={ "num_ctx": message.metadata["ctx_size"] if message.metadata["ctx_size"] else defines.max_context },
             stream=True,
         ):
-            logging.info(f"LLM: {'done' if value.done else 'thinking'} - {value.message.content}")
-            message.response += value.message.content
+            logging.info(f"LLM: {'done' if response.done else 'thinking'} - {response.message.content}")
+            message.response += response.message.content
+            message.metadata["eval_count"] += response["eval_count"]
+            message.metadata["eval_duration"] += response["eval_duration"]
+            message.metadata["prompt_eval_count"] += response["prompt_eval_count"]
+            message.metadata["prompt_eval_duration"] += response["prompt_eval_duration"]
             yield message
-            if value.done:
-                response = value
+            if response.done:
                 message.status = "done"
 
         if not response:
@@ -113,10 +117,6 @@ class Chat(Agent, ABC):
             self.context.processing = False
             return
 
-        message.metadata["eval_count"] += response["eval_count"]
-        message.metadata["eval_duration"] += response["eval_duration"]
-        message.metadata["prompt_eval_count"] += response["prompt_eval_count"]
-        message.metadata["prompt_eval_duration"] += response["prompt_eval_duration"]
        self.context_tokens = response["prompt_eval_count"] + response["eval_count"]
 
        yield message
@@ -212,7 +212,7 @@ class Chat(Agent, ABC):
        message.metadata["ctx_size"] = self.context.get_optimal_ctx_size(self.context_tokens, messages=message.full_content)
 
        message.response = f"Processing {'RAG augmented ' if message.metadata['rag'] else ''}query..."
-       message.status = "thinking"
+       message.status = "searching"
        yield message
 
        async for message in self.generate_llm_response(llm, model, message):
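The Python hunks rename the streamed chunk from value to response, report a "searching" status before the LLM call, and accumulate the eval counters on every streamed chunk inside the loop rather than once from the final response after it. A rough sketch of that per-chunk accumulation follows, using plain dicts instead of the project's message and metadata types; it assumes Ollama-style counter names and treats chunks that omit them (typically everything before the final done chunk) as contributing zero.

# Sketch only: accumulate streamed chat chunks into a running message dict.
from typing import Any, Dict, Iterable, Iterator


def accumulate_stream(chunks: Iterable[Dict[str, Any]]) -> Iterator[Dict[str, Any]]:
    """Yield the running message state after every streamed chunk."""
    counters = ("eval_count", "eval_duration", "prompt_eval_count", "prompt_eval_duration")
    message: Dict[str, Any] = {
        "status": "thinking",
        "response": "",
        "metadata": {name: 0 for name in counters},
    }
    for chunk in chunks:
        message["response"] += chunk.get("message", {}).get("content", "")
        for name in counters:
            # Missing or None counters (common on non-final chunks) count as zero.
            message["metadata"][name] += chunk.get(name) or 0
        if chunk.get("done"):
            message["status"] = "done"
        yield message


if __name__ == "__main__":
    fake_chunks = [
        {"message": {"content": "Hel"}, "done": False},
        {"message": {"content": "lo"}, "done": True, "eval_count": 2, "eval_duration": 10,
         "prompt_eval_count": 5, "prompt_eval_duration": 3},
    ]
    for state in accumulate_stream(fake_chunks):
        print(state["status"], repr(state["response"]), state["metadata"])

With the zero guard the totals come out the same whether the counters arrive incrementally or only on the final chunk, which avoids adding None into the running totals when a chunk does not report them.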