Starting to work again

James Ketr 2025-04-30 21:42:30 -07:00
parent 7f24d8870c
commit 2a3dc56897
7 changed files with 185 additions and 802 deletions

File diff suppressed because it is too large


@@ -18,6 +18,7 @@
     "@types/node": "^16.18.126",
     "@types/react": "^19.0.12",
     "@types/react-dom": "^19.0.4",
+    "@uiw/react-json-view": "^2.0.0-alpha.31",
     "mui-markdown": "^1.2.6",
     "react": "^19.0.0",
     "react-dom": "^19.0.0",


@@ -26,8 +26,8 @@ interface ConversationHandle
 interface BackstoryMessage {
     prompt: string;
-    preamble: string;
-    content: string;
+    preamble: {};
+    full_content: string;
     response: string;
     metadata: {
         rag: { documents: [] };
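Taken together, the two changed fields give BackstoryMessage roughly this shape (a sketch reconstructed from this hunk; fields outside the hunk are inferred from the rest of the diff and may not match the actual source):

// Reconstructed shape, for reference; not a verbatim copy of the source.
interface BackstoryMessage {
    prompt: string;
    preamble: {};          // was string; now an open-ended object keyed by section name
    full_content: string;  // was `content`; the fully assembled prompt sent to the LLM
    response: string;
    metadata: {
        rag: { documents: [] };
    };
}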
@@ -206,8 +206,8 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
     }, {
         role: 'assistant',
         prompt: message.prompt || "",
-        preamble: message.preamble || "",
-        full_content: message.content || "",
+        preamble: message.preamble || {},
+        full_content: message.full_content || "",
         content: message.response || "",
         metadata: message.metadata,
         actions: message.actions,
@@ -402,53 +402,59 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
             try {
                 const update = JSON.parse(line);
-                // Force an immediate state update based on the message type
-                if (update.status === 'processing') {
-                    // Update processing message with immediate re-render
-                    setProcessingMessage({ role: 'status', content: update.message });
-                    // Add a small delay to ensure React has time to update the UI
-                    await new Promise(resolve => setTimeout(resolve, 0));
-                } else if (update.status === 'done') {
-                    // Replace processing message with final result
-                    if (onResponse) {
-                        update.message = onResponse(update.message);
-                    }
-                    setProcessingMessage(undefined);
-                    const backstoryMessage: BackstoryMessage = update.message;
-                    setConversation([
-                        ...conversationRef.current, {
-                            role: 'user',
-                            content: backstoryMessage.prompt || "",
-                        }, {
-                            role: 'assistant',
-                            prompt: backstoryMessage.prompt || "",
-                            preamble: backstoryMessage.preamble || "",
-                            full_content: backstoryMessage.content || "",
-                            content: backstoryMessage.response || "",
-                            metadata: backstoryMessage.metadata,
-                            actions: backstoryMessage.actions,
-                        }] as MessageList);
-                    // Add a small delay to ensure React has time to update the UI
-                    await new Promise(resolve => setTimeout(resolve, 0));
-                    const metadata = update.message.metadata;
-                    if (metadata) {
-                        const evalTPS = metadata.eval_count * 10 ** 9 / metadata.eval_duration;
-                        const promptTPS = metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration;
-                        setLastEvalTPS(evalTPS ? evalTPS : 35);
-                        setLastPromptTPS(promptTPS ? promptTPS : 35);
-                        updateContextStatus();
-                    }
-                } else if (update.status === 'error') {
-                    // Show error
-                    setProcessingMessage({ role: 'error', content: update.message });
-                    setTimeout(() => {
-                        setProcessingMessage(undefined);
-                    }, 5000);
-                    // Add a small delay to ensure React has time to update the UI
-                    await new Promise(resolve => setTimeout(resolve, 0));
-                }
+                console.log('Parsed update:', update.response);
+                switch (update.status) {
+                    case 'processing':
+                    case 'thinking':
+                        // Force an immediate state update based on the message type
+                        // Update processing message with immediate re-render
+                        setProcessingMessage({ role: 'status', content: update.response });
+                        // Add a small delay to ensure React has time to update the UI
+                        await new Promise(resolve => setTimeout(resolve, 0));
+                        break;
+                    case 'done':
+                        // Replace processing message with final result
+                        if (onResponse) {
+                            update.message = onResponse(update.response);
+                        }
+                        setProcessingMessage(undefined);
+                        const backstoryMessage: BackstoryMessage = update.response;
+                        setConversation([
+                            ...conversationRef.current, {
+                                role: 'user',
+                                content: backstoryMessage.prompt || "",
+                            }, {
+                                role: 'assistant',
+                                prompt: backstoryMessage.prompt || "",
+                                preamble: backstoryMessage.preamble || {},
+                                full_content: backstoryMessage.full_content || "",
+                                content: backstoryMessage.response || "",
+                                metadata: backstoryMessage.metadata,
+                                actions: backstoryMessage.actions,
+                            }] as MessageList);
+                        // Add a small delay to ensure React has time to update the UI
+                        await new Promise(resolve => setTimeout(resolve, 0));
+                        const metadata = update.metadata;
+                        if (metadata) {
+                            const evalTPS = metadata.eval_count * 10 ** 9 / metadata.eval_duration;
+                            const promptTPS = metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration;
+                            setLastEvalTPS(evalTPS ? evalTPS : 35);
+                            setLastPromptTPS(promptTPS ? promptTPS : 35);
+                            updateContextStatus();
+                        }
+                        break;
+                    case 'error':
+                        // Show error
+                        setProcessingMessage({ role: 'error', content: update.response });
+                        setTimeout(() => {
+                            setProcessingMessage(undefined);
+                        }, 5000);
+                        // Add a small delay to ensure React has time to update the UI
+                        await new Promise(resolve => setTimeout(resolve, 0));
+                        break;
+                }
             } catch (e) {
                 setSnack("Error processing query", "error")
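The evalTPS and promptTPS expressions assume Ollama-style counters, where the *_duration fields are reported in nanoseconds, so tokens-per-second is count * 1e9 / duration. A minimal sketch of that conversion, with the same fallback of 35 TPS the component uses when counters are missing:

// Tokens/second from nanosecond-denominated counters (Ollama-style metadata).
// Falls back to 35 TPS when a counter is absent or zero, as above.
const tokensPerSecond = (count?: number, durationNs?: number): number =>
    count && durationNs ? (count * 1e9) / durationNs : 35;

// e.g. 512 tokens generated over 14.6e9 ns ≈ 35.1 TPS
console.log(tokensPerSecond(512, 14.6e9).toFixed(1));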
@@ -462,25 +468,42 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
             try {
                 const update = JSON.parse(buffer);
-                if (update.status === 'done') {
-                    if (onResponse) {
-                        update.message = onResponse(update.message);
-                    }
-                    setProcessingMessage(undefined);
-                    const backstoryMessage: BackstoryMessage = update.message;
-                    setConversation([
-                        ...conversationRef.current, {
-                            role: 'user',
-                            content: backstoryMessage.prompt || "",
-                        }, {
-                            role: 'assistant',
-                            prompt: backstoryMessage.prompt || "",
-                            preamble: backstoryMessage.preamble || "",
-                            full_content: backstoryMessage.content || "",
-                            content: backstoryMessage.response || "",
-                            metadata: backstoryMessage.metadata,
-                            actions: backstoryMessage.actions,
-                        }] as MessageList);
-                }
+                switch (update.status) {
+                    case 'processing':
+                    case 'thinking':
+                        // Force an immediate state update based on the message type
+                        // Update processing message with immediate re-render
+                        setProcessingMessage({ role: 'status', content: update.response });
+                        // Add a small delay to ensure React has time to update the UI
+                        await new Promise(resolve => setTimeout(resolve, 0));
+                        break;
+                    case 'error':
+                        // Show error
+                        setProcessingMessage({ role: 'error', content: update.response });
+                        setTimeout(() => {
+                            setProcessingMessage(undefined);
+                        }, 5000);
+                        break;
+                    case 'done':
+                        if (onResponse) {
+                            update.message = onResponse(update.message);
+                        }
+                        setProcessingMessage(undefined);
+                        const backstoryMessage: BackstoryMessage = update.message;
+                        setConversation([
+                            ...conversationRef.current, {
+                                role: 'user',
+                                content: backstoryMessage.prompt || "",
+                            }, {
+                                role: 'assistant',
+                                prompt: backstoryMessage.prompt || "",
+                                preamble: backstoryMessage.preamble || {},
+                                full_content: backstoryMessage.full_content || "",
+                                content: backstoryMessage.response || "",
+                                metadata: backstoryMessage.metadata,
+                                actions: backstoryMessage.actions,
+                            }] as MessageList);
+                        break;
+                }
             } catch (e) {
                 setSnack("Error processing query", "error")
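The two hunks above are halves of the same pattern: the server streams newline-delimited JSON, each complete line is parsed as a status update, and whatever remains in `buffer` when the stream closes is parsed as the final update. A self-contained sketch of that reader loop (the endpoint and update handler are placeholders, not this project's actual API):

// Minimal NDJSON reader: split the byte stream on newlines, parse each
// complete line, then flush the trailing partial line when the stream ends.
async function readUpdates(url: string, onUpdate: (update: any) => void): Promise<void> {
    const response = await fetch(url);               // placeholder endpoint
    const reader = response.body!.getReader();
    const decoder = new TextDecoder();
    let buffer = "";
    for (;;) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split("\n");
        buffer = lines.pop() ?? "";                  // keep any partial line for later
        for (const line of lines) {
            if (line.trim()) onUpdate(JSON.parse(line));
        }
    }
    if (buffer.trim()) onUpdate(JSON.parse(buffer)); // final flush, as in the hunk above
}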


@@ -32,6 +32,8 @@ type MessageRoles = 'info' | 'user' | 'assistant' | 'system' | 'status' | 'error
 type MessageData = {
     role: MessageRoles,
     content: string,
+    full_content?: string,
     disableCopy?: boolean,
     user?: string,
     title?: string,
@@ -48,7 +50,6 @@ interface MessageMetaData {
         vector_embedding: number[];
     },
     origin: string,
-    full_query?: string,
     rag: any,
     tools: any[],
     eval_count: number,
@@ -87,7 +88,6 @@ interface MessageMetaProps {
 const MessageMeta = (props: MessageMetaProps) => {
     const {
         /* MessageData */
-        full_query,
         rag,
         tools,
         eval_count,
@@ -95,7 +95,7 @@ const MessageMeta = (props: MessageMetaProps) => {
         prompt_eval_count,
         prompt_eval_duration,
     } = props.metadata || {};
-    const messageProps = props.messageProps;
+    const message = props.messageProps.message;
     return (<>
         <Box sx={{ fontSize: "0.8rem", mb: 1 }}>
@@ -137,7 +137,7 @@ const MessageMeta = (props: MessageMetaProps) => {
         </TableContainer>
         {
-            full_query !== undefined &&
+            message.full_content !== undefined &&
             <Accordion>
                 <AccordionSummary expandIcon={<ExpandMoreIcon />}>
                     <Box sx={{ fontSize: "0.8rem" }}>
@@ -145,7 +145,7 @@ const MessageMeta = (props: MessageMetaProps) => {
                     </Box>
                 </AccordionSummary>
                 <AccordionDetails>
-                    <pre style={{ "display": "block", "position": "relative" }}><CopyBubble content={full_query?.trim()} />{full_query?.trim()}</pre>
+                    <pre style={{ "display": "block", "position": "relative" }}><CopyBubble content={message.full_content?.trim()} />{message.full_content?.trim()}</pre>
                 </AccordionDetails>
             </Accordion>
         }
@@ -182,8 +182,8 @@ const MessageMeta = (props: MessageMetaProps) => {
             </Accordion>
         }
         {
-            rag?.name !== undefined && <>
-            <Accordion>
+            rag.map((rag: any) => (<>
+            <Accordion key={rag.name}>
                 <AccordionSummary expandIcon={<ExpandMoreIcon />}>
                     <Box sx={{ fontSize: "0.8rem" }}>
                         Top RAG {rag.ids.length} matches from '{rag.name}' collection against embedding vector of {rag.query_embedding.length} dimensions
@@ -212,7 +212,7 @@ const MessageMeta = (props: MessageMetaProps) => {
                     </Box>
                 </AccordionSummary>
                 <AccordionDetails>
-                    <VectorVisualizer inline {...messageProps} {...props.metadata} rag={rag} />
+                    <VectorVisualizer inline {...message} {...props.metadata} rag={rag} />
                 </AccordionDetails>
             </Accordion>
             <Accordion>
@@ -222,7 +222,7 @@ const MessageMeta = (props: MessageMetaProps) => {
                     </Box>
                 </AccordionSummary>
                 <AccordionDetails>
-                    {Object.entries(props.messageProps.message)
+                    {Object.entries(message)
                         .filter(([key, value]) => key !== undefined && value !== undefined)
                         .map(([key, value]) => (typeof (value) !== "string" || value?.trim() !== "") &&
                             <Accordion key={key}>
@@ -251,8 +251,7 @@ const MessageMeta = (props: MessageMetaProps) => {
                     )}
                 </AccordionDetails>
             </Accordion>
-        </>
+        </>))
     }
     </>);
};


@@ -970,23 +970,6 @@ class WebServer:
             else:
                 yield {"status": "complete", "message": "RAG processing complete"}
-    # agent_type: chat
-    # * Q&A
-    #
-    # agent_type: job_description
-    # * First message sets Job Description and generates Resume
-    # * Has content (Job Description)
-    # * Then Q&A of Job Description
-    #
-    # agent_type: resume
-    # * First message sets Resume and generates Fact Check
-    # * Has no content
-    # * Then Q&A of Resume
-    #
-    # Fact Check:
-    # * First message sets Fact Check and is Q&A
-    # * Has content
-    # * Then Q&A of Fact Check
     async def generate_response(self, context : Context, agent : Agent, content : str) -> AsyncGenerator[Message, None]:
         if not self.file_watcher:
             raise Exception("File watcher not initialized")
@@ -995,24 +978,20 @@ class WebServer:
         logger.info(f"generate_response: {agent_type}")
         if agent_type == "chat":
             message = Message(prompt=content)
-            async for value in agent.prepare_message(message):
-                # logger.info(f"{agent_type}.prepare_message: {value.status} - {value.response}")
-                if value.status != "done":
-                    yield value
-                if value.status == "error":
-                    message.status = "error"
-                    message.response = value.response
+            async for message in agent.prepare_message(message):
+                # logger.info(f"{agent_type}.prepare_message: {value.status} - {value.response}")
+                if message.status == "error":
                     yield message
                     return
-            async for value in agent.process_message(self.llm, self.model, message):
-                # logger.info(f"{agent_type}.process_message: {value.status} - {value.response}")
-                if value.status != "done":
-                    yield value
-                if value.status == "error":
-                    message.status = "error"
-                    message.response = value.response
+                if message.status != "done":
+                    yield message
+            async for message in agent.process_message(self.llm, self.model, message):
+                # logger.info(f"{agent_type}.process_message: {value.status} - {value.response}")
+                if message.status == "error":
                     yield message
                     return
+                if message.status != "done":
+                    yield message
             # async for value in agent.generate_llm_response(message):
             #     logger.info(f"{agent_type}.generate_llm_response: {value.status} - {value.response}")
             #     if value.status != "done":
@@ -1023,6 +1002,7 @@ class WebServer:
             #     yield message
             #     return
             logger.info("TODO: There is more to do...")
+            yield message
             return
         return
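The refactor above settles on one convention for every stage: an async generator mutates and yields the same Message, the caller forwards each non-"done" status to the client, and an "error" status ends the exchange. Expressed as a TypeScript sketch for illustration only (the Python above is the source of truth; these names are invented):

// Illustrative: the status-forwarding convention used by generate_response.
type Status = "processing" | "thinking" | "error" | "done";
interface Msg { status: Status; response: string; }

async function* runStages(stages: ((m: Msg) => AsyncGenerator<Msg>)[], msg: Msg): AsyncGenerator<Msg> {
    for (const stage of stages) {
        for await (const m of stage(msg)) {
            if (m.status === "error") { yield m; return; } // abort the pipeline on error
            if (m.status !== "done") yield m;              // forward progress updates
        }
    }
    yield msg; // final message, mirroring the `yield message` added above
}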


@@ -30,15 +30,13 @@ class Chat(Agent, ABC):
         if not message.disable_rag:
             # Gather RAG results, yielding each result
             # as it becomes available
-            for value in self.context.generate_rag_results(message):
-                logging.info(f"RAG: {value.status} - {value.response}")
-                if value.status != "done":
-                    yield value
-                if value.status == "error":
-                    message.status = "error"
-                    message.response = value.response
+            for message in self.context.generate_rag_results(message):
+                logging.info(f"RAG: {message.status} - {message.response}")
+                if message.status == "error":
                     yield message
                     return
+                if message.status != "done":
+                    yield message

         if "rag" in message.metadata and message.metadata["rag"]:
             for rag in message.metadata["rag"]:
@@ -106,11 +104,13 @@ class Chat(Agent, ABC):
                 yield message
             if value.done:
                 response = value
+                message.status = "done"

         if not response:
             message.status = "error"
             message.response = "No response from LLM."
             yield message
+            self.context.processing = False
             return

         message.metadata["eval_count"] += response["eval_count"]
@@ -120,6 +120,7 @@ class Chat(Agent, ABC):
             self.context_tokens = response["prompt_eval_count"] + response["eval_count"]
             yield message
+            self.context.processing = False
             return

         tools_used = []
@@ -205,6 +206,7 @@ class Chat(Agent, ABC):
         message.full_content = ""
         for i, p in enumerate(message.preamble.keys()):
             message.full_content += '' if i == 0 else '\n\n' + f"<|{p}|>{message.preamble[p].strip()}\n"
+        message.full_content += f"{message.prompt}"

         # Estimate token length of new messages
         message.metadata["ctx_size"] = self.context.get_optimal_ctx_size(self.context_tokens, messages=message.full_content)
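The loop above assembles full_content by wrapping each preamble section in a <|name|> tag and then appending the bare prompt, and that string is what get_optimal_ctx_size measures. A compact sketch of the intended assembly (simplified; the tag format and field names follow the diff, the function name is invented):

// Build full_content as the loop above intends: one <|name|> block per
// preamble section, then the raw prompt appended at the end.
function buildFullContent(preamble: Record<string, string>, prompt: string): string {
    const sections = Object.entries(preamble)
        .map(([name, text]) => `<|${name}|>${text.trim()}\n`);
    return sections.join("\n\n") + prompt;
}

// e.g. buildFullContent({ context: "RAG results...", rules: "Be concise." }, "Hi")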
@@ -213,12 +215,14 @@ class Chat(Agent, ABC):
         message.status = "thinking"
         yield message

-        async for value in self.generate_llm_response(llm, model, message):
-            logging.info(f"LLM: {value.status} - {value.response}")
-            if value.status != "done":
-                yield value
-            if value.status == "error":
+        async for message in self.generate_llm_response(llm, model, message):
+            logging.info(f"LLM: {message.status} - {message.response}")
+            if message.status == "error":
                 return
+            if message.status != "done":
+                yield message
+        yield message
+        return

     def get_and_reset_content_seed(self):
         tmp = self._content_seed


@@ -119,8 +119,8 @@ class Context(BaseModel):
             yield message
             return
         except Exception as e:
-            message.response = f"Error generating RAG results: {str(e)}"
             message.status = "error"
+            message.response = f"Error generating RAG results: {str(e)}"
             logger.error(e)
             yield message
             return