Starting to work again

James Ketr 2025-04-30 21:42:30 -07:00
parent 7f24d8870c
commit 2a3dc56897
7 changed files with 185 additions and 802 deletions

File diff suppressed because it is too large

View File

@@ -18,6 +18,7 @@
"@types/node": "^16.18.126",
"@types/react": "^19.0.12",
"@types/react-dom": "^19.0.4",
"@uiw/react-json-view": "^2.0.0-alpha.31",
"mui-markdown": "^1.2.6",
"react": "^19.0.0",
"react-dom": "^19.0.0",

View File

@@ -26,8 +26,8 @@ interface ConversationHandle {
interface BackstoryMessage {
prompt: string;
preamble: string;
content: string;
preamble: {};
full_content: string;
response: string;
metadata: {
rag: { documents: [] };
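Pulling the old and new fields together, the message shape this change moves toward is roughly the sketch below; actions and anything not visible in this diff are assumptions based on the usages further down:

    interface BackstoryMessage {
      prompt: string;
      preamble: {};             // was a string; now an object keyed by preamble section
      full_content: string;     // full text sent to the LLM (replaces the old `content` field)
      response: string;         // the model's reply, rendered as the assistant message
      metadata: {
        rag: { documents: [] };
      };
      actions?: any[];          // assumed from `backstoryMessage.actions` usage below
    }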
@@ -206,8 +206,8 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
}, {
role: 'assistant',
prompt: message.prompt || "",
preamble: message.preamble || "",
full_content: message.content || "",
preamble: message.preamble || {},
full_content: message.full_content || "",
content: message.response || "",
metadata: message.metadata,
actions: message.actions,
@@ -402,53 +402,59 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
try {
const update = JSON.parse(line);
console.log('Parsed update:', update.response);
// Force an immediate state update based on the message type
if (update.status === 'processing') {
// Update processing message with immediate re-render
setProcessingMessage({ role: 'status', content: update.message });
// Add a small delay to ensure React has time to update the UI
await new Promise(resolve => setTimeout(resolve, 0));
} else if (update.status === 'done') {
// Replace processing message with final result
if (onResponse) {
update.message = onResponse(update.message);
}
setProcessingMessage(undefined);
const backstoryMessage: BackstoryMessage = update.message;
setConversation([
...conversationRef.current, {
role: 'user',
content: backstoryMessage.prompt || "",
}, {
role: 'assistant',
prompt: backstoryMessage.prompt || "",
preamble: backstoryMessage.preamble || "",
full_content: backstoryMessage.content || "",
content: backstoryMessage.response || "",
metadata: backstoryMessage.metadata,
actions: backstoryMessage.actions,
}] as MessageList);
// Add a small delay to ensure React has time to update the UI
await new Promise(resolve => setTimeout(resolve, 0));
const metadata = update.message.metadata;
if (metadata) {
const evalTPS = metadata.eval_count * 10 ** 9 / metadata.eval_duration;
const promptTPS = metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration;
setLastEvalTPS(evalTPS ? evalTPS : 35);
setLastPromptTPS(promptTPS ? promptTPS : 35);
updateContextStatus();
}
} else if (update.status === 'error') {
// Show error
setProcessingMessage({ role: 'error', content: update.message });
setTimeout(() => {
switch (update.status) {
case 'processing':
case 'thinking':
// Force an immediate state update based on the message type
// Update processing message with immediate re-render
setProcessingMessage({ role: 'status', content: update.response });
// Add a small delay to ensure React has time to update the UI
await new Promise(resolve => setTimeout(resolve, 0));
break;
case 'done':
// Replace processing message with final result
if (onResponse) {
update.message = onResponse(update.response);
}
setProcessingMessage(undefined);
}, 5000);
const backstoryMessage: BackstoryMessage = update.response;
setConversation([
...conversationRef.current, {
role: 'user',
content: backstoryMessage.prompt || "",
}, {
role: 'assistant',
prompt: backstoryMessage.prompt || "",
preamble: backstoryMessage.preamble || {},
full_content: backstoryMessage.full_content || "",
content: backstoryMessage.response || "",
metadata: backstoryMessage.metadata,
actions: backstoryMessage.actions,
}] as MessageList);
// Add a small delay to ensure React has time to update the UI
await new Promise(resolve => setTimeout(resolve, 0));
const metadata = update.metadata;
if (metadata) {
const evalTPS = metadata.eval_count * 10 ** 9 / metadata.eval_duration;
const promptTPS = metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration;
setLastEvalTPS(evalTPS ? evalTPS : 35);
setLastPromptTPS(promptTPS ? promptTPS : 35);
updateContextStatus();
}
break;
case 'error':
// Show error
setProcessingMessage({ role: 'error', content: update.response });
setTimeout(() => {
setProcessingMessage(undefined);
}, 5000);
// Add a small delay to ensure React has time to update the UI
await new Promise(resolve => setTimeout(resolve, 0));
break;
}
} catch (e) {
setSnack("Error processing query", "error")
@@ -462,25 +468,42 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
try {
const update = JSON.parse(buffer);
if (update.status === 'done') {
if (onResponse) {
update.message = onResponse(update.message);
}
setProcessingMessage(undefined);
const backstoryMessage: BackstoryMessage = update.message;
setConversation([
...conversationRef.current, {
role: 'user',
content: backstoryMessage.prompt || "",
}, {
role: 'assistant',
prompt: backstoryMessage.prompt || "",
preamble: backstoryMessage.preamble || "",
full_content: backstoryMessage.content || "",
content: backstoryMessage.response || "",
metadata: backstoryMessage.metadata,
actions: backstoryMessage.actions,
}] as MessageList);
switch (update.status) {
case 'processing':
case 'thinking':
// Force an immediate state update based on the message type
// Update processing message with immediate re-render
setProcessingMessage({ role: 'status', content: update.response });
// Add a small delay to ensure React has time to update the UI
await new Promise(resolve => setTimeout(resolve, 0));
break;
case 'error':
// Show error
setProcessingMessage({ role: 'error', content: update.response });
setTimeout(() => {
setProcessingMessage(undefined);
}, 5000);
break;
case 'done':
if (onResponse) {
update.message = onResponse(update.message);
}
setProcessingMessage(undefined);
const backstoryMessage: BackstoryMessage = update.message;
setConversation([
...conversationRef.current, {
role: 'user',
content: backstoryMessage.prompt || "",
}, {
role: 'assistant',
prompt: backstoryMessage.prompt || "",
preamble: backstoryMessage.preamble || {},
full_content: backstoryMessage.full_content || "",
content: backstoryMessage.response || "",
metadata: backstoryMessage.metadata,
actions: backstoryMessage.actions,
}] as MessageList);
break;
}
} catch (e) {
setSnack("Error processing query", "error")
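Both streaming paths above now switch on update.status and read their payload from update.response, and token throughput is derived from the metadata counters. A minimal TypeScript sketch of the shapes and math this handler assumes (durations are treated as nanoseconds, as in Ollama-style generate statistics; the names below are illustrative, not the actual server contract):

    // Illustrative shape of one parsed NDJSON update from the server.
    type StreamUpdate = {
      status: 'processing' | 'thinking' | 'done' | 'error';
      response: any;                  // status text, error text, or a BackstoryMessage when status === 'done'
      metadata?: {
        eval_count: number;           // generated tokens
        eval_duration: number;        // nanoseconds
        prompt_eval_count: number;    // prompt tokens evaluated
        prompt_eval_duration: number; // nanoseconds
      };
    };

    // Tokens per second from nanosecond counters: count / (duration / 1e9).
    const tokensPerSecond = (count: number, durationNs: number): number =>
      durationNs > 0 ? (count * 10 ** 9) / durationNs : 0;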

View File

@@ -32,6 +32,8 @@ type MessageRoles = 'info' | 'user' | 'assistant' | 'system' | 'status' | 'error
type MessageData = {
role: MessageRoles,
content: string,
full_content?: string,
disableCopy?: boolean,
user?: string,
title?: string,
@@ -48,7 +50,6 @@ interface MessageMetaData {
vector_embedding: number[];
},
origin: string,
full_query?: string,
rag: any,
tools: any[],
eval_count: number,
@@ -87,7 +88,6 @@ interface MessageMetaProps {
const MessageMeta = (props: MessageMetaProps) => {
const {
/* MessageData */
full_query,
rag,
tools,
eval_count,
@@ -95,7 +95,7 @@ const MessageMeta = (props: MessageMetaProps) => {
prompt_eval_count,
prompt_eval_duration,
} = props.metadata || {};
const messageProps = props.messageProps;
const message = props.messageProps.message;
return (<>
<Box sx={{ fontSize: "0.8rem", mb: 1 }}>
@@ -137,7 +137,7 @@ const MessageMeta = (props: MessageMetaProps) => {
</TableContainer>
{
full_query !== undefined &&
message.full_content !== undefined &&
<Accordion>
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Box sx={{ fontSize: "0.8rem" }}>
@@ -145,7 +145,7 @@ const MessageMeta = (props: MessageMetaProps) => {
</Box>
</AccordionSummary>
<AccordionDetails>
<pre style={{ "display": "block", "position": "relative" }}><CopyBubble content={full_query?.trim()} />{full_query?.trim()}</pre>
<pre style={{ "display": "block", "position": "relative" }}><CopyBubble content={message.full_content?.trim()} />{message.full_content?.trim()}</pre>
</AccordionDetails>
</Accordion>
}
@@ -182,8 +182,8 @@ const MessageMeta = (props: MessageMetaProps) => {
</Accordion>
}
{
rag?.name !== undefined && <>
<Accordion>
rag.map((rag: any) => (<>
<Accordion key={rag.name}>
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Box sx={{ fontSize: "0.8rem" }}>
Top RAG {rag.ids.length} matches from '{rag.name}' collection against embedding vector of {rag.query_embedding.length} dimensions
@@ -212,7 +212,7 @@ const MessageMeta = (props: MessageMetaProps) => {
</Box>
</AccordionSummary>
<AccordionDetails>
<VectorVisualizer inline {...messageProps} {...props.metadata} rag={rag} />
<VectorVisualizer inline {...message} {...props.metadata} rag={rag} />
</AccordionDetails>
</Accordion>
<Accordion>
@@ -222,7 +222,7 @@ const MessageMeta = (props: MessageMetaProps) => {
</Box>
</AccordionSummary>
<AccordionDetails>
{Object.entries(props.messageProps.message)
{Object.entries(message)
.filter(([key, value]) => key !== undefined && value !== undefined)
.map(([key, value]) => (typeof (value) !== "string" || value?.trim() !== "") &&
<Accordion key={key}>
@@ -251,8 +251,7 @@ const MessageMeta = (props: MessageMetaProps) => {
)}
</AccordionDetails>
</Accordion>
</>
</>))
}
</>);
};
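The RAG accordion now maps over a list of collections instead of testing a single rag.name. Judging from the fields accessed above, each entry looks roughly like the sketch below (names mirror the code; element types are assumptions):

    // Illustrative shape of one entry in metadata.rag as consumed by MessageMeta.
    type RagCollection = {
      name: string;               // collection name, also used as the React key
      ids: string[];              // ids of the matched documents
      query_embedding: number[];  // embedding vector of the query
      documents: string[];        // matched document contents (assumed string chunks)
    };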

View File

@@ -970,23 +970,6 @@ class WebServer:
else:
yield {"status": "complete", "message": "RAG processing complete"}
# agent_type: chat
# * Q&A
#
# agent_type: job_description
# * First message sets Job Description and generates Resume
# * Has content (Job Description)
# * Then Q&A of Job Description
#
# agent_type: resume
# * First message sets Resume and generates Fact Check
# * Has no content
# * Then Q&A of Resume
#
# Fact Check:
# * First message sets Fact Check and is Q&A
# * Has content
# * Then Q&A of Fact Check
async def generate_response(self, context : Context, agent : Agent, content : str) -> AsyncGenerator[Message, None]:
if not self.file_watcher:
raise Exception("File watcher not initialized")
@@ -995,24 +978,20 @@ class WebServer:
logger.info(f"generate_response: {agent_type}")
if agent_type == "chat":
message = Message(prompt=content)
async for value in agent.prepare_message(message):
async for message in agent.prepare_message(message):
# logger.info(f"{agent_type}.prepare_message: {value.status} - {value.response}")
if value.status != "done":
yield value
if value.status == "error":
message.status = "error"
message.response = value.response
if message.status == "error":
yield message
return
async for value in agent.process_message(self.llm, self.model, message):
if message.status != "done":
yield message
async for message in agent.process_message(self.llm, self.model, message):
# logger.info(f"{agent_type}.process_message: {value.status} - {value.response}")
if value.status != "done":
yield value
if value.status == "error":
message.status = "error"
message.response = value.response
if message.status == "error":
yield message
return
if message.status != "done":
yield message
# async for value in agent.generate_llm_response(message):
# logger.info(f"{agent_type}.generate_llm_response: {value.status} - {value.response}")
# if value.status != "done":
@@ -1023,6 +1002,7 @@ class WebServer:
# yield message
# return
logger.info("TODO: There is more to do...")
yield message
return
return
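The rework above drops the separate value loop variable: each sub-generator now yields the (mutated) message itself, the caller forwards every non-"done" update, bails out as soon as a message reports an error, and yields the final message exactly once at the end. A minimal sketch of that delegation pattern, with illustrative names rather than the actual Agent API:

    from typing import Any, AsyncGenerator

    async def forward_stage(stage, message: Any) -> AsyncGenerator[Any, None]:
        """Forward a sub-generator's progress updates, stopping early on error (illustrative)."""
        async for message in stage(message):   # the stage mutates and yields `message`
            if message.status == "error":
                yield message                  # surface the error and stop
                return
            if message.status != "done":
                yield message                  # progress update ("processing", "thinking", ...)
        yield message                          # final state, yielded exactly once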

View File

@@ -30,15 +30,13 @@ class Chat(Agent, ABC):
if not message.disable_rag:
# Gather RAG results, yielding each result
# as it becomes available
for value in self.context.generate_rag_results(message):
logging.info(f"RAG: {value.status} - {value.response}")
if value.status != "done":
yield value
if value.status == "error":
message.status = "error"
message.response = value.response
for message in self.context.generate_rag_results(message):
logging.info(f"RAG: {message.status} - {message.response}")
if message.status == "error":
yield message
return
if message.status != "done":
yield message
if "rag" in message.metadata and message.metadata["rag"]:
for rag in message.metadata["rag"]:
@@ -106,11 +104,13 @@ class Chat(Agent, ABC):
yield message
if value.done:
response = value
message.status = "done"
if not response:
message.status = "error"
message.response = "No response from LLM."
yield message
self.context.processing = False
return
message.metadata["eval_count"] += response["eval_count"]
@@ -120,6 +120,7 @@ class Chat(Agent, ABC):
self.context_tokens = response["prompt_eval_count"] + response["eval_count"]
yield message
self.context.processing = False
return
tools_used = []
@@ -205,6 +206,7 @@ class Chat(Agent, ABC):
message.full_content = ""
for i, p in enumerate(message.preamble.keys()):
message.full_content += ('' if i == 0 else '\n\n') + f"<|{p}|>{message.preamble[p].strip()}\n"
message.full_content += f"{message.prompt}"
# Estimate token length of new messages
message.metadata["ctx_size"] = self.context.get_optimal_ctx_size(self.context_tokens, messages=message.full_content)
@@ -213,12 +215,14 @@ class Chat(Agent, ABC):
message.status = "thinking"
yield message
async for value in self.generate_llm_response(llm, model, message):
logging.info(f"LLM: {value.status} - {value.response}")
if value.status != "done":
yield value
if value.status == "error":
async for message in self.generate_llm_response(llm, model, message):
logging.info(f"LLM: {message.status} - {message.response}")
if message.status == "error":
return
if message.status != "done":
yield message
yield message
return
def get_and_reset_content_seed(self):
tmp = self._content_seed

View File

@@ -119,8 +119,8 @@ class Context(BaseModel):
yield message
return
except Exception as e:
message.response = f"Error generating RAG results: {str(e)}"
message.status = "error"
message.response = f"Error generating RAG results: {str(e)}"
logger.error(e)
yield message
return