diff --git a/frontend/src/Controls.tsx b/frontend/src/Controls.tsx
index 1507cfd..fb9f219 100644
--- a/frontend/src/Controls.tsx
+++ b/frontend/src/Controls.tsx
@@ -109,11 +109,11 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => {
'Content-Type': 'application/json',
'Accept': 'application/json',
},
- body: JSON.stringify({ "system-prompt": prompt }),
+ body: JSON.stringify({ "system_prompt": prompt }),
});
const data = await response.json();
- const newPrompt = data["system-prompt"];
+ const newPrompt = data["system_prompt"];
if (newPrompt !== serverSystemPrompt) {
setServerSystemPrompt(newPrompt);
setSystemPrompt(newPrompt)
@@ -141,11 +141,11 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => {
'Content-Type': 'application/json',
'Accept': 'application/json',
},
- body: JSON.stringify({ "message-history-length": length }),
+ body: JSON.stringify({ "message_history_length": length }),
});
const data = await response.json();
- const newLength = data["message-history-length"];
+ const newLength = data["message_history_length"];
if (newLength !== messageHistoryLength) {
setMessageHistoryLength(newLength);
setSnack("Message history length updated", "success");
@@ -159,7 +159,7 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => {
sendMessageHistoryLength(messageHistoryLength);
}, [messageHistoryLength, setMessageHistoryLength, connectionBase, sessionId, setSnack]);
- const reset = async (types: ("rags" | "tools" | "history" | "system-prompt" | "message-history-length")[], message: string = "Update successful.") => {
+ const reset = async (types: ("rags" | "tools" | "history" | "system_prompt" | "message_history_length")[], message: string = "Update successful.") => {
try {
- const response = await fetch(connectionBase + `/api/reset/${sessionId}`, {
+ const response = await fetch(connectionBase + `/api/reset/${sessionId}/chat`, {
method: 'PUT',
@@ -183,9 +183,9 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => {
case "tools":
setTools(value as Tool[]);
break;
- case "system-prompt":
- setServerSystemPrompt((value as any)["system-prompt"].trim());
- setSystemPrompt((value as any)["system-prompt"].trim());
+ case "system_prompt":
+ setServerSystemPrompt((value as any)["system_prompt"].trim());
+ setSystemPrompt((value as any)["system_prompt"].trim());
break;
case "history":
console.log('TODO: handle history reset');
@@ -346,10 +346,10 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => {
},
});
const data = await response.json();
- const serverSystemPrompt = data["system-prompt"].trim();
+ const serverSystemPrompt = data["system_prompt"].trim();
setServerSystemPrompt(serverSystemPrompt);
setSystemPrompt(serverSystemPrompt);
- setMessageHistoryLength(data["message-history-length"]);
+ setMessageHistoryLength(data["message_history_length"]);
}
fetchTunables();
@@ -402,7 +402,7 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => {
/>
-
+
@@ -481,7 +481,7 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => {
} onClick={() => { reset(["history"], "History cleared."); }}>Clear Backstory History
-
+
);
}
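
For reference, the renamed snake_case tunables can be exercised with a helper along these lines (a sketch under the request/response shapes shown in the hunks above; not part of the patch):

type Tunables = { system_prompt?: string; message_history_length?: number };

// PUT one tunable and return the canonical value the server echoes back.
async function putTunable(connectionBase: string, sessionId: string, patch: Tunables): Promise<Tunables> {
  const response = await fetch(`${connectionBase}/api/tunables/${sessionId}`, {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json', 'Accept': 'application/json' },
    body: JSON.stringify(patch),
  });
  if (!response.ok) {
    throw new Error(`Server responded with ${response.status}: ${response.statusText}`);
  }
  return response.json(); // e.g. { system_prompt: "..." } or { message_history_length: 5 }
}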
diff --git a/frontend/src/Conversation.tsx b/frontend/src/Conversation.tsx
index 852d592..7afb54f 100644
--- a/frontend/src/Conversation.tsx
+++ b/frontend/src/Conversation.tsx
@@ -3,9 +3,11 @@ import TextField from '@mui/material/TextField';
import Typography from '@mui/material/Typography';
import Tooltip from '@mui/material/Tooltip';
import Button from '@mui/material/Button';
+import IconButton from '@mui/material/IconButton';
import Box from '@mui/material/Box';
import SendIcon from '@mui/icons-material/Send';
-
+import ResetIcon from '@mui/icons-material/RestartAlt';
+import { SxProps, Theme } from '@mui/material';
import PropagateLoader from "react-spinners/PropagateLoader";
import { Message, MessageList, MessageData } from './Message';
@@ -14,24 +16,31 @@ import { ContextStatus } from './ContextStatus';
const loadingMessage: MessageData = { "role": "assistant", "content": "Establishing connection with server..." };
-type ConversationMode = 'chat' | 'fact-check' | 'system';
+type ConversationMode = 'chat' | 'job_description' | 'resume' | 'fact_check';
interface ConversationHandle {
submitQuery: (query: string) => void;
}
interface ConversationProps {
+ className?: string,
type: ConversationMode
prompt: string,
+ actionLabel?: string,
+ resetAction?: () => void,
+ resetLabel?: string,
connectionBase: string,
sessionId?: string,
setSnack: (message: string, severity: SeverityType) => void,
defaultPrompts?: React.ReactElement[],
preamble?: MessageList,
hideDefaultPrompts?: boolean,
+ messageFilter?: (messages: MessageList) => MessageList,
+ messages?: MessageList,
+ sx?: SxProps,
};
-const Conversation = forwardRef(({ prompt, type, preamble, hideDefaultPrompts, defaultPrompts, sessionId, setSnack, connectionBase }: ConversationProps, ref) => {
+const Conversation = forwardRef(({ ...props }: ConversationProps, ref) => {
const [query, setQuery] = useState("");
const [contextUsedPercentage, setContextUsedPercentage] = useState(0);
const [processing, setProcessing] = useState(false);
@@ -43,12 +52,13 @@ const Conversation = forwardRef(({ prompt
const [contextStatus, setContextStatus] = useState({ context_used: 0, max_context: 0 });
const [contextWarningShown, setContextWarningShown] = useState(false);
const [noInteractions, setNoInteractions] = useState(true);
+ const setSnack = props.setSnack;
// Update the context status
const updateContextStatus = useCallback(() => {
const fetchContextStatus = async () => {
try {
- const response = await fetch(connectionBase + `/api/context-status/${sessionId}`, {
+ const response = await fetch(props.connectionBase + `/api/context-status/${props.sessionId}/${props.type}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
@@ -68,18 +78,18 @@ const Conversation = forwardRef(({ prompt
}
};
fetchContextStatus();
- }, [setContextStatus, connectionBase, setSnack, sessionId]);
+ }, [setContextStatus, props.connectionBase, setSnack, props.sessionId, props.type]);
// Set the initial chat history to "loading" or the welcome message if loaded.
useEffect(() => {
- if (sessionId === undefined) {
+ if (props.sessionId === undefined) {
setConversation([loadingMessage]);
return;
}
const fetchHistory = async () => {
try {
- const response = await fetch(connectionBase + `/api/history/${sessionId}`, {
+ const response = await fetch(props.connectionBase + `/api/history/${props.sessionId}/${props.type}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
@@ -89,12 +99,18 @@ const Conversation = forwardRef(({ prompt
throw new Error(`Server responded with ${response.status}: ${response.statusText}`);
}
const data = await response.json();
- console.log(`Session id: ${sessionId} -- history returned from server with ${data.length} entries`)
+ console.log(`History returned from server with ${data.length} entries`)
if (data.length === 0) {
- setConversation(preamble || []);
+ setConversation([
+ ...(props.preamble || []),
+ ...(props.messages || []),
+ ]);
setNoInteractions(true);
} else {
- setConversation(data);
+ setConversation([
+ ...(props.messages || []),
+ ...(props.messageFilter ? props.messageFilter(data) : data)
+ ]);
setNoInteractions(false);
}
updateContextStatus();
@@ -103,10 +119,10 @@ const Conversation = forwardRef(({ prompt
setSnack("Unable to obtain chat history.", "error");
}
};
- if (sessionId !== undefined) {
+ if (props.sessionId !== undefined) {
fetchHistory();
}
- }, [sessionId, setConversation, updateContextStatus, connectionBase, setSnack, preamble]);
+ }, [props.sessionId, setConversation, updateContextStatus, props.connectionBase, setSnack, props.preamble, props.type]);
const isScrolledToBottom = useCallback(()=> {
// Current vertical scroll position
@@ -191,6 +207,40 @@ const Conversation = forwardRef(({ prompt
setContextUsedPercentage(context_used_percentage)
}, [contextStatus, setContextWarningShown, contextWarningShown, setContextUsedPercentage, setSnack]);
+ const reset = async () => {
+ try {
+ const response = await fetch(props.connectionBase + `/api/reset/${props.sessionId}/${props.type}`, {
+ method: 'PUT',
+ headers: {
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json',
+ },
+ body: JSON.stringify({ reset: ['history'] }) // the server iterates data["reset"], so it must be a list
+ });
+
+ if (!response.ok) {
+ throw new Error(`Server responded with ${response.status}: ${response.statusText}`);
+ }
+
+ if (!response.body) {
+ throw new Error('Response body is null');
+ }
+
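+ /* Run the filter once with an empty list so any state it derives (e.g., the generated resume) is cleared */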
+ props.messageFilter && props.messageFilter([]);
+
+ setConversation([
+ ...(props.preamble || []),
+ ...(props.messages || []),
+ ]);
+
+ setNoInteractions(true);
+
+ } catch (e) {
+ setSnack("Error resetting history", "error")
+ console.error('Error resetting history:', e);
+ }
+ };
+
const sendQuery = async (query: string) => {
setNoInteractions(false);
@@ -229,7 +279,7 @@ const Conversation = forwardRef(({ prompt
}
// Make the fetch request with proper headers
- const response = await fetch(connectionBase + `/api/chat/${sessionId}`, {
+ const response = await fetch(props.connectionBase + `/api/chat/${props.sessionId}/${props.type}`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
@@ -373,8 +423,12 @@ const Conversation = forwardRef(({ prompt
};
return (
-
- {conversation.map((message, index) => )}
+
+ {
+ conversation.map((message, index) =>
+
+ )
+ }
(({ prompt
>Estimated response time: {countdown}s
)}
-
+
setQuery(e.target.value)}
onKeyDown={handleKeyPress}
- placeholder={prompt}
+ placeholder={props.prompt}
id="QueryInput"
/>
-
-
-
+
+ { reset(); }}
+ >
+
+
+
+
+
+
+
+
- {(noInteractions || !hideDefaultPrompts) && defaultPrompts !== undefined && defaultPrompts.length &&
+ {(noInteractions || !props.hideDefaultPrompts) && props.defaultPrompts !== undefined && props.defaultPrompts.length &&
{
- defaultPrompts.map((element, index) => {
+ props.defaultPrompts.map((element, index) => {
return ({element});
})
}
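
Conversation now derives every endpoint from a (sessionId, type) pair. A sketch of the route scheme it assumes (helper and names are illustrative; the paths match the server.py hunks below):

type ConversationMode = 'chat' | 'job_description' | 'resume' | 'fact_check';

// One session per type; the server keys history, reset, chat, and token accounting by the trailing segment.
const sessionRoutes = (base: string, sessionId: string, type: ConversationMode) => ({
  history: `${base}/api/history/${sessionId}/${type}`,              // GET -> MessageList
  reset: `${base}/api/reset/${sessionId}/${type}`,                  // PUT { reset: ['history'] }
  chat: `${base}/api/chat/${sessionId}/${type}`,                    // POST { content }, streamed JSON lines
  contextStatus: `${base}/api/context-status/${sessionId}/${type}`, // GET -> { context_used, max_context }
});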
diff --git a/frontend/src/DocumentViewer.tsx b/frontend/src/DocumentViewer.tsx
index 68d823c..dd38ad6 100644
--- a/frontend/src/DocumentViewer.tsx
+++ b/frontend/src/DocumentViewer.tsx
@@ -28,33 +28,21 @@ import { SxProps, Theme } from '@mui/material';
import MuiMarkdown from 'mui-markdown';
-import { Message } from './Message';
+import { Message, ChatQuery } from './Message';
import { Document } from './Document';
-import { MessageData } from './Message';
+import { MessageData, MessageList } from './Message';
import { SeverityType } from './Snack';
+import { Conversation } from './Conversation';
/**
* Props for the DocumentViewer component
* @interface DocumentViewerProps
- * @property {function} generateResume - Function to generate a resume based on job description
- * @property {MessageData | undefined} resume - The generated resume data
- * @property {function} setResume - Function to set the generated resume
- * @property {function} factCheck - Function to fact check the generated resume
- * @property {MessageData | undefined} facts - The fact check results
- * @property {function} setFacts - Function to set the fact check results
- * @property {string} jobDescription - The initial job description
- * @property {function} setJobDescription - Function to set the job description
* @property {SxProps} [sx] - Optional styling properties
+ * @property {string} connectionBase - Base URL for fetch calls
+ * @property {string} sessionId - Session ID
+ * @property {function} setSnack - Callback to display snackbar messages
*/
export interface DocumentViewerProps {
- generateResume: (jobDescription: string) => void;
- resume: MessageData | undefined;
- setResume: (resume: MessageData | undefined) => void;
- factCheck: (resume: string) => void;
- facts: MessageData | undefined;
- setFacts: (facts: MessageData | undefined) => void;
- jobDescription: string | undefined;
- setJobDescription: (jobDescription: string | undefined) => void;
sx?: SxProps;
connectionBase: string;
sessionId: string;
@@ -67,19 +55,16 @@ export interface DocumentViewerProps {
* with different layouts for mobile and desktop views.
*/
const DocumentViewer: React.FC = ({
- generateResume,
- jobDescription,
- factCheck,
- resume,
- setResume,
- facts,
- setFacts,
sx,
connectionBase,
sessionId,
setSnack
}) => {
// State for editing job description
+ const [jobDescription, setJobDescription] = useState(undefined);
+ const [facts, setFacts] = useState(undefined);
+ const [resume, setResume] = useState(undefined);
+
const [editJobDescription, setEditJobDescription] = useState(jobDescription);
// Processing state to show loading indicators
const [processing, setProcessing] = useState(undefined);
@@ -122,8 +107,8 @@ const DocumentViewer: React.FC = ({
}
setProcessing("resume");
setTimeout(() => { setActiveTab(1); }, 250); // Switch to resume view on mobile
- generateResume(description);
- }, [generateResume, setProcessing, setActiveTab, setResume]);
+ console.log('generateResume(description);');
+ }, [/* generateResume */ setProcessing, setActiveTab, setResume]);
/**
* Trigger fact check and update UI state
@@ -137,9 +122,9 @@ const DocumentViewer: React.FC = ({
return;
}
setProcessing("facts");
- factCheck(resume);
+ console.log('factCheck(resume)');
setTimeout(() => { setActiveTab(2); }, 250); // Switch to resume view on mobile
- }, [factCheck, setResume, setProcessing, setActiveTab, setFacts]);
+ }, [/*factCheck,*/ setResume, setProcessing, setActiveTab, setFacts]);
useEffect(() => {
setEditJobDescription(jobDescription);
@@ -192,62 +177,73 @@ const DocumentViewer: React.FC = ({
triggerGeneration(editJobDescription || "");
}
};
+ const handleJobQuery = (query: string) => {
+ triggerGeneration(query);
+ };
- const renderJobDescriptionView = () => {
- const children = [];
+ const jobDescriptionQuestions = [
+
+
+
+ ,
+ ];
- if (resume === undefined && processing === undefined) {
- children.push(
-
- setEditJobDescription(e.target.value)}
- onKeyDown={handleKeyPress}
- placeholder="Paste a job description, then click Generate..."
- />
-
- );
- } else {
- children.push({editJobDescription})
+ const filterJobDescriptionMessages = (messages: MessageList): MessageList => {
+ /* The second message is the RESUME (the LLM response to the JOB DESCRIPTION) */
+ if (messages.length > 1) {
+ setResume(messages[1]);
+ } else if (resume !== undefined) {
+ setResume(undefined);
}
- children.push(
-
- { setEditJobDescription(""); triggerGeneration(undefined); }}
- >
-
-
-
-
-
-
-
-
- );
+ /* Filter out the RESUME */
+ const reduced = messages.filter((message, index) => index !== 1);
- return children;
+ /* Set the first message as coming from the assistant (rendered as markdown) */
+ if (reduced.length > 0) {
+ reduced[0].role = 'assistant';
+ }
+ return reduced;
+ };
+
+ const jobDescriptionMessages: MessageList = [];
+
+ const renderJobDescriptionView = () => {
+ if (resume === undefined) {
+ return
+
+ } else {
+ return
+
+ }
}
/**
@@ -363,7 +359,7 @@ const DocumentViewer: React.FC = ({
const otherRatio = showResume ? (100 - splitRatio / 2) : 100;
const children = [];
children.push(
-
+
{renderJobDescriptionView()}
);
@@ -418,7 +414,7 @@ const DocumentViewer: React.FC = ({
}
return (
-
+
{children}
@@ -428,7 +424,7 @@ const DocumentViewer: React.FC = ({
}
return (
-
+
{getActiveDesktopContent()}
);
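
The JSX inside renderJobDescriptionView was mangled when this diff was captured; a plausible sketch of the intended wiring, using the ConversationProps added earlier (prop values are illustrative assumptions, not recovered code):

// Illustrative only; the exact markup was lost from the hunk above.
const renderJobDescriptionViewSketch = () => (
  <Conversation
    type="job_description"
    prompt="Paste a job description..."
    messageFilter={filterJobDescriptionMessages}
    messages={jobDescriptionMessages}
    defaultPrompts={jobDescriptionQuestions}
    connectionBase={connectionBase}
    sessionId={sessionId}
    setSnack={setSnack}
  />
);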
diff --git a/frontend/src/Message.tsx b/frontend/src/Message.tsx
index 8f75205..ab3cbee 100644
--- a/frontend/src/Message.tsx
+++ b/frontend/src/Message.tsx
@@ -233,7 +233,7 @@ const Message = ({ message, submitQuery, isFullWidth, sessionId, setSnack, conne
const formattedContent = message.content.trim();
return (
-
+
diff --git a/frontend/src/ResumeBuilder.tsx b/frontend/src/ResumeBuilder.tsx
--- a/frontend/src/ResumeBuilder.tsx
+++ b/frontend/src/ResumeBuilder.tsx
- const [lastEvalTPS, setLastEvalTPS] = useState(35);
- const [lastPromptTPS, setLastPromptTPS] = useState(430);
- const [contextStatus, setContextStatus] = useState({ context_used: 0, max_context: 0 });
const [jobDescription, setJobDescription] = useState(undefined);
-
- const updateContextStatus = useCallback(() => {
- fetch(connectionBase + `/api/context-status/${sessionId}`, {
- method: 'GET',
- headers: {
- 'Content-Type': 'application/json',
- },
- })
- .then(response => response.json())
- .then(data => {
- setContextStatus(data);
- })
- .catch(error => {
- console.error('Error getting context status:', error);
- setSnack("Unable to obtain context status.", "error");
- });
- }, [setContextStatus, connectionBase, setSnack, sessionId]);
-
- // If the jobDescription and resume have not been set, fetch them from the server
- useEffect(() => {
- if (sessionId === undefined) {
- return;
- }
- if (jobDescription !== undefined) {
- return;
- }
- const fetchResume = async () => {
- try {
- // Make the fetch request with proper headers
- const response = await fetch(connectionBase + `/api/resume/${sessionId}`, {
- method: 'GET',
- headers: {
- 'Content-Type': 'application/json',
- 'Accept': 'application/json',
- },
- });
- if (!response.ok) {
- throw Error();
- }
- const data: Resume[] = await response.json();
- if (data.length) {
- const lastResume = data[data.length - 1];
- console.log(lastResume);
- setJobDescription(lastResume['job_description']);
- setResume(lastResume.resume);
- if (lastResume['fact_check'] !== undefined && lastResume['fact_check'] !== null) {
- lastResume['fact_check'].role = 'info';
- setFacts(lastResume['fact_check'])
- } else {
- setFacts(undefined)
- }
- }
- } catch (error: any) {
- setSnack("Unable to fetch resume", "error");
- console.error(error);
- }
- }
-
- fetchResume();
- }, [sessionId, resume, jobDescription, setResume, setJobDescription, setSnack, setFacts, connectionBase]);
-
- // const startCountdown = (seconds: number) => {
- // if (timerRef.current) clearInterval(timerRef.current);
- // setCountdown(seconds);
- // timerRef.current = setInterval(() => {
- // setCountdown((prev) => {
- // if (prev <= 1) {
- // clearInterval(timerRef.current);
- // timerRef.current = null;
- // if (isScrolledToBottom()) {
- // setTimeout(() => {
- // scrollToBottom();
- // }, 50)
- // }
- // return 0;
- // }
- // return prev - 1;
- // });
- // }, 1000);
- // };
-
- // const stopCountdown = () => {
- // if (timerRef.current) {
- // clearInterval(timerRef.current);
- // timerRef.current = null;
- // setCountdown(0);
- // }
- // };
-
if (sessionId === undefined) {
return (<></>);
}
- const generateResume = async (description: string) => {
- if (!description.trim()) return;
- setResume(undefined);
- setFacts(undefined);
-
- try {
- setProcessing(true);
-
- // Add initial processing message
- //setGenerateStatus({ role: 'assistant', content: 'Processing request...' });
-
- // Make the fetch request with proper headers
- const response = await fetch(connectionBase + `/api/generate-resume/${sessionId}`, {
- method: 'POST',
- headers: {
- 'Content-Type': 'application/json',
- 'Accept': 'application/json',
- },
- body: JSON.stringify({ content: description.trim() }),
- });
-
- // We'll guess that the response will be around 500 tokens...
- const token_guess = 500;
- const estimate = Math.round(token_guess / lastEvalTPS + contextStatus.context_used / lastPromptTPS);
-
- setSnack(`Job description sent. Response estimated in ${estimate}s.`, "info");
- //startCountdown(Math.round(estimate));
-
- if (!response.ok) {
- throw new Error(`Server responded with ${response.status}: ${response.statusText}`);
- }
-
- if (!response.body) {
- throw new Error('Response body is null');
- }
-
- // Set up stream processing with explicit chunking
- const reader = response.body.getReader();
- const decoder = new TextDecoder();
- let buffer = '';
-
- while (true) {
- const { done, value } = await reader.read();
- if (done) {
- break;
- }
-
- const chunk = decoder.decode(value, { stream: true });
-
- // Process each complete line immediately
- buffer += chunk;
- let lines = buffer.split('\n');
- buffer = lines.pop() || ''; // Keep incomplete line in buffer
- for (const line of lines) {
- if (!line.trim()) continue;
-
- try {
- const update = JSON.parse(line);
-
- // Force an immediate state update based on the message type
- if (update.status === 'processing') {
- // Update processing message with immediate re-render
- //setGenerateStatus({ role: 'info', content: update.message });
- console.log(update.num_ctx);
-
- // Add a small delay to ensure React has time to update the UI
- await new Promise(resolve => setTimeout(resolve, 0));
-
- } else if (update.status === 'done') {
- // Replace processing message with final result
- //setGenerateStatus(undefined);
- setResume(update.message);
- const metadata = update.message.metadata;
- const evalTPS = metadata.eval_count * 10 ** 9 / metadata.eval_duration;
- const promptTPS = metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration;
- setLastEvalTPS(evalTPS ? evalTPS : 35);
- setLastPromptTPS(promptTPS ? promptTPS : 35);
- updateContextStatus();
- } else if (update.status === 'error') {
- // Show error
- //setGenerateStatus({ role: 'error', content: update.message });
- }
- } catch (e) {
- setSnack("Error generating resume", "error")
- console.error('Error parsing JSON:', e, line);
- }
- }
- }
-
- // Process any remaining buffer content
- if (buffer.trim()) {
- try {
- const update = JSON.parse(buffer);
-
- if (update.status === 'done') {
- //setGenerateStatus(undefined);
- setResume(update.message);
- }
- } catch (e) {
- setSnack("Error processing job description", "error")
- }
- }
-
- //stopCountdown();
- setProcessing(false);
- } catch (error) {
- console.error('Fetch error:', error);
- setSnack("Unable to process job description", "error");
- //setGenerateStatus({ role: 'error', content: `Error: ${error}` });
- setProcessing(false);
- //stopCountdown();
- }
- };
-
- const factCheck = async (resume: string) => {
- if (!resume.trim()) return;
- setFacts(undefined);
-
- try {
- setProcessing(true);
-
- const response = await fetch(connectionBase + `/api/fact-check/${sessionId}`, {
- method: 'POST',
- headers: {
- 'Content-Type': 'application/json',
- 'Accept': 'application/json',
- },
- body: JSON.stringify({ content: resume.trim() }),
- });
-
- // We'll guess that the response will be around 500 tokens...
- const token_guess = 500;
- const estimate = Math.round(token_guess / lastEvalTPS + contextStatus.context_used / lastPromptTPS);
-
- setSnack(`Resume sent for Fact Check. Response estimated in ${estimate}s.`, "info");
- //startCountdown(Math.round(estimate));
-
- if (!response.ok) {
- throw new Error(`Server responded with ${response.status}: ${response.statusText}`);
- }
-
- if (!response.body) {
- throw new Error('Response body is null');
- }
-
- // Set up stream processing with explicit chunking
- const reader = response.body.getReader();
- const decoder = new TextDecoder();
- let buffer = '';
-
- while (true) {
- const { done, value } = await reader.read();
- if (done) {
- break;
- }
-
- const chunk = decoder.decode(value, { stream: true });
-
- // Process each complete line immediately
- buffer += chunk;
- let lines = buffer.split('\n');
- buffer = lines.pop() || ''; // Keep incomplete line in buffer
- for (const line of lines) {
- if (!line.trim()) continue;
-
- try {
- const update = JSON.parse(line);
-
- // Force an immediate state update based on the message type
- if (update.status === 'processing') {
- // Add a small delay to ensure React has time to update the UI
- await new Promise(resolve => setTimeout(resolve, 0));
-
- } else if (update.status === 'done') {
- // Replace processing message with final result
- update.message.role = 'info';
- setFacts(update.message);
- const metadata = update.message.metadata;
- const evalTPS = metadata.eval_count * 10 ** 9 / metadata.eval_duration;
- const promptTPS = metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration;
- setLastEvalTPS(evalTPS ? evalTPS : 35);
- setLastPromptTPS(promptTPS ? promptTPS : 35);
- updateContextStatus();
- } else if (update.status === 'error') {
- }
- } catch (e) {
- setSnack("Error generating resume", "error")
- console.error('Error parsing JSON:', e, line);
- }
- }
- }
-
- // Process any remaining buffer content
- if (buffer.trim()) {
- try {
- const update = JSON.parse(buffer);
-
- if (update.status === 'done') {
- update.message.role = 'info';
- setFacts(update.message);
- }
- } catch (e) {
- setSnack("Error processing resume", "error")
- }
- }
-
- //stopCountdown();
- setProcessing(false);
- } catch (error) {
- console.error('Fetch error:', error);
- setSnack("Unable to process resume", "error");
- //setGenerateStatus({ role: 'error', content: `Error: ${error}` });
- setProcessing(false);
- //stopCountdown();
- }
- };
-
return (
@@ -350,7 +40,7 @@ const ResumeBuilder = ({ facts, setFacts, resume, setResume, setProcessing, proc
overflowY: "auto",
flexDirection: "column",
height: "calc(0vh - 0px)", // Hack to make the height work
- }} {...{ factCheck, facts, jobDescription, generateResume, resume, setFacts, setResume, setSnack, setJobDescription, connectionBase, sessionId }} />
+ }} {...{ setSnack, connectionBase, sessionId }} />
);
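
With generation and fact-checking moved into per-type chat sessions, ResumeBuilder reduces to roughly this shape (condensed sketch; ResumeBuilderSketch and ResumeBuilderProps are assumed names, not code from the repo):

const ResumeBuilderSketch = ({ sx, setSnack, connectionBase, sessionId }: ResumeBuilderProps) =>
  sessionId === undefined
    ? <></>
    : <DocumentViewer sx={sx} {...{ setSnack, connectionBase, sessionId }} />;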
diff --git a/src/server.py b/src/server.py
index 1def9b8..9439501 100644
--- a/src/server.py
+++ b/src/server.py
@@ -11,6 +11,7 @@ import uuid
import subprocess
import re
import math
+import copy
def try_import(module_name, pip_name=None):
try:
@@ -52,6 +53,8 @@ from tools import (
tools
)
+CONTEXT_VERSION = 2
+
rags = [
{ "name": "JPK", "enabled": True, "description": "Expert data about James Ketrenos, including work history, personal hobbies, and projects." },
# { "name": "LKML", "enabled": False, "description": "Full associative data for entire LKML mailing list archive." },
@@ -164,6 +167,8 @@ Always use tools and [{context_tag}] when possible. Be concise, and never make u
""".strip()
system_generate_resume = f"""
+Launched on {DateTime()}.
+
You are a professional resume writer. Your task is to write a polished, tailored resume for a specific job based only on the individual's [WORK HISTORY].
When answering queries, follow these steps:
@@ -188,9 +193,11 @@ Structure the resume professionally with the following sections where applicable
Do not include any information unless it is provided in [WORK HISTORY] or [INTRO].
Ensure the language is clear, concise, and aligned with industry standards for professional resumes.
-"""
+""".strip()
system_fact_check = f"""
+Launched on {DateTime()}.
+
You are a professional resume fact checker. Your task is to identify any inaccuracies in the [RESUME] based on the individual's [WORK HISTORY].
If there are inaccuracies, list them in a bullet point format.
@@ -198,7 +205,20 @@ If there are inaccuracies, list them in a bullet point format.
When answering queries, follow these steps:
1. You must not invent or assume any information not explicitly present in the [WORK HISTORY].
2. Analyze the [RESUME] to identify any discrepancies or inaccuracies based on the [WORK HISTORY].
-"""
+""".strip()
+
+system_job_description = f"""
+Launched on {DateTime()}.
+
+You are a hiring and job-placement specialist. Your task is to answer questions about a job description.
+
+When answering queries, follow these steps:
+1. Analyze the [JOB DESCRIPTION] to provide insights for the asked question.
+2. If any financial information is requested, be sure to account for inflation.
+""".strip()
+
+def create_system_message(prompt):
+ return [{"role": "system", "content": prompt}]
tool_log = []
command_log = []
@@ -374,6 +394,9 @@ async def handle_tool_calls(message):
final_result = all_responses[0] if len(all_responses) == 1 else all_responses
yield (final_result, tools_used)
+
+
+
# %%
class WebServer:
def __init__(self, logging, client, model=MODEL_NAME):
@@ -431,71 +454,6 @@ class WebServer:
return RedirectResponse(url=f"/{context['id']}", status_code=307)
#return JSONResponse({"redirect": f"/{context['id']}"})
- @self.app.get("/api/query")
- async def query_documents(query: str, top_k: int = 3):
- if not self.file_watcher:
- return
-
- """Query the RAG system with the given prompt."""
- results = self.file_watcher.find_similar(query, top_k=top_k)
- return {
- "query": query,
- "results": [
- {
- "content": doc,
- "metadata": meta,
- "distance": dist
- }
- for doc, meta, dist in zip(
- results["documents"],
- results["metadatas"],
- results["distances"]
- )
- ]
- }
-
- @self.app.post("/api/refresh/{file_path:path}")
- async def refresh_document(file_path: str, background_tasks: BackgroundTasks):
- if not self.file_watcher:
- return
-
- """Manually refresh a specific document in the collection."""
- full_path = os.path.join(defines.doc_dir, file_path)
-
- if not os.path.exists(full_path):
- return {"status": "error", "message": "File not found"}
-
- # Schedule the update in the background
- background_tasks.add_task(
- self.file_watcher.process_file_update, full_path
- )
-
- return {
- "status": "success",
- "message": f"Document refresh scheduled for {file_path}"
- }
-
- # @self.app.post("/api/refresh-all")
- # async def refresh_all_documents():
- # if not self.file_watcher:
- # return
-
- # """Refresh all documents in the collection."""
- # # Re-initialize file hashes and process all files
- # self.file_watcher._initialize_file_hashes()
-
- # # Schedule updates for all files
- # file_paths = self.file_watcher.file_hashes.keys()
- # tasks = [self.file_watcher.process_file_update(path) for path in file_paths]
-
- # # Wait for all updates to complete
- # await asyncio.gather(*tasks)
-
- # return {
- # "status": "success",
- # "message": f"Refreshed {len(file_paths)} documents",
- # "document_count": file_watcher.collection.count()
- # }
@self.app.put("/api/umap/{context_id}")
async def put_umap(context_id: str, request: Request):
@@ -566,20 +524,23 @@ class WebServer:
logging.error(e)
#return JSONResponse({"error": str(e)}, 500)
- @self.app.put("/api/reset/{context_id}")
- async def put_reset(context_id: str, request: Request):
+ @self.app.put("/api/reset/{context_id}/{type}")
+ async def put_reset(context_id: str, type: str, request: Request):
if not is_valid_uuid(context_id):
logging.warning(f"Invalid context_id: {context_id}")
return JSONResponse({"error": "Invalid context_id"}, status_code=400)
context = self.upsert_context(context_id)
+ if type not in context["sessions"]:
+ return JSONResponse({ "error": f"{type} is not recognized", "context": context }, status_code=404)
+
data = await request.json()
try:
response = {}
for reset in data["reset"]:
match reset:
- case "system-prompt":
- context["system"] = [{"role": "system", "content": system_message}]
- response["system-prompt"] = { "system-prompt": system_message }
+ case "system_prompt":
+ context["sessions"][type]["system_prompt"] = system_message
+ response["system_prompt"] = { "system_prompt": system_message }
case "rags":
context["rags"] = rags.copy()
response["rags"] = context["rags"]
@@ -587,23 +548,23 @@ class WebServer:
context["tools"] = default_tools(tools)
response["tools"] = context["tools"]
case "history":
- context["llm_history"] = []
- context["user_history"] = []
+ context["sessions"][type]["llm_history"] = []
+ context["sessions"][type]["user_history"] = []
+ context["sessions"][type]["context_tokens"] = round(len(str(context["system"])) * 3 / 4) # Estimate context usage
response["history"] = []
- context["context_tokens"] = round(len(str(context["system"])) * 3 / 4) # Estimate context usage
- response["context_used"] = context["context_tokens"]
- case "message-history-length":
+ response["context_used"] = context["sessions"][type]["context_tokens"]
+ case "message_history_length":
context["message_history_length"] = DEFAULT_HISTORY_LENGTH
- response["message-history-length"] = DEFAULT_HISTORY_LENGTH
+ response["message_history_length"] = DEFAULT_HISTORY_LENGTH
if not response:
- return JSONResponse({ "error": "Usage: { reset: rags|tools|history|system-prompt}"})
+ return JSONResponse({ "error": "Usage: { reset: rags|tools|history|system_prompt}"})
else:
self.save_context(context_id)
return JSONResponse(response)
except:
- return JSONResponse({ "error": "Usage: { reset: rags|tools|history|system-prompt}"})
+ return JSONResponse({ "error": "Usage: { reset: rags|tools|history|system_prompt}"})
@self.app.put("/api/tunables/{context_id}")
async def put_tunables(context_id: str, request: Request):
@@ -614,20 +575,20 @@ class WebServer:
data = await request.json()
for k in data.keys():
match k:
- case "system-prompt":
+ case "system_prompt":
system_prompt = data[k].strip()
if not system_prompt:
return JSONResponse({ "status": "error", "message": "System prompt can not be empty." })
context["system"] = [{"role": "system", "content": system_prompt}]
self.save_context(context_id)
- return JSONResponse({ "system-prompt": system_prompt })
- case "message-history-length":
+ return JSONResponse({ "system_prompt": system_prompt })
+ case "message_history_length":
value = max(0, int(data[k]))
context["message_history_length"] = value
self.save_context(context_id)
- return JSONResponse({ "message-history-length": value })
+ return JSONResponse({ "message_history_length": value })
case _:
- return JSONResponse({ "error": f"Unrecognized tunable {k}"}, 404)
+ return JSONResponse({ "error": f"Unrecognized tunable {k}"}, status_code=404)
@self.app.get("/api/tunables/{context_id}")
async def get_tunables(context_id: str):
@@ -636,33 +597,29 @@ class WebServer:
return JSONResponse({"error": "Invalid context_id"}, status_code=400)
context = self.upsert_context(context_id)
return JSONResponse({
- "system-prompt": context["system"][0]["content"],
- "message-history-length": context["message_history_length"]
+ "system_prompt": context["system"][0]["content"],
+ "message_history_length": context["message_history_length"]
})
- @self.app.get("/api/resume/{context_id}")
- async def get_resume(context_id: str):
- if not is_valid_uuid(context_id):
- logging.warning(f"Invalid context_id: {context_id}")
- return JSONResponse({"error": "Invalid context_id"}, status_code=400)
- context = self.upsert_context(context_id)
- return JSONResponse(context["resume_history"])
-
@self.app.get("/api/system-info/{context_id}")
async def get_system_info(context_id: str):
return JSONResponse(system_info(self.model))
- @self.app.post("/api/chat/{context_id}")
- async def chat_endpoint(context_id: str, request: Request):
+ @self.app.post("/api/chat/{context_id}/{type}")
+ async def chat_endpoint(context_id: str, type: str, request: Request):
if not is_valid_uuid(context_id):
logging.warning(f"Invalid context_id: {context_id}")
return JSONResponse({"error": "Invalid context_id"}, status_code=400)
context = self.upsert_context(context_id)
+
+ if type not in context["sessions"]:
+ return JSONResponse({ "error": f"{type} is not recognized", "context": context }, status_code=404)
+
data = await request.json()
# Create a custom generator that ensures flushing
async def flush_generator():
- async for message in self.chat(context=context, content=data["content"]):
+ async for message in self.chat(context=context, type=type, content=data["content"]):
# Convert to JSON and add newline
yield json.dumps(message) + "\n"
# Save the history as it's generated
@@ -681,74 +638,18 @@ class WebServer:
}
)
- @self.app.post("/api/generate-resume/{context_id}")
- async def post_generate_resume(context_id: str, request: Request):
- if not is_valid_uuid(context_id):
- logging.warning(f"Invalid context_id: {context_id}")
- return JSONResponse({"error": "Invalid context_id"}, status_code=400)
- context = self.upsert_context(context_id)
- data = await request.json()
-
- # Create a custom generator that ensures flushing
- async def flush_generator():
- async for message in self.generate_resume(context=context, content=data["content"]):
- # Convert to JSON and add newline
- yield json.dumps(message) + "\n"
- # Save the history as its generated
- self.save_context(context_id)
- # Explicitly flush after each yield
- await asyncio.sleep(0) # Allow the event loop to process the write
-
- # Return StreamingResponse with appropriate headers
- return StreamingResponse(
- flush_generator(),
- media_type="application/json",
- headers={
- "Cache-Control": "no-cache",
- "Connection": "keep-alive",
- "X-Accel-Buffering": "no" # Prevents Nginx buffering if you're using it
- }
- )
-
- @self.app.post("/api/fact-check/{context_id}")
- async def post_fact_check(context_id: str, request: Request):
- if not is_valid_uuid(context_id):
- logging.warning(f"Invalid context_id: {context_id}")
- return JSONResponse({"error": "Invalid context_id"}, status_code=400)
- context = self.upsert_context(context_id)
- data = await request.json()
-
- # Create a custom generator that ensures flushing
- async def flush_generator():
- async for message in self.fact_check(context=context, content=data["content"]):
- # Convert to JSON and add newline
- yield json.dumps(message) + "\n"
- # Save the history as its generated
- self.save_context(context_id)
- # Explicitly flush after each yield
- await asyncio.sleep(0) # Allow the event loop to process the write
-
- # Return StreamingResponse with appropriate headers
- return StreamingResponse(
- flush_generator(),
- media_type="application/json",
- headers={
- "Cache-Control": "no-cache",
- "Connection": "keep-alive",
- "X-Accel-Buffering": "no" # Prevents Nginx buffering if you"re using it
- }
- )
-
@self.app.post("/api/context")
async def create_context():
context = self.create_context()
self.logging.info(f"Generated new session as {context['id']}")
return JSONResponse(context)
- @self.app.get("/api/history/{context_id}")
- async def get_history(context_id: str):
+ @self.app.get("/api/history/{context_id}/{type}")
+ async def get_history(context_id: str, type: str):
context = self.upsert_context(context_id)
- return JSONResponse(context["user_history"])
+ if type not in context["sessions"]:
+ return JSONResponse({ "error": f"{type} is not recognized", "context": context }, status_code=404)
+ return JSONResponse(context["sessions"][type]["user_history"])
@self.app.get("/api/tools/{context_id}")
async def get_tools(context_id: str):
@@ -770,7 +671,7 @@ class WebServer:
tool["enabled"] = enabled
self.save_context(context_id)
return JSONResponse(context["tools"])
- return JSONResponse({ "status": f"{modify} not found in tools." }), 404
+ return JSONResponse({ "status": f"{modify} not found in tools." }, status_code=404)
except:
return JSONResponse({ "status": "error" }), 405
@@ -794,17 +695,19 @@ class WebServer:
tool["enabled"] = enabled
self.save_context(context_id)
return JSONResponse(context["rags"])
- return JSONResponse({ "status": f"{modify} not found in tools." }), 404
+ return JSONResponse({ "status": f"{modify} not found in tools." }, status_code=404)
except:
return JSONResponse({ "status": "error" }), 405
- @self.app.get("/api/context-status/{context_id}")
- async def get_context_status(context_id):
+ @self.app.get("/api/context-status/{context_id}/{type}")
+ async def get_context_status(context_id, type: str):
if not is_valid_uuid(context_id):
logging.warning(f"Invalid context_id: {context_id}")
return JSONResponse({"error": "Invalid context_id"}, status_code=400)
context = self.upsert_context(context_id)
- return JSONResponse({"context_used": context["context_tokens"], "max_context": defines.max_context})
+ if type not in context["sessions"]:
+ return JSONResponse({ "error": f"{type} is not recognized", "context": context }, status_code=404)
+ return JSONResponse({"context_used": context["sessions"][type]["context_tokens"], "max_context": defines.max_context})
@self.app.get("/api/health")
async def health_check():
@@ -839,15 +742,80 @@ class WebServer:
# Create the full file path
file_path = os.path.join(defines.session_dir, session_id)
- umap_model = context.get("umap_model")
- if umap_model:
- del context["umap_model"]
# Serialize the data to JSON and write to file
with open(file_path, "w") as f:
json.dump(context, f)
return session_id
+
+ def migrate_context(self, context):
+ # No version
+ # context = {
+ # "id": context_id,
+ # "tools": default_tools(tools),
+ # "rags": rags.copy(),
+ # "context_tokens": round(len(str(system_context)) * 3 / 4), # Estimate context usage
+ # "message_history_length": 5, # Number of messages to supply in context
+ # "system": system_context,
+ # "system_generate_resume": system_generate_resume,
+ # "llm_history": [],
+ # "user_history": [],
+ # "resume_history": [],
+ # }
+ # Version 2:
+ # context = {
+ # "version": 2,
+ # "id": context_id,
+ # "sessions": {
+ # **TYPE**: { # chat, job_description, resume, fact_check
+ # "system_prompt": **SYSTEM_MESSAGE**,
+ # "llm_history": [],
+ # "user_history": [],
+ # "context_tokens": round(len(str(**SYSTEM_MESSAGE**)) * 3 / 4),
+ # }
+ # },
+ # "tools": default_tools(tools),
+ # "rags": rags.copy(),
+ # "message_history_length": 5 # Number of messages to supply in context
+ # }
+ if "version" not in context:
+ logging.info(f"Migrating {context['id']}")
+ context["version"] = CONTEXT_VERSION
+ context["sessions"] = {
+ "chat": {
+ "system_prompt": system_message,
+ "llm_history": context["llm_history"],
+ "user_history": context["user_history"],
+ "context_tokens": round(len(str(create_system_message(system_message))))
+ },
+ "job_description": {
+ "system_prompt": system_job_description,
+ "llm_history": [],
+ "user_history": [],
+ "context_tokens": round(len(str(create_system_message(system_job_description))))
+ },
+ "resume": {
+ "system_prompt": system_generate_resume,
+ "llm_history": [],
+ "user_history": [],
+ "context_tokens": round(len(str(create_system_message(system_generate_resume))))
+ },
+ "fact_check": {
+ "system_prompt": system_fact_check,
+ "llm_history": [],
+ "user_history": [],
+ "context_tokens": round(len(str(create_system_message(system_fact_check))))
+ },
+ }
+ del context["system"]
+ del context["system_generate_resume"]
+ del context["llm_history"]
+ del context["user_history"]
+ del context["resume_history"]
+
+ return context
+
def load_context(self, session_id):
"""
Load a serialized Python dictionary from a file in the sessions directory.
@@ -868,22 +836,42 @@ class WebServer:
with open(file_path, "r") as f:
self.contexts[session_id] = json.load(f)
- return self.contexts[session_id]
+ return self.migrate_context(self.contexts[session_id])
def create_context(self, context_id = None):
if not context_id:
context_id = str(uuid.uuid4())
- system_context = [{"role": "system", "content": system_message}];
context = {
"id": context_id,
- "system": system_context,
- "system_generate_resume": system_generate_resume,
- "llm_history": [],
- "user_history": [],
+ "version": CONTEXT_VERSION,
+ "sessions": {
+ "chat": {
+ "system_prompt": system_message,
+ "llm_history": [],
+ "user_history": [],
+ "context_tokens": round(len(str(system_message)) * 3 / 4), # Estimate context usage
+ },
+ "job_description": {
+ "system_prompt": system_job_description,
+ "llm_history": [],
+ "user_history": [],
+ "context_tokens": round(len(str(system_job_description)) * 3 / 4), # Estimate context usage
+ },
+ "resume": {
+ "system_prompt": system_generate_resume,
+ "llm_history": [],
+ "user_history": [],
+ "context_tokens": round(len(str(system_generate_resume)) * 3 / 4), # Estimate context usage
+ },
+ "fact_check": {
+ "system_prompt": system_fact_check,
+ "llm_history": [],
+ "user_history": [],
+ "context_tokens": round(len(str(system_fact_check)) * 3 / 4), # Estimate context usage
+ },
+ },
"tools": default_tools(tools),
- "resume_history": [],
"rags": rags.copy(),
- "context_tokens": round(len(str(system_context)) * 3 / 4), # Estimate context usage
"message_history_length": 5 # Number of messages to supply in context
}
logging.info(f"{context_id} created and added to sessions.")
@@ -903,7 +891,7 @@ class WebServer:
logging.info(f"Context {context_id} not found. Creating new context.")
return self.load_context(context_id)
- async def chat(self, context, content):
+ async def chat(self, context, type, content):
if not self.file_watcher:
return
@@ -917,62 +905,173 @@ class WebServer:
return
self.processing = True
-
- llm_history = context["llm_history"]
- user_history = context["user_history"]
- metadata = {
- "rag": {},
- "tools": [],
- "eval_count": 0,
- "eval_duration": 0,
- "prompt_eval_count": 0,
- "prompt_eval_duration": 0,
- }
- rag_docs = []
- for rag in context["rags"]:
- if rag["enabled"] and rag["name"] == "JPK": # Only support JPK rag right now...
- yield {"status": "processing", "message": f"Checking RAG context {rag['name']}..."}
- chroma_results = self.file_watcher.find_similar(query=content, top_k=10)
- if chroma_results:
- rag_docs.extend(chroma_results["documents"])
- chroma_embedding = chroma_results["query_embedding"]
- metadata["rag"] = {
- **chroma_results,
- "name": rag["name"],
- "umap_embedding_2d": self.file_watcher.umap_model_2d.transform([chroma_embedding])[0].tolist(),
- "umap_embedding_3d": self.file_watcher.umap_model_3d.transform([chroma_embedding])[0].tolist()
- }
- preamble = ""
- if len(rag_docs):
- preamble = f"""
-1. Respond to this query: {content}
-2. If there is information in this context to enhance the answer, do so:
-[{context_tag}]:\n"""
- for doc in rag_docs:
- preamble += doc
- preamble += f"\n[/{context_tag}]\nUse all of that information to respond to: "
- # Figure
- llm_history.append({"role": "user", "content": preamble + content})
- user_history.append({"role": "user", "content": content})
+ try:
+ llm_history = context["sessions"][type]["llm_history"]
+ user_history = context["sessions"][type]["user_history"]
+ metadata = {
+ "type": type,
+ "rag": { "documents": [] },
+ "tools": [],
+ "eval_count": 0,
+ "eval_duration": 0,
+ "prompt_eval_count": 0,
+ "prompt_eval_duration": 0,
+ }
- if context["message_history_length"]:
- messages = context["system"] + llm_history[-context["message_history_length"]:]
- else:
- messages = context["system"] + llm_history
+ # Default to not using tools
+ enable_tools = False
+
+ # Default to using RAG
+ enable_rag = True
+
+ # The first time a particular session type is used, it is handled differently. After the initial pass (once the
+ # user_history has at least one entry), the standard 'chat' flow is used.
+ if len(user_history) >= 1:
+ process_type = "chat"
+ # Do not enable RAG when limiting context to the job description chat
+ if type == "job_description":
+ enable_rag = False
+ else:
+ process_type = type
+
+ if enable_rag:
+ for rag in context["rags"]:
+ if rag["enabled"] and rag["name"] == "JPK": # Only support JPK rag right now...
+ yield {"status": "processing", "message": f"Checking RAG context {rag['name']}..."}
+ chroma_results = self.file_watcher.find_similar(query=content, top_k=10)
+ if chroma_results:
+ chroma_embedding = chroma_results["query_embedding"]
+ metadata["rag"] = {
+ **chroma_results,
+ "name": rag["name"],
+ "umap_embedding_2d": self.file_watcher.umap_model_2d.transform([chroma_embedding])[0].tolist(),
+ "umap_embedding_3d": self.file_watcher.umap_model_3d.transform([chroma_embedding])[0].tolist()
+ }
+
+
+ match process_type:
+ # Normal chat interactions with context history
+ case "chat":
+ enable_tools = True
+ preamble = ""
+ rag_context = ""
+ for doc in metadata["rag"]["documents"]:
+ rag_context += doc
+ if rag_context:
+ preamble = f"""
+ 1. Respond to this query: {content}
+ 2. If there is information in this context to enhance the answer, do so:
+ [{context_tag}]
+ {rag_context}
+ [/{context_tag}]
+ Use that information to respond to: """
+
+ # Single job_description is provided; generate a resume
+ case "job_description":
+ # Always force the full resume to be in context
+ resume_doc = open(defines.resume_doc, "r").read()
+ work_history = f"{resume_doc}\n"
+ for doc in metadata["rag"]["documents"]:
+ work_history += f"{doc}\n"
+
+ preamble = f"""
+ [INTRO]
+ {resume_intro}
+ [/INTRO]
+
+ [WORK HISTORY]
+ {work_history}
+ [/WORK HISTORY]
+
+ [JOB DESCRIPTION]
+ {content}
+ [/JOB DESCRIPTION]
+
+ 1. Use the above [INTRO] and [WORK HISTORY] to create the resume for the [JOB DESCRIPTION].
+ 2. Do not use content from the [JOB DESCRIPTION] in the response unless the [WORK HISTORY] mentions them.
+ """
+
+ # Seed the first context messages with the resume from the 'job_description' session
+ case "resume":
+ raise Exception(f"Invalid chat type: {type}")
+
+ # Fact check the resume created by the 'job_description' using only the RAG and resume
+ case "fact_check":
+ if len(context["sessions"]["resume"]["llm_history"]) < 3: # SYSTEM, USER, **ASSISTANT**
+ yield {"status": "done", "message": "No resume history found." }
+ return
+
+ resume = context["sessions"]["resume"]["llm_history"][2]
+
+ metadata = copy.deepcopy(resume["metadata"])
+ metadata["eval_count"] = 0
+ metadata["eval_duration"] = 0
+ metadata["prompt_eval_count"] = 0
+ metadata["prompt_eval_duration"] = 0
+
+ resume_doc = open(defines.resume_doc, "r").read()
+ work_history = f"{resume_doc}\n"
+ for doc in metadata["rag"]["documents"]:
+ work_history += f"{doc}\n"
+
+ preamble = f"""
+ [WORK HISTORY]
+ {work_history}
+ [/WORK HISTORY]
+
+ [RESUME]
+ {resume['content']}
+ [/RESUME]
+ """
+ content = resume['content']
+
+ case _:
+ raise Exception(f"Invalid chat type: {type}")
+
+ llm_history.append({"role": "user", "content": preamble + content})
+ user_history.append({"role": "user", "content": content})
+
+ if context["message_history_length"]:
+ messages = create_system_message(context["sessions"][type]["system_prompt"]) + llm_history[-context["message_history_length"]:]
+ else:
+ messages = create_system_message(context["sessions"][type]["system_prompt"]) + llm_history
- try:
# Estimate token length of new messages
- ctx_size = self.get_optimal_ctx_size(context["context_tokens"], messages=llm_history[-1]["content"])
- yield {"status": "processing", "message": "Processing request...", "num_ctx": ctx_size}
+ ctx_size = self.get_optimal_ctx_size(context["sessions"][type]["context_tokens"], messages=llm_history[-1]["content"])
+
+ processing_type = "Processing query..."
+ match type:
+ case "job_description":
+ processing_type = "Generating resume..."
+ case "fact_check":
+ processing_type = "Fact Checking resume..."
+ if len(llm_history) > 1:
+ processing_type = "Processing query..."
+
+ yield {"status": "processing", "message": processing_type, "num_ctx": ctx_size}
# Use the async generator in an async for loop
- response = self.client.chat(model=self.model, messages=messages, tools=llm_tools(context["tools"]), options={ "num_ctx": ctx_size })
+ try:
+ if enable_tools:
+ response = self.client.chat(model=self.model, messages=messages, tools=llm_tools(context["tools"]), options={ "num_ctx": ctx_size })
+ else:
+ response = self.client.chat(model=self.model, messages=messages, options={ "num_ctx": ctx_size })
+ except Exception as e:
+ logging.info(f"1. {messages[0]}")
+ logging.info(f"[LAST]. {messages[-1]}")
+
+ logging.exception({ "model": self.model, "error": str(e) })
+ yield {"status": "error", "message": f"An error occurred communicating with LLM"}
+ return
+
metadata["eval_count"] += response["eval_count"]
metadata["eval_duration"] += response["eval_duration"]
metadata["prompt_eval_count"] += response["prompt_eval_count"]
metadata["prompt_eval_duration"] += response["prompt_eval_duration"]
- context["context_tokens"] = response["prompt_eval_count"] + response["eval_count"]
+ context["sessions"][type]["context_tokens"] = response["prompt_eval_count"] + response["eval_count"]
tools_used = []
@@ -1015,7 +1114,7 @@ class WebServer:
metadata["tools"] = tools_used
# Estimate token length of new messages
- ctx_size = self.get_optimal_ctx_size(context["context_tokens"], messages=messages[pre_add_index:])
+ ctx_size = self.get_optimal_ctx_size(context["sessions"][type]["context_tokens"], messages=messages[pre_add_index:])
yield {"status": "processing", "message": "Generating final response...", "num_ctx": ctx_size }
# Decrease creativity when processing tool call requests
response = self.client.chat(model=self.model, messages=messages, stream=False, options={ "num_ctx": ctx_size }) #, "temperature": 0.5 })
@@ -1023,7 +1122,7 @@ class WebServer:
metadata["eval_duration"] += response["eval_duration"]
metadata["prompt_eval_count"] += response["prompt_eval_count"]
metadata["prompt_eval_duration"] += response["prompt_eval_duration"]
- context["context_tokens"] = response["prompt_eval_count"] + response["eval_count"]
+ context["sessions"][type]["context_tokens"] = response["prompt_eval_count"] + response["eval_count"]
reply = response["message"]["content"]
final_message = {"role": "assistant", "content": reply }
@@ -1045,145 +1144,6 @@ class WebServer:
finally:
self.processing = False
- async def generate_resume(self, context, content):
- if not self.file_watcher:
- return
-
- content = content.strip()
- if not content:
- yield {"status": "error", "message": "Invalid request"}
- return
-
- if self.processing:
- yield {"status": "error", "message": "Busy"}
- return
-
- self.processing = True
- resume_history = context["resume_history"]
- resume = {
- "job_description": content,
- "resume": "",
- "metadata": {},
- "rag": "",
- "fact_check": {}
- }
-
- metadata = {
- "rag": {},
- "tools": [],
- "eval_count": 0,
- "eval_duration": 0,
- "prompt_eval_count": 0,
- "prompt_eval_duration": 0,
- }
- rag_docs = []
- resume_doc = open(defines.resume_doc, "r").read()
- rag_docs.append(resume_doc)
- for rag in context["rags"]:
- if rag["enabled"] and rag["name"] == "JPK": # Only support JPK rag right now...
- yield {"status": "processing", "message": f"Checking RAG context {rag['name']}..."}
- chroma_results = self.file_watcher.find_similar(query=content, top_k=10)
- if chroma_results:
- rag_docs.extend(chroma_results["documents"])
- metadata["rag"] = { "name": rag["name"], **chroma_results }
- preamble = f"[INTRO]\n{resume_intro}\n[/INTRO]\n"
- preamble += f"""[WORK HISTORY]:\n"""
- for doc in rag_docs:
- preamble += f"{doc}\n"
- resume["rag"] += f"{doc}\n"
- preamble += f"\n[/WORK HISTORY]\n"
-
- content = f"""{preamble}\n
- Use the above [WORK HISTORY] and [INTRO] to create the resume for this [JOB DESCRIPTION]. Do not use the [JOB DESCRIPTION] in the generated resume unless the [WORK HISTORY] mentions them:\n[JOB DESCRIPTION]\n{content}\n[/JOB DESCRIPTION]\n"""
-
- try:
- # Estimate token length of new messages
- ctx_size = self.get_optimal_ctx_size(context["context_tokens"], messages=[system_generate_resume, content])
-
- yield {"status": "processing", "message": "Processing request...", "num_ctx": ctx_size}
-
- # Use the async generator in an async for loop
- #
- # To support URL lookup:
- #
- # 1. Enable tools in a call to chat() with a simple prompt to invoke the tool to generate the summary if requested.
- # 2. If not requested (no tool call,) abort the path
- # 3. Otherwise, we know the URL was good and can use that URLs fetched content as context.
- #
- response = self.client.generate(model=self.model, system=system_generate_resume, prompt=content, options={ "num_ctx": ctx_size })
- metadata["eval_count"] += response["eval_count"]
- metadata["eval_duration"] += response["eval_duration"]
- metadata["prompt_eval_count"] += response["prompt_eval_count"]
- metadata["prompt_eval_duration"] += response["prompt_eval_duration"]
- context["context_tokens"] = response["prompt_eval_count"] + response["eval_count"]
-
- reply = response["response"]
- final_message = {"role": "assistant", "content": reply, "metadata": metadata }
-
- resume["resume"] = final_message
- resume_history.append(resume)
-
- # Return the REST API with metadata
- yield {"status": "done", "message": final_message }
-
- except Exception as e:
- logging.exception({ "model": self.model, "content": content, "error": str(e) })
- yield {"status": "error", "message": f"An error occurred: {str(e)}"}
-
- finally:
- self.processing = False
-
- async def fact_check(self, context, content):
- content = content.strip()
- if not content:
- yield {"status": "error", "message": "Invalid request"}
- return
-
- if self.processing:
- yield {"status": "error", "message": "Busy"}
- return
-
- self.processing = True
- resume_history = context["resume_history"]
- if len(resume_history) == 0:
- yield {"status": "done", "message": "No resume history found." }
- return
-
- resume = resume_history[-1]
- metadata = resume["metadata"]
- metadata["eval_count"] = 0
- metadata["eval_duration"] = 0
- metadata["prompt_eval_count"] = 0
- metadata["prompt_eval_duration"] = 0
-
- content = f"[WORK HISTORY]:{resume['rag']}[/WORK HISTORY]\n\n[RESUME]\n{resume['resume']['content']}\n[/RESUME]\n\n"
-
- try:
- # Estimate token length of new messages
- ctx_size = self.get_optimal_ctx_size(context["context_tokens"], messages=[system_fact_check, content])
- yield {"status": "processing", "message": "Processing request...", "num_ctx": ctx_size}
- response = self.client.generate(model=self.model, system=system_fact_check, prompt=content, options={ "num_ctx": ctx_size })
- logging.info(f"Fact checking {ctx_size} tokens.")
- metadata["eval_count"] += response["eval_count"]
- metadata["eval_duration"] += response["eval_duration"]
- metadata["prompt_eval_count"] += response["prompt_eval_count"]
- metadata["prompt_eval_duration"] += response["prompt_eval_duration"]
- context["context_tokens"] = response["prompt_eval_count"] + response["eval_count"]
- reply = response["response"]
- final_message = {"role": "assistant", "content": reply, "metadata": metadata }
- resume["fact_check"] = final_message
-
- # Return the REST API with metadata
- yield {"status": "done", "message": final_message }
-
- except Exception as e:
- logging.exception({ "model": self.model, "content": content, "error": str(e) })
- yield {"status": "error", "message": f"An error occurred: {str(e)}"}
-
- finally:
- self.processing = False
-
-
def run(self, host="0.0.0.0", port=WEB_PORT, **kwargs):
try:
if self.ssl_enabled:
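
For orientation, the version-2 context layout produced by create_context() and migrate_context(), modeled in TypeScript (descriptive sketch only; field names mirror the Python dicts above):

type SessionType = 'chat' | 'job_description' | 'resume' | 'fact_check';
type MessageData = { role: string; content: string; metadata?: unknown };

interface SessionState {
  system_prompt: string;        // per-type prompt (system_message, system_job_description, ...)
  llm_history: MessageData[];   // full prompts sent to the model, including RAG preambles
  user_history: MessageData[];  // what the user typed and saw
  context_tokens: number;       // rough estimate: ~3/4 of the serialized prompt length
}

interface ContextV2 {
  version: 2;
  id: string;                                   // session UUID
  sessions: Record<SessionType, SessionState>;
  tools: { name: string; enabled: boolean }[];  // default_tools(tools)
  rags: { name: string; enabled: boolean; description: string }[];
  message_history_length: number;               // messages supplied per request (default 5)
}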