diff --git a/frontend/src/Controls.tsx b/frontend/src/Controls.tsx index 1507cfd..fb9f219 100644 --- a/frontend/src/Controls.tsx +++ b/frontend/src/Controls.tsx @@ -109,11 +109,11 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => { 'Content-Type': 'application/json', 'Accept': 'application/json', }, - body: JSON.stringify({ "system-prompt": prompt }), + body: JSON.stringify({ "system_prompt": prompt }), }); const data = await response.json(); - const newPrompt = data["system-prompt"]; + const newPrompt = data["system_prompt"]; if (newPrompt !== serverSystemPrompt) { setServerSystemPrompt(newPrompt); setSystemPrompt(newPrompt) @@ -141,11 +141,11 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => { 'Content-Type': 'application/json', 'Accept': 'application/json', }, - body: JSON.stringify({ "message-history-length": length }), + body: JSON.stringify({ "message_history_length": length }), }); const data = await response.json(); - const newLength = data["message-history-length"]; + const newLength = data["message_history_length"]; if (newLength !== messageHistoryLength) { setMessageHistoryLength(newLength); setSnack("Message history length updated", "success"); @@ -159,7 +159,7 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => { sendMessageHistoryLength(messageHistoryLength); }, [messageHistoryLength, setMessageHistoryLength, connectionBase, sessionId, setSnack]); - const reset = async (types: ("rags" | "tools" | "history" | "system-prompt" | "message-history-length")[], message: string = "Update successful.") => { + const reset = async (types: ("rags" | "tools" | "history" | "system_prompt" | "message_history_length")[], message: string = "Update successful.") => { try { const response = await fetch(connectionBase + `/api/reset/${sessionId}`, { method: 'PUT', @@ -183,9 +183,9 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => { case "tools": setTools(value as Tool[]); break; - case "system-prompt": - setServerSystemPrompt((value as any)["system-prompt"].trim()); - setSystemPrompt((value as any)["system-prompt"].trim()); + case "system_prompt": + setServerSystemPrompt((value as any)["system_prompt"].trim()); + setSystemPrompt((value as any)["system_prompt"].trim()); break; case "history": console.log('TODO: handle history reset'); @@ -346,10 +346,10 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => { }, }); const data = await response.json(); - const serverSystemPrompt = data["system-prompt"].trim(); + const serverSystemPrompt = data["system_prompt"].trim(); setServerSystemPrompt(serverSystemPrompt); setSystemPrompt(serverSystemPrompt); - setMessageHistoryLength(data["message-history-length"]); + setMessageHistoryLength(data["message_history_length"]); } fetchTunables(); @@ -402,7 +402,7 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => { />
- +
@@ -481,7 +481,7 @@ const Controls = ({ sessionId, setSnack, connectionBase }: ControlsParams) => { - + ); } diff --git a/frontend/src/Conversation.tsx b/frontend/src/Conversation.tsx index 852d592..7afb54f 100644 --- a/frontend/src/Conversation.tsx +++ b/frontend/src/Conversation.tsx @@ -3,9 +3,11 @@ import TextField from '@mui/material/TextField'; import Typography from '@mui/material/Typography'; import Tooltip from '@mui/material/Tooltip'; import Button from '@mui/material/Button'; +import IconButton from '@mui/material/IconButton'; import Box from '@mui/material/Box'; import SendIcon from '@mui/icons-material/Send'; - +import ResetIcon from '@mui/icons-material/RestartAlt'; +import { SxProps, Theme } from '@mui/material'; import PropagateLoader from "react-spinners/PropagateLoader"; import { Message, MessageList, MessageData } from './Message'; @@ -14,24 +16,31 @@ import { ContextStatus } from './ContextStatus'; const loadingMessage: MessageData = { "role": "assistant", "content": "Establishing connection with server..." }; -type ConversationMode = 'chat' | 'fact-check' | 'system'; +type ConversationMode = 'chat' | 'job_description' | 'resume' | 'fact_check'; interface ConversationHandle { submitQuery: (query: string) => void; } interface ConversationProps { + className?: string, type: ConversationMode prompt: string, + actionLabel?: string, + resetAction?: () => void, + resetLabel?: string, connectionBase: string, sessionId?: string, setSnack: (message: string, severity: SeverityType) => void, defaultPrompts?: React.ReactElement[], preamble?: MessageList, hideDefaultPrompts?: boolean, + messageFilter?: (messages: MessageList) => MessageList, + messages?: MessageList, + sx?: SxProps, }; -const Conversation = forwardRef(({ prompt, type, preamble, hideDefaultPrompts, defaultPrompts, sessionId, setSnack, connectionBase }: ConversationProps, ref) => { +const Conversation = forwardRef(({ ...props }: ConversationProps, ref) => { const [query, setQuery] = useState(""); const [contextUsedPercentage, setContextUsedPercentage] = useState(0); const [processing, setProcessing] = useState(false); @@ -43,12 +52,13 @@ const Conversation = forwardRef(({ prompt const [contextStatus, setContextStatus] = useState({ context_used: 0, max_context: 0 }); const [contextWarningShown, setContextWarningShown] = useState(false); const [noInteractions, setNoInteractions] = useState(true); + const setSnack = props.setSnack; // Update the context status const updateContextStatus = useCallback(() => { const fetchContextStatus = async () => { try { - const response = await fetch(connectionBase + `/api/context-status/${sessionId}`, { + const response = await fetch(props.connectionBase + `/api/context-status/${props.sessionId}/${props.type}`, { method: 'GET', headers: { 'Content-Type': 'application/json', @@ -68,18 +78,18 @@ const Conversation = forwardRef(({ prompt } }; fetchContextStatus(); - }, [setContextStatus, connectionBase, setSnack, sessionId]); + }, [setContextStatus, props.connectionBase, setSnack, props.sessionId, props.type]); // Set the initial chat history to "loading" or the welcome message if loaded. 
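The history fetch below is now keyed on both the session id and the conversation type, and an unknown type returns a 404. A minimal sketch of exercising the new route from outside the frontend; the base URL and session id are placeholders, and it assumes the third-party `requests` package:

```python
import requests

BASE = "http://localhost:8000"  # placeholder; match your connectionBase

def fetch_history(session_id: str, chat_type: str) -> list:
    """GET /api/history/{session_id}/{chat_type}.

    chat_type is one of: chat, job_description, resume, fact_check.
    A valid type returns the per-type user_history list; an unknown
    type returns HTTP 404 with an {"error": ...} payload.
    """
    resp = requests.get(
        f"{BASE}/api/history/{session_id}/{chat_type}",
        headers={"Accept": "application/json"},
    )
    resp.raise_for_status()
    return resp.json()
```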
useEffect(() => { - if (sessionId === undefined) { + if (props.sessionId === undefined) { setConversation([loadingMessage]); return; } const fetchHistory = async () => { try { - const response = await fetch(connectionBase + `/api/history/${sessionId}`, { + const response = await fetch(props.connectionBase + `/api/history/${props.sessionId}/${props.type}`, { method: 'GET', headers: { 'Content-Type': 'application/json', @@ -89,12 +99,18 @@ const Conversation = forwardRef(({ prompt throw new Error(`Server responded with ${response.status}: ${response.statusText}`); } const data = await response.json(); - console.log(`Session id: ${sessionId} -- history returned from server with ${data.length} entries`) + console.log(`History returned from server with ${data.length} entries`) if (data.length === 0) { - setConversation(preamble || []); + setConversation([ + ...(props.preamble || []), + ...(props.messages || []), + ]); setNoInteractions(true); } else { - setConversation(data); + setConversation([ + ...(props.messages || []), + ...(props.messageFilter ? props.messageFilter(data) : data) + ]); setNoInteractions(false); } updateContextStatus(); @@ -103,10 +119,10 @@ const Conversation = forwardRef(({ prompt setSnack("Unable to obtain chat history.", "error"); } }; - if (sessionId !== undefined) { + if (props.sessionId !== undefined) { fetchHistory(); } - }, [sessionId, setConversation, updateContextStatus, connectionBase, setSnack, preamble]); + }, [props.sessionId, setConversation, updateContextStatus, props.connectionBase, setSnack, props.preamble, props.type]); const isScrolledToBottom = useCallback(()=> { // Current vertical scroll position @@ -191,6 +207,40 @@ const Conversation = forwardRef(({ prompt setContextUsedPercentage(context_used_percentage) }, [contextStatus, setContextWarningShown, contextWarningShown, setContextUsedPercentage, setSnack]); + const reset = async () => { + try { + const response = await fetch(props.connectionBase + `/api/reset/${props.sessionId}/${props.type}`, { + method: 'PUT', + headers: { + 'Content-Type': 'application/json', + 'Accept': 'application/json', + }, + body: JSON.stringify({ reset: 'history' }) + }); + + if (!response.ok) { + throw new Error(`Server responded with ${response.status}: ${response.statusText}`); + } + + if (!response.body) { + throw new Error('Response body is null'); + } + + props.messageFilter && props.messageFilter([]); + + setConversation([ + ...(props.preamble || []), + ...(props.messages || []), + ]); + + setNoInteractions(true); + + } catch (e) { + setSnack("Error resetting history", "error") + console.error('Error resetting history:', e); + } + }; + const sendQuery = async (query: string) => { setNoInteractions(false); @@ -229,7 +279,7 @@ const Conversation = forwardRef(({ prompt } // Make the fetch request with proper headers - const response = await fetch(connectionBase + `/api/chat/${sessionId}`, { + const response = await fetch(props.connectionBase + `/api/chat/${props.sessionId}/${props.type}`, { method: 'POST', headers: { 'Content-Type': 'application/json', @@ -373,8 +423,12 @@ const Conversation = forwardRef(({ prompt }; return ( - - {conversation.map((message, index) => )} + + { + conversation.map((message, index) => + + ) + } (({ prompt >Estimated response time: {countdown}s )} - + setQuery(e.target.value)} onKeyDown={handleKeyPress} - placeholder={prompt} + placeholder={props.prompt} id="QueryInput" /> - - - + + { reset(); }} + > + + + + + + + + - {(noInteractions || !hideDefaultPrompts) && defaultPrompts !== undefined 
&& defaultPrompts.length && + {(noInteractions || !props.hideDefaultPrompts) && props.defaultPrompts !== undefined && props.defaultPrompts.length && { - defaultPrompts.map((element, index) => { + props.defaultPrompts.map((element, index) => { return ({element}); }) } diff --git a/frontend/src/DocumentViewer.tsx b/frontend/src/DocumentViewer.tsx index 68d823c..dd38ad6 100644 --- a/frontend/src/DocumentViewer.tsx +++ b/frontend/src/DocumentViewer.tsx @@ -28,33 +28,21 @@ import { SxProps, Theme } from '@mui/material'; import MuiMarkdown from 'mui-markdown'; -import { Message } from './Message'; +import { Message, ChatQuery } from './Message'; import { Document } from './Document'; -import { MessageData } from './Message'; +import { MessageData, MessageList } from './Message'; import { SeverityType } from './Snack'; +import { Conversation } from './Conversation'; /** * Props for the DocumentViewer component * @interface DocumentViewerProps - * @property {function} generateResume - Function to generate a resume based on job description - * @property {MessageData | undefined} resume - The generated resume data - * @property {function} setResume - Function to set the generated resume - * @property {function} factCheck - Function to fact check the generated resume - * @property {MessageData | undefined} facts - The fact check results - * @property {function} setFacts - Function to set the fact check results - * @property {string} jobDescription - The initial job description - * @property {function} setJobDescription - Function to set the job description * @property {SxProps} [sx] - Optional styling properties + * @property {string} [connectionBase] - Base URL for fetch calls + * @property {string} [sessionId] - Session ID + * @property {(message: string, severity: SeverityType) => void} - setSnack UI callback */ export interface DocumentViewerProps { - generateResume: (jobDescription: string) => void; - resume: MessageData | undefined; - setResume: (resume: MessageData | undefined) => void; - factCheck: (resume: string) => void; - facts: MessageData | undefined; - setFacts: (facts: MessageData | undefined) => void; - jobDescription: string | undefined; - setJobDescription: (jobDescription: string | undefined) => void; sx?: SxProps; connectionBase: string; sessionId: string; @@ -67,19 +55,16 @@ export interface DocumentViewerProps { * with different layouts for mobile and desktop views. 
*/ const DocumentViewer: React.FC = ({ - generateResume, - jobDescription, - factCheck, - resume, - setResume, - facts, - setFacts, sx, connectionBase, sessionId, setSnack }) => { // State for editing job description + const [jobDescription, setJobDescription] = useState(undefined); + const [facts, setFacts] = useState(undefined); + const [resume, setResume] = useState(undefined); + const [editJobDescription, setEditJobDescription] = useState(jobDescription); // Processing state to show loading indicators const [processing, setProcessing] = useState(undefined); @@ -122,8 +107,8 @@ const DocumentViewer: React.FC = ({ } setProcessing("resume"); setTimeout(() => { setActiveTab(1); }, 250); // Switch to resume view on mobile - generateResume(description); - }, [generateResume, setProcessing, setActiveTab, setResume]); + console.log('generateResume(description);'); + }, [/*generateResume,*/ setProcessing, setActiveTab, setResume]); /** * Trigger fact check and update UI state @@ -137,9 +122,9 @@ const DocumentViewer: React.FC = ({ return; } setProcessing("facts"); - factCheck(resume); + console.log('factCheck(resume)'); setTimeout(() => { setActiveTab(2); }, 250); // Switch to resume view on mobile - }, [factCheck, setResume, setProcessing, setActiveTab, setFacts]); + }, [/*factCheck,*/ setResume, setProcessing, setActiveTab, setFacts]); useEffect(() => { setEditJobDescription(jobDescription); @@ -192,62 +177,73 @@ const DocumentViewer: React.FC = ({ triggerGeneration(editJobDescription || ""); } }; + const handleJobQuery = (query: string) => { + triggerGeneration(query); + }; - const renderJobDescriptionView = () => { - const children = []; + const jobDescriptionQuestions = [ + + + + , + ]; - if (resume === undefined && processing === undefined) { - children.push( - - setEditJobDescription(e.target.value)} - onKeyDown={handleKeyPress} - placeholder="Paste a job description, then click Generate..." - /> - - ); - } else { - children.push({editJobDescription}) + const filterJobDescriptionMessages = (messages: MessageList): MessageList => { + /* The second message is the RESUME (the LLM response to the JOB-DESCRIPTION) */ + if (messages.length > 1) { + setResume(messages[1]); + } else if (resume !== undefined) { + setResume(undefined); } - children.push( - - { setEditJobDescription(""); triggerGeneration(undefined); }} - > - - - - - - - - - ); + /* Filter out the RESUME */ + const reduced = messages.filter((message, index) => index !== 1); - return children; + /* Set the first message as coming from the assistant (rendered as markdown) */ + if (reduced.length > 0) { + reduced[0].role = 'assistant'; + } + return reduced; + }; + + const jobDescriptionMessages: MessageList = []; + + const renderJobDescriptionView = () => { + if (resume === undefined) { + return + + } else { + return + + } } /** @@ -363,7 +359,7 @@ const DocumentViewer: React.FC = ({ const otherRatio = showResume ? 
(100 - splitRatio / 2) : 100; const children = []; children.push( - + {renderJobDescriptionView()} ); @@ -418,7 +414,7 @@ const DocumentViewer: React.FC = ({ } return ( - + {children} @@ -428,7 +424,7 @@ const DocumentViewer: React.FC = ({ } return ( - + {getActiveDesktopContent()} ); diff --git a/frontend/src/Message.tsx b/frontend/src/Message.tsx index 8f75205..ab3cbee 100644 --- a/frontend/src/Message.tsx +++ b/frontend/src/Message.tsx @@ -233,7 +233,7 @@ const Message = ({ message, submitQuery, isFullWidth, sessionId, setSnack, conne const formattedContent = message.content.trim(); return ( - + { - const [lastEvalTPS, setLastEvalTPS] = useState(35); - const [lastPromptTPS, setLastPromptTPS] = useState(430); - const [contextStatus, setContextStatus] = useState({ context_used: 0, max_context: 0 }); const [jobDescription, setJobDescription] = useState(undefined); - - const updateContextStatus = useCallback(() => { - fetch(connectionBase + `/api/context-status/${sessionId}`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json', - }, - }) - .then(response => response.json()) - .then(data => { - setContextStatus(data); - }) - .catch(error => { - console.error('Error getting context status:', error); - setSnack("Unable to obtain context status.", "error"); - }); - }, [setContextStatus, connectionBase, setSnack, sessionId]); - - // If the jobDescription and resume have not been set, fetch them from the server - useEffect(() => { - if (sessionId === undefined) { - return; - } - if (jobDescription !== undefined) { - return; - } - const fetchResume = async () => { - try { - // Make the fetch request with proper headers - const response = await fetch(connectionBase + `/api/resume/${sessionId}`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - }, - }); - if (!response.ok) { - throw Error(); - } - const data: Resume[] = await response.json(); - if (data.length) { - const lastResume = data[data.length - 1]; - console.log(lastResume); - setJobDescription(lastResume['job_description']); - setResume(lastResume.resume); - if (lastResume['fact_check'] !== undefined && lastResume['fact_check'] !== null) { - lastResume['fact_check'].role = 'info'; - setFacts(lastResume['fact_check']) - } else { - setFacts(undefined) - } - } - } catch (error: any) { - setSnack("Unable to fetch resume", "error"); - console.error(error); - } - } - - fetchResume(); - }, [sessionId, resume, jobDescription, setResume, setJobDescription, setSnack, setFacts, connectionBase]); - - // const startCountdown = (seconds: number) => { - // if (timerRef.current) clearInterval(timerRef.current); - // setCountdown(seconds); - // timerRef.current = setInterval(() => { - // setCountdown((prev) => { - // if (prev <= 1) { - // clearInterval(timerRef.current); - // timerRef.current = null; - // if (isScrolledToBottom()) { - // setTimeout(() => { - // scrollToBottom(); - // }, 50) - // } - // return 0; - // } - // return prev - 1; - // }); - // }, 1000); - // }; - - // const stopCountdown = () => { - // if (timerRef.current) { - // clearInterval(timerRef.current); - // timerRef.current = null; - // setCountdown(0); - // } - // }; - if (sessionId === undefined) { return (<>); } - const generateResume = async (description: string) => { - if (!description.trim()) return; - setResume(undefined); - setFacts(undefined); - - try { - setProcessing(true); - - // Add initial processing message - //setGenerateStatus({ role: 'assistant', content: 'Processing request...' 
}); - - // Make the fetch request with proper headers - const response = await fetch(connectionBase + `/api/generate-resume/${sessionId}`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - }, - body: JSON.stringify({ content: description.trim() }), - }); - - // We'll guess that the response will be around 500 tokens... - const token_guess = 500; - const estimate = Math.round(token_guess / lastEvalTPS + contextStatus.context_used / lastPromptTPS); - - setSnack(`Job description sent. Response estimated in ${estimate}s.`, "info"); - //startCountdown(Math.round(estimate)); - - if (!response.ok) { - throw new Error(`Server responded with ${response.status}: ${response.statusText}`); - } - - if (!response.body) { - throw new Error('Response body is null'); - } - - // Set up stream processing with explicit chunking - const reader = response.body.getReader(); - const decoder = new TextDecoder(); - let buffer = ''; - - while (true) { - const { done, value } = await reader.read(); - if (done) { - break; - } - - const chunk = decoder.decode(value, { stream: true }); - - // Process each complete line immediately - buffer += chunk; - let lines = buffer.split('\n'); - buffer = lines.pop() || ''; // Keep incomplete line in buffer - for (const line of lines) { - if (!line.trim()) continue; - - try { - const update = JSON.parse(line); - - // Force an immediate state update based on the message type - if (update.status === 'processing') { - // Update processing message with immediate re-render - //setGenerateStatus({ role: 'info', content: update.message }); - console.log(update.num_ctx); - - // Add a small delay to ensure React has time to update the UI - await new Promise(resolve => setTimeout(resolve, 0)); - - } else if (update.status === 'done') { - // Replace processing message with final result - //setGenerateStatus(undefined); - setResume(update.message); - const metadata = update.message.metadata; - const evalTPS = metadata.eval_count * 10 ** 9 / metadata.eval_duration; - const promptTPS = metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration; - setLastEvalTPS(evalTPS ? evalTPS : 35); - setLastPromptTPS(promptTPS ? promptTPS : 35); - updateContextStatus(); - } else if (update.status === 'error') { - // Show error - //setGenerateStatus({ role: 'error', content: update.message }); - } - } catch (e) { - setSnack("Error generating resume", "error") - console.error('Error parsing JSON:', e, line); - } - } - } - - // Process any remaining buffer content - if (buffer.trim()) { - try { - const update = JSON.parse(buffer); - - if (update.status === 'done') { - //setGenerateStatus(undefined); - setResume(update.message); - } - } catch (e) { - setSnack("Error processing job description", "error") - } - } - - //stopCountdown(); - setProcessing(false); - } catch (error) { - console.error('Fetch error:', error); - setSnack("Unable to process job description", "error"); - //setGenerateStatus({ role: 'error', content: `Error: ${error}` }); - setProcessing(false); - //stopCountdown(); - } - }; - - const factCheck = async (resume: string) => { - if (!resume.trim()) return; - setFacts(undefined); - - try { - setProcessing(true); - - const response = await fetch(connectionBase + `/api/fact-check/${sessionId}`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - }, - body: JSON.stringify({ content: resume.trim() }), - }); - - // We'll guess that the response will be around 500 tokens... 
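The response-time guess used below divides an assumed 500-token reply by the measured generation rate, then adds the time to re-evaluate the existing context at the prompt-evaluation rate. A worked sketch of that arithmetic with the defaults this file seeds (35 and 430 tokens/second):

```python
def estimate_response_seconds(context_used: int,
                              eval_tps: float = 35.0,
                              prompt_tps: float = 430.0,
                              token_guess: int = 500) -> int:
    # time to generate ~token_guess tokens + time to re-read the context
    return round(token_guess / eval_tps + context_used / prompt_tps)

# With 4300 tokens of context: 500/35 + 4300/430 = ~14.3 + 10.0 -> 24s
assert estimate_response_seconds(4300) == 24
```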
- const token_guess = 500; - const estimate = Math.round(token_guess / lastEvalTPS + contextStatus.context_used / lastPromptTPS); - - setSnack(`Resume sent for Fact Check. Response estimated in ${estimate}s.`, "info"); - //startCountdown(Math.round(estimate)); - - if (!response.ok) { - throw new Error(`Server responded with ${response.status}: ${response.statusText}`); - } - - if (!response.body) { - throw new Error('Response body is null'); - } - - // Set up stream processing with explicit chunking - const reader = response.body.getReader(); - const decoder = new TextDecoder(); - let buffer = ''; - - while (true) { - const { done, value } = await reader.read(); - if (done) { - break; - } - - const chunk = decoder.decode(value, { stream: true }); - - // Process each complete line immediately - buffer += chunk; - let lines = buffer.split('\n'); - buffer = lines.pop() || ''; // Keep incomplete line in buffer - for (const line of lines) { - if (!line.trim()) continue; - - try { - const update = JSON.parse(line); - - // Force an immediate state update based on the message type - if (update.status === 'processing') { - // Add a small delay to ensure React has time to update the UI - await new Promise(resolve => setTimeout(resolve, 0)); - - } else if (update.status === 'done') { - // Replace processing message with final result - update.message.role = 'info'; - setFacts(update.message); - const metadata = update.message.metadata; - const evalTPS = metadata.eval_count * 10 ** 9 / metadata.eval_duration; - const promptTPS = metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration; - setLastEvalTPS(evalTPS ? evalTPS : 35); - setLastPromptTPS(promptTPS ? promptTPS : 35); - updateContextStatus(); - } else if (update.status === 'error') { - } - } catch (e) { - setSnack("Error generating resume", "error") - console.error('Error parsing JSON:', e, line); - } - } - } - - // Process any remaining buffer content - if (buffer.trim()) { - try { - const update = JSON.parse(buffer); - - if (update.status === 'done') { - update.message.role = 'info'; - setFacts(update.message); - } - } catch (e) { - setSnack("Error processing resume", "error") - } - } - - //stopCountdown(); - setProcessing(false); - } catch (error) { - console.error('Fetch error:', error); - setSnack("Unable to process resume", "error"); - //setGenerateStatus({ role: 'error', content: `Error: ${error}` }); - setProcessing(false); - //stopCountdown(); - } - }; - return ( @@ -350,7 +40,7 @@ const ResumeBuilder = ({ facts, setFacts, resume, setResume, setProcessing, proc overflowY: "auto", flexDirection: "column", height: "calc(0vh - 0px)", // Hack to make the height work - }} {...{ factCheck, facts, jobDescription, generateResume, resume, setFacts, setResume, setSnack, setJobDescription, connectionBase, sessionId }} /> + }} {...{ setSnack, connectionBase, sessionId }} /> ); diff --git a/src/server.py b/src/server.py index 1def9b8..9439501 100644 --- a/src/server.py +++ b/src/server.py @@ -11,6 +11,7 @@ import uuid import subprocess import re import math +import copy def try_import(module_name, pip_name=None): try: @@ -52,6 +53,8 @@ from tools import ( tools ) +CONTEXT_VERSION=2 + rags = [ { "name": "JPK", "enabled": True, "description": "Expert data about James Ketrenos, including work history, personal hobbies, and projects." }, # { "name": "LKML", "enabled": False, "description": "Full associative data for entire LKML mailing list archive." }, @@ -164,6 +167,8 @@ Always use tools and [{context_tag}] when possible. 
Be concise, and never make up information. """.strip() system_generate_resume = f""" +Launched on {DateTime()}. + You are a professional resume writer. Your task is to write a polished, tailored resume for a specific job based only on the individual's [WORK HISTORY]. When answering queries, follow these steps: @@ -188,9 +193,11 @@ Structure the resume professionally with the following sections where applicable Do not include any information unless it is provided in [WORK HISTORY] or [INTRO]. Ensure the language is clear, concise, and aligned with industry standards for professional resumes. -""" +""".strip() system_fact_check = f""" +Launched on {DateTime()}. + You are a professional resume fact checker. Your task is to identify any inaccuracies in the [RESUME] based on the individual's [WORK HISTORY]. If there are inaccuracies, list them in a bullet point format. @@ -198,7 +205,20 @@ When answering queries, follow these steps: 1. You must not invent or assume any information not explicitly present in the [WORK HISTORY]. 2. Analyze the [RESUME] to identify any discrepancies or inaccuracies based on the [WORK HISTORY]. -""" +""".strip() + +system_job_description = f""" +Launched on {DateTime()}. + +You are a hiring and job placement specialist. Your task is to answer questions about a job description. + +When answering queries, follow these steps: +1. Analyze the [JOB DESCRIPTION] to provide insights for the question asked. +2. If any financial information is requested, be sure to account for inflation. +""".strip() + +def create_system_message(prompt): + return [{"role": "system", "content": prompt}] tool_log = [] command_log = [] @@ -374,6 +394,9 @@ async def handle_tool_calls(message): final_result = all_responses[0] if len(all_responses) == 1 else all_responses yield (final_result, tools_used) + + + # %% class WebServer: def __init__(self, logging, client, model=MODEL_NAME): @@ -431,71 +454,6 @@ class WebServer: return RedirectResponse(url=f"/{context['id']}", status_code=307) #return JSONResponse({"redirect": f"/{context['id']}"}) - @self.app.get("/api/query") - async def query_documents(query: str, top_k: int = 3): - if not self.file_watcher: - return - - """Query the RAG system with the given prompt.""" - results = self.file_watcher.find_similar(query, top_k=top_k) - return { - "query": query, - "results": [ - { - "content": doc, - "metadata": meta, - "distance": dist - } - for doc, meta, dist in zip( - results["documents"], - results["metadatas"], - results["distances"] - ) - ] - } - - @self.app.post("/api/refresh/{file_path:path}") - async def refresh_document(file_path: str, background_tasks: BackgroundTasks): - if not self.file_watcher: - return - - """Manually refresh a specific document in the collection.""" - full_path = os.path.join(defines.doc_dir, file_path) - - if not os.path.exists(full_path): - return {"status": "error", "message": "File not found"} - - # Schedule the update in the background - background_tasks.add_task( - self.file_watcher.process_file_update, full_path - ) - - return { - "status": "success", - "message": f"Document refresh scheduled for {file_path}" - } - - # @self.app.post("/api/refresh-all") - # async def refresh_all_documents(): - # if not self.file_watcher: - # return - - # """Refresh all documents in the collection.""" - # # Re-initialize file hashes and process all files - # self.file_watcher._initialize_file_hashes() - - # # Schedule updates for all files - # file_paths = self.file_watcher.file_hashes.keys()
- # tasks = [self.file_watcher.process_file_update(path) for path in file_paths] - - # # Wait for all updates to complete - # await asyncio.gather(*tasks) - - # return { - # "status": "success", - # "message": f"Refreshed {len(file_paths)} documents", - # "document_count": file_watcher.collection.count() - # } @self.app.put("/api/umap/{context_id}") async def put_umap(context_id: str, request: Request): @@ -566,20 +524,23 @@ class WebServer: logging.error(e) #return JSONResponse({"error": str(e)}, 500) - @self.app.put("/api/reset/{context_id}") - async def put_reset(context_id: str, request: Request): + @self.app.put("/api/reset/{context_id}/{type}") + async def put_reset(context_id: str, type: str, request: Request): if not is_valid_uuid(context_id): logging.warning(f"Invalid context_id: {context_id}") return JSONResponse({"error": "Invalid context_id"}, status_code=400) context = self.upsert_context(context_id) + if type not in context["sessions"]: + return JSONResponse({ "error": f"{type} is not recognized", "context": context }, status_code=404) + data = await request.json() try: response = {} for reset in data["reset"]: match reset: - case "system-prompt": - context["system"] = [{"role": "system", "content": system_message}] - response["system-prompt"] = { "system-prompt": system_message } + case "system_prompt": + context["sessions"][type]["system_prompt"] = system_message + response["system_prompt"] = { "system_prompt": system_message } case "rags": context["rags"] = rags.copy() response["rags"] = context["rags"] @@ -587,23 +548,23 @@ class WebServer: context["tools"] = default_tools(tools) response["tools"] = context["tools"] case "history": - context["llm_history"] = [] - context["user_history"] = [] + context["sessions"][type]["llm_history"] = [] + context["sessions"][type]["user_history"] = [] + context["sessions"][type]["context_tokens"] = round(len(str(context["system"])) * 3 / 4) # Estimate context usage response["history"] = [] - context["context_tokens"] = round(len(str(context["system"])) * 3 / 4) # Estimate context usage - response["context_used"] = context["context_tokens"] - case "message-history-length": + response["context_used"] = context["sessions"][type]["context_tokens"] + case "message_history_length": context["message_history_length"] = DEFAULT_HISTORY_LENGTH - response["message-history-length"] = DEFAULT_HISTORY_LENGTH + response["message_history_length"] = DEFAULT_HISTORY_LENGTH if not response: - return JSONResponse({ "error": "Usage: { reset: rags|tools|history|system-prompt}"}) + return JSONResponse({ "error": "Usage: { reset: rags|tools|history|system_prompt}"}) else: self.save_context(context_id) return JSONResponse(response) except: - return JSONResponse({ "error": "Usage: { reset: rags|tools|history|system-prompt}"}) + return JSONResponse({ "error": "Usage: { reset: rags|tools|history|system_prompt}"}) @self.app.put("/api/tunables/{context_id}") async def put_tunables(context_id: str, request: Request): @@ -614,20 +575,20 @@ class WebServer: data = await request.json() for k in data.keys(): match k: - case "system-prompt": + case "system_prompt": system_prompt = data[k].strip() if not system_prompt: return JSONResponse({ "status": "error", "message": "System prompt can not be empty." 
}) context["system"] = [{"role": "system", "content": system_prompt}] self.save_context(context_id) - return JSONResponse({ "system-prompt": system_prompt }) - case "message-history-length": + return JSONResponse({ "system_prompt": system_prompt }) + case "message_history_length": value = max(0, int(data[k])) context["message_history_length"] = value self.save_context(context_id) - return JSONResponse({ "message-history-length": value }) + return JSONResponse({ "message_history_length": value }) case _: - return JSONResponse({ "error": f"Unrecognized tunable {k}"}, 404) + return JSONResponse({ "error": f"Unrecognized tunable {k}"}, status_code=404) @self.app.get("/api/tunables/{context_id}") async def get_tunables(context_id: str): @@ -636,33 +597,29 @@ class WebServer: return JSONResponse({"error": "Invalid context_id"}, status_code=400) context = self.upsert_context(context_id) return JSONResponse({ - "system-prompt": context["system"][0]["content"], - "message-history-length": context["message_history_length"] + "system_prompt": context["system"][0]["content"], + "message_history_length": context["message_history_length"] }) - @self.app.get("/api/resume/{context_id}") - async def get_resume(context_id: str): - if not is_valid_uuid(context_id): - logging.warning(f"Invalid context_id: {context_id}") - return JSONResponse({"error": "Invalid context_id"}, status_code=400) - context = self.upsert_context(context_id) - return JSONResponse(context["resume_history"]) - @self.app.get("/api/system-info/{context_id}") async def get_system_info(context_id: str): return JSONResponse(system_info(self.model)) - @self.app.post("/api/chat/{context_id}") - async def chat_endpoint(context_id: str, request: Request): + @self.app.post("/api/chat/{context_id}/{type}") + async def chat_endpoint(context_id: str, type: str, request: Request): if not is_valid_uuid(context_id): logging.warning(f"Invalid context_id: {context_id}") return JSONResponse({"error": "Invalid context_id"}, status_code=400) context = self.upsert_context(context_id) + + if type not in context["sessions"]: + return JSONResponse({ "error": f"{type} is not recognized", "context": context }, status_code=404) + data = await request.json() # Create a custom generator that ensures flushing async def flush_generator(): - async for message in self.chat(context=context, content=data["content"]): + async for message in self.chat(context=context, type=type, content=data["content"]): # Convert to JSON and add newline yield json.dumps(message) + "\n" # Save the history as its generated @@ -681,74 +638,18 @@ class WebServer: } ) - @self.app.post("/api/generate-resume/{context_id}") - async def post_generate_resume(context_id: str, request: Request): - if not is_valid_uuid(context_id): - logging.warning(f"Invalid context_id: {context_id}") - return JSONResponse({"error": "Invalid context_id"}, status_code=400) - context = self.upsert_context(context_id) - data = await request.json() - - # Create a custom generator that ensures flushing - async def flush_generator(): - async for message in self.generate_resume(context=context, content=data["content"]): - # Convert to JSON and add newline - yield json.dumps(message) + "\n" - # Save the history as its generated - self.save_context(context_id) - # Explicitly flush after each yield - await asyncio.sleep(0) # Allow the event loop to process the write - - # Return StreamingResponse with appropriate headers - return StreamingResponse( - flush_generator(), - media_type="application/json", - headers={ - 
"Cache-Control": "no-cache", - "Connection": "keep-alive", - "X-Accel-Buffering": "no" # Prevents Nginx buffering if you're using it - } - ) - - @self.app.post("/api/fact-check/{context_id}") - async def post_fact_check(context_id: str, request: Request): - if not is_valid_uuid(context_id): - logging.warning(f"Invalid context_id: {context_id}") - return JSONResponse({"error": "Invalid context_id"}, status_code=400) - context = self.upsert_context(context_id) - data = await request.json() - - # Create a custom generator that ensures flushing - async def flush_generator(): - async for message in self.fact_check(context=context, content=data["content"]): - # Convert to JSON and add newline - yield json.dumps(message) + "\n" - # Save the history as its generated - self.save_context(context_id) - # Explicitly flush after each yield - await asyncio.sleep(0) # Allow the event loop to process the write - - # Return StreamingResponse with appropriate headers - return StreamingResponse( - flush_generator(), - media_type="application/json", - headers={ - "Cache-Control": "no-cache", - "Connection": "keep-alive", - "X-Accel-Buffering": "no" # Prevents Nginx buffering if you"re using it - } - ) - @self.app.post("/api/context") async def create_context(): context = self.create_context() self.logging.info(f"Generated new session as {context['id']}") return JSONResponse(context) - @self.app.get("/api/history/{context_id}") - async def get_history(context_id: str): + @self.app.get("/api/history/{context_id}/{type}") + async def get_history(context_id: str, type: str): context = self.upsert_context(context_id) - return JSONResponse(context["user_history"]) + if type not in context["sessions"]: + return JSONResponse({ "error": f"{type} is not recognized", "context": context }, status_code=404) + return JSONResponse(context["sessions"][type]["user_history"]) @self.app.get("/api/tools/{context_id}") async def get_tools(context_id: str): @@ -770,7 +671,7 @@ class WebServer: tool["enabled"] = enabled self.save_context(context_id) return JSONResponse(context["tools"]) - return JSONResponse({ "status": f"{modify} not found in tools." }), 404 + return JSONResponse({ "status": f"{modify} not found in tools." }, status_code=404) except: return JSONResponse({ "status": "error" }), 405 @@ -794,17 +695,19 @@ class WebServer: tool["enabled"] = enabled self.save_context(context_id) return JSONResponse(context["rags"]) - return JSONResponse({ "status": f"{modify} not found in tools." }), 404 + return JSONResponse({ "status": f"{modify} not found in tools." 
}, status_code=404) except: return JSONResponse({ "status": "error" }), 405 - @self.app.get("/api/context-status/{context_id}") - async def get_context_status(context_id): + @self.app.get("/api/context-status/{context_id}/{type}") + async def get_context_status(context_id, type: str): if not is_valid_uuid(context_id): logging.warning(f"Invalid context_id: {context_id}") return JSONResponse({"error": "Invalid context_id"}, status_code=400) context = self.upsert_context(context_id) - return JSONResponse({"context_used": context["context_tokens"], "max_context": defines.max_context}) + if type not in context["sessions"]: + return JSONResponse({ "error": f"{type} is not recognized", "context": context }, status_code=404) + return JSONResponse({"context_used": context["sessions"][type]["context_tokens"], "max_context": defines.max_context}) @self.app.get("/api/health") async def health_check(): @@ -839,15 +742,80 @@ class WebServer: # Create the full file path file_path = os.path.join(defines.session_dir, session_id) - umap_model = context.get("umap_model") - if umap_model: - del context["umap_model"] # Serialize the data to JSON and write to file with open(file_path, "w") as f: json.dump(context, f) return session_id + + def migrate_context(self, context): + # No version + # context = { + # "id": context_id, + # "tools": default_tools(tools), + # "rags": rags.copy(), + # "context_tokens": round(len(str(system_context)) * 3 / 4), # Estimate context usage + # "message_history_length": 5, # Number of messages to supply in context + # "system": system_context, + # "system_generate_resume": system_generate_resume, + # "llm_history": [], + # "user_history": [], + # "resume_history": [], + # } + # Version 2: + # context = { + # "version": 2, + # "id": context_id, + # "sessions": { + # **TYPE**: { # chat, job-description, resume, fact-check + # "system_prompt": **SYSTEM_MESSAGE**, + # "llm_history": [], + # "user_history": [], + # "context_tokens": round(len(str(**SYSTEM_MESSAGE**)) * 3 / 4), + # } + # }, + # "tools": default_tools(tools), + # "rags": rags.copy(), + # "message_history_length": 5 # Number of messages to supply in context + # } + if "version" not in context: + logging.info(f"Migrating {context['id']}") + context["version"] = CONTEXT_VERSION + context["sessions"] = { + "chat": { + "system_prompt": system_message, + "llm_history": context["llm_history"], + "user_history": context["user_history"], + "context_tokens": round(len(str(create_system_message(system_message)))) + }, + "job_description": { + "system_prompt": system_job_description, + "llm_history": [], + "user_history": [], + "context_tokens": round(len(str(create_system_message(system_job_description)))) + }, + "resume": { + "system_prompt": system_generate_resume, + "llm_history": [], + "user_history": [], + "context_tokens": round(len(str(create_system_message(system_generate_resume)))) + }, + "fact_check": { + "system_prompt": system_fact_check, + "llm_history": [], + "user_history": [], + "context_tokens": round(len(str(create_system_message(system_fact_check)))) + }, + } + del context["system"] + del context["system_generate_resume"] + del context["llm_history"] + del context["user_history"] + del context["resume_history"] + + return context + def load_context(self, session_id): """ Load a serialized Python dictionary from a file in the sessions directory. 
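The migrate_context shape change above, condensed into a standalone sketch. The prompt strings are stand-ins, and the token estimate uses the ~3/4-of-characters heuristic that create_context applies (the migration hunk itself computes len(str(...)) without the 3/4 factor, so treat this as the create_context variant):

```python
CONTEXT_VERSION = 2
SESSION_TYPES = ("chat", "job_description", "resume", "fact_check")

def migrate_v1_to_v2(ctx: dict, prompts: dict) -> dict:
    """Fold the flat v1 history fields into per-type sessions (v2)."""
    if ctx.get("version", 1) >= CONTEXT_VERSION:
        return ctx  # already current
    ctx["version"] = CONTEXT_VERSION
    ctx["sessions"] = {
        t: {
            "system_prompt": prompts[t],
            # only the original chat history is carried forward
            "llm_history": ctx["llm_history"] if t == "chat" else [],
            "user_history": ctx["user_history"] if t == "chat" else [],
            # rough token estimate: ~3/4 of the character count
            "context_tokens": round(len(prompts[t]) * 3 / 4),
        }
        for t in SESSION_TYPES
    }
    for old in ("system", "system_generate_resume", "llm_history",
                "user_history", "resume_history"):
        ctx.pop(old, None)
    return ctx
```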
@@ -868,22 +836,42 @@ class WebServer: with open(file_path, "r") as f: self.contexts[session_id] = json.load(f) - return self.contexts[session_id] + return self.migrate_context(self.contexts[session_id]) def create_context(self, context_id = None): if not context_id: context_id = str(uuid.uuid4()) - system_context = [{"role": "system", "content": system_message}]; context = { "id": context_id, - "system": system_context, - "system_generate_resume": system_generate_resume, - "llm_history": [], - "user_history": [], + "version": CONTEXT_VERSION, + "sessions": { + "chat": { + "system_prompt": system_message, + "llm_history": [], + "user_history": [], + "context_tokens": round(len(str(system_message)) * 3 / 4), # Estimate context usage + }, + "job_description": { + "system_prompt": system_job_description, + "llm_history": [], + "user_history": [], + "context_tokens": round(len(str(system_job_description)) * 3 / 4), # Estimate context usage + }, + "resume": { + "system_prompt": system_generate_resume, + "llm_history": [], + "user_history": [], + "context_tokens": round(len(str(system_generate_resume)) * 3 / 4), # Estimate context usage + }, + "fact_check": { + "system_prompt": system_fact_check, + "llm_history": [], + "user_history": [], + "context_tokens": round(len(str(system_fact_check)) * 3 / 4), # Estimate context usage + }, + }, "tools": default_tools(tools), - "resume_history": [], "rags": rags.copy(), - "context_tokens": round(len(str(system_context)) * 3 / 4), # Estimate context usage "message_history_length": 5 # Number of messages to supply in context } logging.info(f"{context_id} created and added to sessions.") @@ -903,7 +891,7 @@ class WebServer: logging.info(f"Context {context_id} not found. Creating new context.") return self.load_context(context_id) - async def chat(self, context, content): + async def chat(self, context, type, content): if not self.file_watcher: return @@ -917,62 +905,173 @@ class WebServer: return self.processing = True - - llm_history = context["llm_history"] - user_history = context["user_history"] - metadata = { - "rag": {}, - "tools": [], - "eval_count": 0, - "eval_duration": 0, - "prompt_eval_count": 0, - "prompt_eval_duration": 0, - } - rag_docs = [] - for rag in context["rags"]: - if rag["enabled"] and rag["name"] == "JPK": # Only support JPK rag right now... - yield {"status": "processing", "message": f"Checking RAG context {rag['name']}..."} - chroma_results = self.file_watcher.find_similar(query=content, top_k=10) - if chroma_results: - rag_docs.extend(chroma_results["documents"]) - chroma_embedding = chroma_results["query_embedding"] - metadata["rag"] = { - **chroma_results, - "name": rag["name"], - "umap_embedding_2d": self.file_watcher.umap_model_2d.transform([chroma_embedding])[0].tolist(), - "umap_embedding_3d": self.file_watcher.umap_model_3d.transform([chroma_embedding])[0].tolist() - } - preamble = "" - if len(rag_docs): - preamble = f""" -1. Respond to this query: {content} -2. 
If there is information in this context to enhance the answer, do so: -[{context_tag}]:\n""" - for doc in rag_docs: - preamble += doc - preamble += f"\n[/{context_tag}]\nUse all of that information to respond to: " - # Figure - llm_history.append({"role": "user", "content": preamble + content}) - user_history.append({"role": "user", "content": content}) + try: + llm_history = context["sessions"][type]["llm_history"] + user_history = context["sessions"][type]["user_history"] + metadata = { + "type": type, + "rag": { "documents": [] }, + "tools": [], + "eval_count": 0, + "eval_duration": 0, + "prompt_eval_count": 0, + "prompt_eval_duration": 0, + } - if context["message_history_length"]: - messages = context["system"] + llm_history[-context["message_history_length"]:] - else: - messages = context["system"] + llm_history + # Default to not using tools + enable_tools = False + + # Default to using RAG + enable_rag = True + + # The first time a particular session type is used, it is handled differently. After the initial pass (once the + # llm_history has more than one entry), the standard 'chat' is used. + if len(user_history) >= 1: + process_type = "chat" + # Do not enable RAG when limiting context to the job description chat + if type == "job_description": + enable_rag = False + else: + process_type = type + + if enable_rag: + for rag in context["rags"]: + if rag["enabled"] and rag["name"] == "JPK": # Only support JPK rag right now... + yield {"status": "processing", "message": f"Checking RAG context {rag['name']}..."} + chroma_results = self.file_watcher.find_similar(query=content, top_k=10) + if chroma_results: + chroma_embedding = chroma_results["query_embedding"] + metadata["rag"] = { + **chroma_results, + "name": rag["name"], + "umap_embedding_2d": self.file_watcher.umap_model_2d.transform([chroma_embedding])[0].tolist(), + "umap_embedding_3d": self.file_watcher.umap_model_3d.transform([chroma_embedding])[0].tolist() + } + + + match process_type: + # Normal chat interactions with context history + case "chat": + enable_tools = True + preamble = "" + rag_context = "" + for doc in metadata["rag"]["documents"]: + rag_context += doc + if rag_context: + preamble = f""" + 1. Respond to this query: {content} + 2. If there is information in this context to enhance the answer, do so: + [{context_tag}] + {rag_context} + [/{context_tag}] + Use that information to respond to: """ + + # Single job_description is provided; generate a resume + case "job_description": + # Always force the full resume to be in context + resume_doc = open(defines.resume_doc, "r").read() + work_history = f"{resume_doc}\n" + for doc in metadata["rag"]["documents"]: + work_history += f"{doc}\n" + + preamble = f""" + [INTRO] + {resume_intro} + [/INTRO] + + [WORK HISTORY] + {work_history} + [/WORK HISTORY] + + [JOB DESCRIPTION] + {content} + [/JOB DESCRIPTION] + + 1. Use the above [INTRO] and [WORK HISTORY] to create the resume for the [JOB DESCRIPTION]. + 2. Do not use content from the [JOB DESCRIPTION] in the response unless the [WORK HISTORY] mentions it. + """ + + # Seed the first context messages with the resume from the 'job_description' session + case "resume": + raise Exception(f"Invalid chat type: {type}") + + # Fact check the resume created by the 'job_description' using only the RAG and resume + case "fact_check": + if len(context["sessions"]["resume"]["llm_history"]) < 3: # SYSTEM, USER, **ASSISTANT** + yield {"status": "done", "message": "No resume history found." 
} + return + + resume = context["sessions"]["resume"]["llm_history"][2] + + metadata = copy.deepcopy(resume["metadata"]) + metadata["eval_count"] = 0 + metadata["eval_duration"] = 0 + metadata["prompt_eval_count"] = 0 + metadata["prompt_eval_duration"] = 0 + + resume_doc = open(defines.resume_doc, "r").read() + work_history = f"{resume_doc}\n" + for doc in metadata["rag"]["documents"]: + work_history += f"{doc}\n" + + preamble = f""" + [WORK HISTORY] + {work_history} + [/WORK HISTORY] + + [RESUME] + {resume['content']} + [/RESUME] + """ + content = resume['content'] + + case _: + raise Exception(f"Invalid chat type: {type}") + + llm_history.append({"role": "user", "content": preamble + content}) + user_history.append({"role": "user", "content": content}) + + if context["message_history_length"]: + messages = create_system_message(context["sessions"][type]["system_prompt"]) + llm_history[-context["message_history_length"]:] + else: + messages = create_system_message(context["sessions"][type]["system_prompt"]) + llm_history - try: # Estimate token length of new messages - ctx_size = self.get_optimal_ctx_size(context["context_tokens"], messages=llm_history[-1]["content"]) - yield {"status": "processing", "message": "Processing request...", "num_ctx": ctx_size} + ctx_size = self.get_optimal_ctx_size(context["sessions"][type]["context_tokens"], messages=llm_history[-1]["content"]) + + processing_type = "Processing query..." + match type: + case "job_description": + processing_type = "Generating resume..." + case "fact_check": + processing_type = "Fact Checking resume..." + if len(llm_history) > 1: + processing_type = "Processing query..." + + yield {"status": "processing", "message": processing_type, "num_ctx": ctx_size} # Use the async generator in an async for loop - response = self.client.chat(model=self.model, messages=messages, tools=llm_tools(context["tools"]), options={ "num_ctx": ctx_size }) + try: + if enable_tools: + response = self.client.chat(model=self.model, messages=messages, tools=llm_tools(context["tools"]), options={ "num_ctx": ctx_size }) + else: + response = self.client.chat(model=self.model, messages=messages, options={ "num_ctx": ctx_size }) + except Exception as e: + logging.info(f"1. {messages[0]}") + logging.info(f"[LAST]. 
{messages[-1]}") + + logging.exception({ "model": self.model, "error": str(e) }) + yield {"status": "error", "message": f"An error occurred communicating with LLM"} + return + metadata["eval_count"] += response["eval_count"] metadata["eval_duration"] += response["eval_duration"] metadata["prompt_eval_count"] += response["prompt_eval_count"] metadata["prompt_eval_duration"] += response["prompt_eval_duration"] - context["context_tokens"] = response["prompt_eval_count"] + response["eval_count"] + context["sessions"][type]["context_tokens"] = response["prompt_eval_count"] + response["eval_count"] tools_used = [] @@ -1015,7 +1114,7 @@ class WebServer: metadata["tools"] = tools_used # Estimate token length of new messages - ctx_size = self.get_optimal_ctx_size(context["context_tokens"], messages=messages[pre_add_index:]) + ctx_size = self.get_optimal_ctx_size(context["sessions"][type]["context_tokens"], messages=messages[pre_add_index:]) yield {"status": "processing", "message": "Generating final response...", "num_ctx": ctx_size } # Decrease creativity when processing tool call requests response = self.client.chat(model=self.model, messages=messages, stream=False, options={ "num_ctx": ctx_size }) #, "temperature": 0.5 }) @@ -1023,7 +1122,7 @@ class WebServer: metadata["eval_duration"] += response["eval_duration"] metadata["prompt_eval_count"] += response["prompt_eval_count"] metadata["prompt_eval_duration"] += response["prompt_eval_duration"] - context["context_tokens"] = response["prompt_eval_count"] + response["eval_count"] + context["sessions"][type]["context_tokens"] = response["prompt_eval_count"] + response["eval_count"] reply = response["message"]["content"] final_message = {"role": "assistant", "content": reply } @@ -1045,145 +1144,6 @@ class WebServer: finally: self.processing = False - async def generate_resume(self, context, content): - if not self.file_watcher: - return - - content = content.strip() - if not content: - yield {"status": "error", "message": "Invalid request"} - return - - if self.processing: - yield {"status": "error", "message": "Busy"} - return - - self.processing = True - resume_history = context["resume_history"] - resume = { - "job_description": content, - "resume": "", - "metadata": {}, - "rag": "", - "fact_check": {} - } - - metadata = { - "rag": {}, - "tools": [], - "eval_count": 0, - "eval_duration": 0, - "prompt_eval_count": 0, - "prompt_eval_duration": 0, - } - rag_docs = [] - resume_doc = open(defines.resume_doc, "r").read() - rag_docs.append(resume_doc) - for rag in context["rags"]: - if rag["enabled"] and rag["name"] == "JPK": # Only support JPK rag right now... - yield {"status": "processing", "message": f"Checking RAG context {rag['name']}..."} - chroma_results = self.file_watcher.find_similar(query=content, top_k=10) - if chroma_results: - rag_docs.extend(chroma_results["documents"]) - metadata["rag"] = { "name": rag["name"], **chroma_results } - preamble = f"[INTRO]\n{resume_intro}\n[/INTRO]\n" - preamble += f"""[WORK HISTORY]:\n""" - for doc in rag_docs: - preamble += f"{doc}\n" - resume["rag"] += f"{doc}\n" - preamble += f"\n[/WORK HISTORY]\n" - - content = f"""{preamble}\n - Use the above [WORK HISTORY] and [INTRO] to create the resume for this [JOB DESCRIPTION]. 
Do not use the [JOB DESCRIPTION] in the generated resume unless the [WORK HISTORY] mentions them:\n[JOB DESCRIPTION]\n{content}\n[/JOB DESCRIPTION]\n""" - - try: - # Estimate token length of new messages - ctx_size = self.get_optimal_ctx_size(context["context_tokens"], messages=[system_generate_resume, content]) - - yield {"status": "processing", "message": "Processing request...", "num_ctx": ctx_size} - - # Use the async generator in an async for loop - # - # To support URL lookup: - # - # 1. Enable tools in a call to chat() with a simple prompt to invoke the tool to generate the summary if requested. - # 2. If not requested (no tool call,) abort the path - # 3. Otherwise, we know the URL was good and can use that URLs fetched content as context. - # - response = self.client.generate(model=self.model, system=system_generate_resume, prompt=content, options={ "num_ctx": ctx_size }) - metadata["eval_count"] += response["eval_count"] - metadata["eval_duration"] += response["eval_duration"] - metadata["prompt_eval_count"] += response["prompt_eval_count"] - metadata["prompt_eval_duration"] += response["prompt_eval_duration"] - context["context_tokens"] = response["prompt_eval_count"] + response["eval_count"] - - reply = response["response"] - final_message = {"role": "assistant", "content": reply, "metadata": metadata } - - resume["resume"] = final_message - resume_history.append(resume) - - # Return the REST API with metadata - yield {"status": "done", "message": final_message } - - except Exception as e: - logging.exception({ "model": self.model, "content": content, "error": str(e) }) - yield {"status": "error", "message": f"An error occurred: {str(e)}"} - - finally: - self.processing = False - - async def fact_check(self, context, content): - content = content.strip() - if not content: - yield {"status": "error", "message": "Invalid request"} - return - - if self.processing: - yield {"status": "error", "message": "Busy"} - return - - self.processing = True - resume_history = context["resume_history"] - if len(resume_history) == 0: - yield {"status": "done", "message": "No resume history found." 
} - return - - resume = resume_history[-1] - metadata = resume["metadata"] - metadata["eval_count"] = 0 - metadata["eval_duration"] = 0 - metadata["prompt_eval_count"] = 0 - metadata["prompt_eval_duration"] = 0 - - content = f"[WORK HISTORY]:{resume['rag']}[/WORK HISTORY]\n\n[RESUME]\n{resume['resume']['content']}\n[/RESUME]\n\n" - - try: - # Estimate token length of new messages - ctx_size = self.get_optimal_ctx_size(context["context_tokens"], messages=[system_fact_check, content]) - yield {"status": "processing", "message": "Processing request...", "num_ctx": ctx_size} - response = self.client.generate(model=self.model, system=system_fact_check, prompt=content, options={ "num_ctx": ctx_size }) - logging.info(f"Fact checking {ctx_size} tokens.") - metadata["eval_count"] += response["eval_count"] - metadata["eval_duration"] += response["eval_duration"] - metadata["prompt_eval_count"] += response["prompt_eval_count"] - metadata["prompt_eval_duration"] += response["prompt_eval_duration"] - context["context_tokens"] = response["prompt_eval_count"] + response["eval_count"] - reply = response["response"] - final_message = {"role": "assistant", "content": reply, "metadata": metadata } - resume["fact_check"] = final_message - - # Return the REST API with metadata - yield {"status": "done", "message": final_message } - - except Exception as e: - logging.exception({ "model": self.model, "content": content, "error": str(e) }) - yield {"status": "error", "message": f"An error occurred: {str(e)}"} - - finally: - self.processing = False - - def run(self, host="0.0.0.0", port=WEB_PORT, **kwargs): try: if self.ssl_enabled:
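For completeness, the reworked chat endpoint streams newline-delimited JSON status objects ({"status": "processing" | "done" | "error", ...}). A consumer sketch for the new per-type route, assuming the `requests` package; the base URL and session id are placeholders:

```python
import json
import requests

BASE = "http://localhost:8000"  # placeholder; match your connectionBase

def stream_chat(session_id: str, chat_type: str, content: str):
    """POST /api/chat/{session_id}/{chat_type} and yield parsed updates."""
    with requests.post(
        f"{BASE}/api/chat/{session_id}/{chat_type}",
        json={"content": content},
        stream=True,
    ) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines(decode_unicode=True):
            if line:  # one JSON document per non-empty line
                yield json.loads(line)

# Example: drive the resume generator with a job description.
# for update in stream_chat("<session-uuid>", "job_description", jd_text):
#     print(update["status"], update.get("message", ""))
```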