UI better

James Ketr 2025-05-09 13:39:49 -07:00
parent fec6efcd84
commit 92dca43979
26 changed files with 2441 additions and 1193 deletions

View File

@ -359,7 +359,8 @@ WORKDIR /opt/ollama
#ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.3.0-nightly/ollama-ipex-llm-2.3.0b20250415-ubuntu.tgz
# NOTE: No longer at github.com/intel -- now at github.com/ipex-llm
ENV OLLAMA_VERSION=https://github.com/ipex-llm/ipex-llm/releases/download/v2.2.0/ollama-ipex-llm-2.2.0-ubuntu.tgz
#ENV OLLAMA_VERSION=https://github.com/ipex-llm/ipex-llm/releases/download/v2.2.0/ollama-ipex-llm-2.2.0-ubuntu.tgz
ENV OLLAMA_VERSION=https://github.com/ipex-llm/ipex-llm/releases/download/v2.3.0-nightly/ollama-ipex-llm-2.3.0b20250429-ubuntu.tgz
RUN wget -qO - ${OLLAMA_VERSION} | \
tar --strip-components=1 -C . -xzv

File diff suppressed because it is too large

View File

@ -19,7 +19,9 @@
"@types/react": "^19.0.12",
"@types/react-dom": "^19.0.4",
"@uiw/react-json-view": "^2.0.0-alpha.31",
"mui-markdown": "^1.2.6",
"mermaid": "^11.6.0",
"mui-markdown": "^2.0.1",
"prism-react-renderer": "^2.4.1",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"react-markdown": "^10.1.0",

View File

@ -0,0 +1,21 @@
Backstory is developed using:
## Frontend
* React
* MUI
* Plotly.js
* MuiMarkdown
* Mermaid
## Backend
* Python
* FastAPI
* HuggingFace Transformers
* Ollama
* Backstory Agent Framework
* Prometheus
* Grafana
* ze-monitor
* Jupyter Notebook

View File

@ -1,15 +1,11 @@
The backstory about Backstory...
## Backstory is three things
## Backstory is two things
1. An interactive Q&A -- let potential employers ask questions about an individual's work history (aka "Backstory"). Based on the content the job seeker has provided to the RAG system, it can surface insights into that individual's resume and curriculum vitae that are often left out when people try to fit everything onto one page.
1. **An interactive Q&A** -- let potential employers ask questions about an individual's work history (aka "Backstory"). Based on the content the job seeker has provided to the RAG system, it can surface insights into that individual's resume and curriculum vitae that are often left out when people try to fit everything onto one page.
2. A resume builder -- if you have an open position and think this person might be a candidate, paste your job description and have a resume produced based on their data. If it looks interesting, reach out to them. If not, hopefully you've gained some insight into what drives them.
2. **A resume builder** -- if you have an open position and think this person might be a candidate, paste your job description and have a resume produced based on their data. If it looks interesting, reach out to them. If not, hopefully you've gained some insight into what drives them.
-or-
2. A curated expert about you -- as a potential job seeker, you can self-host this environment and generate resumes for yourself.
3. **A curated expert about you** -- as a potential job seeker, you can self-host this environment and generate resumes for yourself.
While this project was generally built for self-hosting with open-source models, you can use any of the frontier models. The API adapters in this project can be configured to use infrastructure hosted by Anthropic, Google, Grok, and OpenAI (alphabetical). For more information, see [https://github.com/jketreno/backstory/README.md](https://github.com/jketreno/backstory/README.md#Frontier_Models).
@ -29,4 +25,4 @@ A. I could; but I don't want to store your data. I also don't want to have to be
Q. <ChatQuery prompt="Why can't I just ask Backstory these questions?" tunables={{ "enable_tools": false }} />
A. Try it. See what you find out :)
A. Try it. See what you find out :)

View File

@ -0,0 +1,100 @@
The system follows a carefully designed pipeline with isolated stages to prevent fabrication.
## System Architecture Overview
The system uses a pipeline of isolated analysis and generation steps:
1. **Stage 1: Isolated Analysis** (three sub-stages)
- **1A: Job Analysis** - Extracts requirements from job description only
- **1B: Candidate Analysis** - Catalogs qualifications from resume/context only
- **1C: Mapping Analysis** - Identifies legitimate matches between requirements and qualifications
2. **Stage 2: Resume Generation**
- Uses mapping output to create a tailored resume with evidence-based content
3. **Stage 3: Verification**
- Performs fact-checking to catch any remaining fabrications
```mermaid
flowchart TD
subgraph "Stage 1: Isolated Analysis"
subgraph "Stage 1A: Job Analysis"
A1[Job Description Input] --> A2[Job Analysis LLM]
A2 --> A3[Job Requirements JSON]
end
subgraph "Stage 1B: Candidate Analysis"
B1[Resume & Context Input] --> B2[Candidate Analysis LLM]
B2 --> B3[Candidate Qualifications JSON]
end
subgraph "Stage 1C: Mapping Analysis"
C1[Job Requirements JSON] --> C2[Candidate Qualifications JSON]
C2 --> C3[Mapping Analysis LLM]
C3 --> C4[Skills Mapping JSON]
end
end
subgraph "Stage 2: Resume Generation"
D1[Skills Mapping JSON] --> D2[Original Resume Reference]
D2 --> D3[Resume Generation LLM]
D3 --> D4[Tailored Resume Draft]
end
subgraph "Stage 3: Verification"
E1[Skills Mapping JSON] --> E2[Original Materials]
E2 --> E3[Tailored Resume Draft]
E3 --> E4[Verification LLM]
E4 --> E5{Verification Check}
E5 -->|PASS| E6[Approved Resume]
E5 -->|FAIL| E7[Correction Instructions]
E7 --> D3
end
A3 --> C1
B3 --> C2
C4 --> D1
D4 --> E3
style A2 fill:#f9d77e,stroke:#333,stroke-width:2px
style B2 fill:#f9d77e,stroke:#333,stroke-width:2px
style C3 fill:#f9d77e,stroke:#333,stroke-width:2px
style D3 fill:#f9d77e,stroke:#333,stroke-width:2px
style E4 fill:#f9d77e,stroke:#333,stroke-width:2px
style E5 fill:#a3e4d7,stroke:#333,stroke-width:2px
style E6 fill:#aed6f1,stroke:#333,stroke-width:2px
style E7 fill:#f5b7b1,stroke:#333,stroke-width:2px
```
## Stage 1: Isolated Analysis (three separate sub-stages)
1. **Job Analysis**: Extracts requirements from just the job description
2. **Candidate Analysis**: Catalogs qualifications from just the resume/context
3. **Mapping Analysis**: Identifies legitimate matches between requirements and qualifications
## Stage 2: Resume Generation
Creates a tailored resume using only verified information from the mapping
## Stage 3: Verification
1. Performs fact-checking to catch any remaining fabrications
2. Corrects issues if needed and re-verifies
## Key Anti-Fabrication Mechanisms
The system uses several techniques to prevent fabrication:
* **Isolation of Analysis Stages**: By analyzing the job and candidate separately, the system prevents the LLM from prematurely creating connections that might lead to fabrication.
* **Evidence Requirements**: Each qualification included must have explicit evidence from the original materials.
* **Conservative Transferability**: The system is instructed to be conservative when claiming skills are transferable.
* **Verification Layer**: A dedicated verification step acts as a safety check to catch any remaining fabrications.
* **Strict JSON Structures**: Using structured JSON formats ensures information flows properly between stages.
## Implementation Details
* **Prompt Engineering**: Each stage has carefully designed prompts with clear instructions and output formats.
* **Error Handling**: Comprehensive validation and error handling throughout the pipeline.
* **Correction Loop**: If verification fails, the system attempts to correct issues and re-verify.
* **Traceability**: Information in the final resume can be traced back to specific evidence in the original materials.
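To make the structured hand-off between stages concrete, below is a minimal sketch of the kind of mapping structure Stage 1C could pass to Stage 2. The field names are illustrative assumptions only; they are not the pipeline's actual schema.

```typescript
// Hypothetical sketch of the "Skills Mapping JSON" produced by Stage 1C and
// consumed by Stage 2. Field names are assumptions, not the real schema.
interface SkillsMapping {
  requirements: {
    requirement: string;            // Extracted from the job description only (Stage 1A)
    matchedQualification?: string;  // Qualification cataloged from the resume/context (Stage 1B)
    evidence?: string;              // Direct quote or citation from the original materials
    transferable: boolean;          // Conservative flag for transferable-skill claims
  }[];
  unmatchedRequirements: string[];  // Requirements with no legitimate evidence; never fabricated
}

export type { SkillsMapping };
```

Because each matched requirement carries its own evidence field, the Stage 3 verifier can check every claim in the draft resume against the original materials instead of judging the document as a whole.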

View File

@ -19,7 +19,7 @@ import { SxProps } from '@mui/material';
import { ResumeBuilder } from './ResumeBuilder';
import { Message, MessageList } from './Message';
import { MessageList } from './Message';
import { Snack, SeverityType } from './Snack';
import { VectorVisualizer } from './VectorVisualizer';
import { Controls } from './Controls';
@ -27,6 +27,7 @@ import { Conversation, ConversationHandle } from './Conversation';
import { ChatQuery, QueryOptions } from './ChatQuery';
import { Scrollable } from './Scrollable';
import { BackstoryTab } from './BackstoryTab';
import { Document } from './Document';
import './App.css';
import './Conversation.css';
@ -52,6 +53,8 @@ const connectionBase = getConnectionBase(window.location);
interface TabProps {
label?: string,
path: string,
route?: string,
children?: any,
tabProps?: {
label?: string,
sx?: SxProps,
@ -70,7 +73,6 @@ const App = () => {
const [menuOpen, setMenuOpen] = useState(false);
const [isMenuClosing, setIsMenuClosing] = useState(false);
const [activeTab, setActiveTab] = useState<number>(0);
const [about, setAbout] = useState<string>("");
const isDesktop = useMediaQuery('(min-width:650px)');
const prevIsDesktopRef = useRef<boolean>(isDesktop);
const chatRef = useRef<ConversationHandle>(null);
@ -93,40 +95,18 @@ const App = () => {
snackRef.current?.setSnack(message, severity);
}, [snackRef]);
// Get the About markdown
useEffect(() => {
if (about !== "") {
return;
}
const fetchAbout = async () => {
try {
const response = await fetch("/docs/about.md", {
method: 'GET',
headers: {
'Content-Type': 'application/json',
},
});
if (!response.ok) {
throw Error("/docs/about.md not found");
}
const data = await response.text();
setAbout(data);
} catch (error: any) {
console.error('Error obtaining About content information:', error);
setAbout("No information provided.");
};
};
fetchAbout();
}, [about, setAbout])
const handleSubmitChatQuery = (prompt: string, tunables?: QueryOptions) => {
console.log(`handleSubmitChatQuery: ${prompt} ${tunables || {}} -- `, chatRef.current ? ' sending' : 'no handler');
chatRef.current?.submitQuery(prompt, tunables);
setActiveTab(0);
};
const onDocumentExpand = (document: string) => {
console.log("Document expanded:", document);
}
const tabs: TabProps[] = useMemo(() => {
console.log(document);
const backstoryPreamble: MessageList = [
{
role: 'content',
@ -235,22 +215,36 @@ const App = () => {
p: 1,
}}
>
<Message
{...{
sx: {
display: 'flex',
flexDirection: 'column',
p: 1,
m: 0,
flexGrow: 0,
},
message: { role: 'content', title: "About Backstory", content: about },
submitQuery: handleSubmitChatQuery,
connectionBase,
sessionId,
setSnack
}} />
<Box sx={{ display: "flex", flexGrow: 1, p: 0, m: 0 }} />
<Document {...{
title: "About",
filepath: "/docs/about.md",
onExpand: () => { onDocumentExpand('about'); },
expanded: false,//openDocument === 'about',
sessionId,
connectionBase,
submitQuery: handleSubmitChatQuery,
setSnack,
}} />
<Document {...{
title: "Resume Generation Architecture",
filepath: "/docs/resume-generation.md",
onExpand: () => { onDocumentExpand('resume-generation'); },
expanded: false, //openDocument === 'about',
sessionId,
connectionBase,
submitQuery: handleSubmitChatQuery,
setSnack,
}} />
<Document {...{
title: "Application Architecture",
filepath: "/docs/about-app.md",
onExpand: () => { onDocumentExpand('resume-generation'); },
expanded: false, //openDocument === 'about-app',
sessionId,
connectionBase,
submitQuery: handleSubmitChatQuery,
setSnack,
}} />
</Scrollable>
)
}, {
@ -276,10 +270,9 @@ const App = () => {
</Scrollable>
)
}];
}, [about, sessionId, setSnack, isMobile]);
}, [sessionId, setSnack, isMobile]);
const fetchSession = useCallback((async (pathParts?: string[]) => {
try {
const response = await fetch(connectionBase + `/api/context`, {
method: 'POST',
@ -307,6 +300,7 @@ const App = () => {
setActiveTab(0);
} else {
window.history.replaceState({}, '', `/${pathParts.join('/')}/${new_session}`);
tabs[tabIndex].route = pathParts[2] || "";
setActiveTab(tabIndex);
}
setSessionId(new_session);
@ -329,13 +323,14 @@ const App = () => {
const path_session = pathParts.length < 2 ? pathParts[0] : pathParts[1];
if (!isValidUUIDv4(path_session)) {
console.log(`Invalid session id ${path_session}-- creating new session`);
fetchSession(pathParts);
fetchSession([pathParts[0]]);
} else {
let tabIndex = tabs.findIndex((tab) => tab.path === currentPath);
if (-1 === tabIndex) {
console.log(`Invalid path "${currentPath}" -- redirecting to default`);
tabIndex = 0;
}
tabs[tabIndex].route = pathParts[2] || ""
setSessionId(path_session);
setActiveTab(tabIndex);
}
@ -363,11 +358,15 @@ const App = () => {
}
setActiveTab(newValue);
const tabPath = tabs[newValue].path;
let path = `/${sessionId}`;
if (tabPath) {
window.history.pushState({}, '', `/${tabPath}/${sessionId}`);
} else {
window.history.pushState({}, '', `/${sessionId}`);
// if (openDocument) {
// path = `/${tabPath}/${openDocument}/${sessionId}`;
// } else {
path = `/${tabPath}/${sessionId}`;
// }
}
window.history.pushState({}, '', path);
handleMenuClose();
};

View File

@ -0,0 +1,25 @@
.BackstoryTextField:disabled {
opacity: 0.38;
}
/* .BackstoryTextField:not(:active):not(:focus):not(:hover) {
} */
.BackstoryTextField::placeholder {
color: rgba(46, 46, 46, 0.38);
}
.BackstoryTextField:focus,
.BackstoryTextField:active {
outline: 2px solid black;
}
.BackstoryTextField:hover:not(:active):not(:focus) {
outline: 1px solid black;
}
.BackstoryTextField {
outline: 1px solid rgba(46, 46, 46, 0.38);
border: none;
}

View File

@ -1,4 +1,4 @@
import React from 'react';
import React, { useState } from 'react';
import { Box } from '@mui/material';
import { useTheme } from '@mui/material/styles';
import { SxProps, Theme } from '@mui/material';
@ -14,15 +14,18 @@ import { ErrorOutline, InfoOutline, Memory, Psychology, /* Stream, */ } from '@m
interface ChatBubbleProps {
role: MessageRoles,
isInfo?: boolean;
isFullWidth?: boolean;
children: React.ReactNode;
sx?: SxProps<Theme>;
className?: string;
title?: string;
expanded?: boolean;
onExpand?: () => void;
}
function ChatBubble(props: ChatBubbleProps) {
const { role, isFullWidth, children, sx, className, title }: ChatBubbleProps = props;
const { role, children, sx, className, title, onExpand }: ChatBubbleProps = props;
const [expanded, setExpanded] = useState<boolean>((props.expanded === undefined) ? true : props.expanded);
const theme = useTheme();
const defaultRadius = '16px';
@ -31,7 +34,8 @@ function ChatBubble(props: ChatBubbleProps) {
fontSize: '0.875rem',
alignSelf: 'flex-start', // Left-aligned is used by default
maxWidth: '100%',
minWidth: '80%',
minWidth: '100%',
height: 'fit-content',
'& > *': {
color: 'inherit', // Children inherit 'color' from parent
overflow: 'hidden',
@ -65,7 +69,8 @@ function ChatBubble(props: ChatBubbleProps) {
backgroundColor: '#EDEAE0', // Soft warm gray that plays nice with #D3CDBF
border: `1px dashed ${theme.palette.custom.highlight}`, // Golden Ochre
borderRadius: defaultRadius,
maxWidth: isFullWidth ? '100%' : '90%',
maxWidth: '90%',
minWidth: '90%',
alignSelf: 'center',
color: theme.palette.text.primary, // Charcoal Black
fontStyle: 'italic',
@ -83,7 +88,8 @@ function ChatBubble(props: ChatBubbleProps) {
backgroundColor: 'rgba(74, 122, 125, 0.15)', // Translucent dusty teal
border: `1px solid ${theme.palette.secondary.light}`, // Lighter dusty teal
borderRadius: '4px',
maxWidth: isFullWidth ? '100%' : '75%',
maxWidth: '75%',
minWidth: '75%',
alignSelf: 'center',
color: theme.palette.secondary.dark, // Darker dusty teal for text
fontWeight: 500, // Slightly bolder than normal
@ -97,7 +103,8 @@ function ChatBubble(props: ChatBubbleProps) {
backgroundColor: '#F8E7E7', // Soft light red background
border: `1px solid #D83A3A`, // Prominent red border
borderRadius: defaultRadius,
maxWidth: isFullWidth ? '100%' : '90%',
maxWidth: '90%',
minWidth: '90%',
alignSelf: 'center',
color: '#8B2525', // Deep red text for good contrast
padding: '10px 16px',
@ -108,7 +115,6 @@ function ChatBubble(props: ChatBubbleProps) {
backgroundColor: '#F5F2EA', // Light cream background for easy reading
border: `1px solid ${theme.palette.custom.highlight}`, // Golden Ochre border
borderRadius: 0,
maxWidth: '100%', // Full width to maximize reading space
alignSelf: 'center', // Centered in the chat
color: theme.palette.text.primary, // Charcoal Black for maximum readability
padding: '8px 8px', // More generous padding for better text framing
@ -145,8 +151,9 @@ function ChatBubble(props: ChatBubbleProps) {
if (role === 'content' && title) {
return (
<Accordion
defaultExpanded
expanded={expanded}
className={className}
onChange={() => { onExpand && onExpand(); setExpanded(!expanded); }}
sx={{ ...styles[role], ...sx }}
>
<AccordionSummary

View File

@ -1,17 +1,17 @@
import React, { useState, useEffect, ReactElement } from 'react';
import FormGroup from '@mui/material/FormGroup';
import FormControlLabel from '@mui/material/FormControlLabel';
import Switch from '@mui/material/Switch';
import Divider from '@mui/material/Divider';
import TextField from '@mui/material/TextField';
// import FormGroup from '@mui/material/FormGroup';
// import FormControlLabel from '@mui/material/FormControlLabel';
// import Switch from '@mui/material/Switch';
// import Divider from '@mui/material/Divider';
// import TextField from '@mui/material/TextField';
import Accordion from '@mui/material/Accordion';
import AccordionActions from '@mui/material/AccordionActions';
import AccordionSummary from '@mui/material/AccordionSummary';
import AccordionDetails from '@mui/material/AccordionDetails';
import Typography from '@mui/material/Typography';
import Button from '@mui/material/Button';
import Box from '@mui/material/Box';
import ResetIcon from '@mui/icons-material/History';
// import Button from '@mui/material/Button';
// import Box from '@mui/material/Box';
// import ResetIcon from '@mui/icons-material/History';
import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
import { SetSnackType } from './Snack';

View File

@ -5,8 +5,7 @@ import Button from '@mui/material/Button';
import Box from '@mui/material/Box';
import SendIcon from '@mui/icons-material/Send';
import { SxProps, Theme } from '@mui/material';
import PropagateLoader from "react-spinners/PropagateLoader";
import { useTheme } from '@mui/material/styles';
import PropagateLoader from "react-spinners/PropagateLoader";
import { Message, MessageList, MessageData } from './Message';
import { SetSnackType } from './Snack';
@ -23,13 +22,16 @@ type ConversationMode = 'chat' | 'job_description' | 'resume' | 'fact_check';
interface ConversationHandle {
submitQuery: (prompt: string, options?: QueryOptions) => void;
fetchHistory: () => void;
}
interface BackstoryMessage {
prompt: string;
preamble: {};
status: string;
full_content: string;
response: string;
response: string; // Set when status === 'done' or 'error'
chunk: string; // Used when status === 'streaming'
metadata: {
rag: { documents: [] };
tools: string[];
@ -62,7 +64,7 @@ interface ConversationProps {
messageFilter?: ((messages: MessageList) => MessageList) | undefined, // Filter callback to determine which Messages to display in Conversation
messages?: MessageList, //
sx?: SxProps<Theme>,
onResponse?: ((message: MessageData) => MessageData) | undefined, // Event called when a query completes (provides messages)
onResponse?: ((message: MessageData) => void) | undefined, // Event called when a query completes (provides messages)
};
const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
@ -96,14 +98,11 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
const [processingMessage, setProcessingMessage] = useState<MessageData | undefined>(undefined);
const [streamingMessage, setStreamingMessage] = useState<MessageData | undefined>(undefined);
const timerRef = useRef<any>(null);
const [lastEvalTPS, setLastEvalTPS] = useState<number>(35);
const [lastPromptTPS, setLastPromptTPS] = useState<number>(430);
const [contextStatus, setContextStatus] = useState<ContextStatus>({ context_used: 0, max_context: 0 });
const [contextWarningShown, setContextWarningShown] = useState<boolean>(false);
const [noInteractions, setNoInteractions] = useState<boolean>(true);
const conversationRef = useRef<MessageList>([]);
const viewableElementRef = useRef<HTMLDivElement>(null);
const theme = useTheme();
// Keep the ref updated whenever items changes
useEffect(() => {
@ -163,14 +162,9 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
};
}, [conversation, setFilteredConversation, messageFilter, preamble, messages, hidePreamble]);
// Set the initial chat history to "loading" or the welcome message if loaded.
useEffect(() => {
if (sessionId === undefined) {
setProcessingMessage(loadingMessage);
return;
}
const fetchHistory = async () => {
const fetchHistory = useCallback(async () => {
let retries = 5;
while (--retries > 0) {
try {
const response = await fetch(connectionBase + `/api/history/${sessionId}/${type}`, {
method: 'GET',
@ -194,43 +188,42 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
const backstoryMessages: BackstoryMessage[] = messages;
// type MessageData = {
// role: MessageRoles,
// content: string,
// disableCopy?: boolean,
// user?: string,
// title?: string,
// origin?: string,
// display?: string, /* Messages generated on the server for filler should not be shown */
// id?: string,
// isProcessing?: boolean,
// metadata?: MessageMetaData
// };
setConversation(backstoryMessages.flatMap((backstoryMessage: BackstoryMessage) => [{
role: 'user',
content: backstoryMessage.prompt || "",
}, {
...backstoryMessage,
role: 'assistant',
content: backstoryMessage.response || "",
}] as MessageList));
role: backstoryMessage.status === "done" ? "assistant" : backstoryMessage.status,
content: backstoryMessage.response || "",
}] as MessageList));
setNoInteractions(false);
}
setProcessingMessage(undefined);
setStreamingMessage(undefined);
updateContextStatus();
return;
} catch (error) {
console.error('Error generating session ID:', error);
setProcessingMessage({ role: "error", content: "Unable to obtain history from server." });
setProcessingMessage({ role: "error", content: `Unable to obtain history from server. Retrying in 3 seconds (${retries} remain.)` });
setTimeout(() => {
setProcessingMessage(undefined);
}, 5000);
}, 3000);
await new Promise(resolve => setTimeout(resolve, 3000));
setSnack("Unable to obtain chat history.", "error");
}
};
}, [setConversation, updateContextStatus, connectionBase, setSnack, type, sessionId]);
// Set the initial chat history to "loading" or the welcome message if loaded.
useEffect(() => {
if (sessionId === undefined) {
setProcessingMessage(loadingMessage);
return;
}
fetchHistory();
}, [setConversation, setFilteredConversation, updateContextStatus, connectionBase, setSnack, type, sessionId]);
}, [fetchHistory, sessionId, setProcessing]);
const startCountdown = (seconds: number) => {
if (timerRef.current) clearInterval(timerRef.current);
@ -264,7 +257,8 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
useImperativeHandle(ref, () => ({
submitQuery: (query: string, tunables?: QueryOptions) => {
sendQuery(query, tunables);
}
},
fetchHistory: () => { return fetchHistory(); }
}));
// If context status changes, show a warning if necessary. If it drops
@ -376,12 +370,7 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
body: JSON.stringify(query)
});
// We'll guess that the response will be around 500 tokens...
const token_guess = 500;
const estimate = Math.round(token_guess / lastEvalTPS + contextStatus.context_used / lastPromptTPS);
setSnack(`Query sent. Response estimated in ${estimate}s.`, "info");
startCountdown(Math.round(estimate));
setSnack(`Query sent.`, "info");
if (!response.ok) {
throw new Error(`Server responded with ${response.status}: ${response.statusText}`);
@ -391,6 +380,7 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
throw new Error('Response body is null');
}
let streaming_response = ""
// Set up stream processing with explicit chunking
const reader = response.body.getReader();
const decoder = new TextDecoder();
@ -402,10 +392,7 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
switch (update.status) {
case 'done':
console.log('Done processing:', update);
// Replace processing message with final result
if (onResponse) {
update = onResponse(update);
}
stopCountdown();
setStreamingMessage(undefined);
setProcessingMessage(undefined);
const backstoryMessage: BackstoryMessage = update;
@ -421,12 +408,12 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
const metadata = update.metadata;
if (metadata) {
const evalTPS = metadata.eval_count * 10 ** 9 / metadata.eval_duration;
const promptTPS = metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration;
setLastEvalTPS(evalTPS ? evalTPS : 35);
setLastPromptTPS(promptTPS ? promptTPS : 35);
updateContextStatus();
}
if (onResponse) {
onResponse(update);
}
break;
case 'error':
// Show error
@ -438,6 +425,9 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
content: update.response || "",
}] as MessageList);
setProcessing(false);
stopCountdown();
// Add a small delay to ensure React has time to update the UI
await new Promise(resolve => setTimeout(resolve, 0));
break;
@ -445,10 +435,14 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
// Force an immediate state update based on the message type
// Update processing message with immediate re-render
if (update.status === "streaming") {
setStreamingMessage({ role: update.status, content: update.response });
streaming_response += update.chunk
setStreamingMessage({ role: update.status, content: streaming_response });
} else {
setProcessingMessage({ role: update.status, content: update.response });
/* Reset stream on non streaming message */
streaming_response = ""
}
startCountdown(Math.ceil(update.remaining_time));
// Add a small delay to ensure React has time to update the UI
await new Promise(resolve => setTimeout(resolve, 0));
break;
@ -550,7 +544,7 @@ const Conversation = forwardRef<ConversationHandle, ConversationProps>(({
fontSize: "0.7rem",
color: "darkgrey"
}}
>Estimated response time: {countdown}s</Box>
>Response will be stopped in: {countdown}s</Box>
)}
</Box>
<Box className="Query" sx={{ display: "flex", flexDirection: "column", p: 1 }}>

frontend/src/Document.tsx Normal file
View File

@ -0,0 +1,74 @@
import React, { useState, useEffect } from 'react';
import { Message, MessageSubmitQuery } from './Message';
import { SetSnackType } from './Snack';
interface DocumentProps {
title: string;
expanded?: boolean;
filepath: string;
setSnack: SetSnackType;
submitQuery?: MessageSubmitQuery;
connectionBase: string;
disableCopy?: boolean;
onExpand?: () => void;
}
const Document = (props: DocumentProps) => {
const { setSnack, submitQuery, connectionBase, filepath, title, expanded, disableCopy, onExpand } = props;
const [document, setDocument] = useState<string>("");
// Get the markdown
useEffect(() => {
if (document !== "") {
return;
}
const fetchDocument = async () => {
try {
const response = await fetch(filepath, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
},
});
if (!response.ok) {
throw Error(`${filepath} not found.`);
}
const data = await response.text();
setDocument(data);
} catch (error: any) {
console.error('Error fetching document content:', error);
setDocument(`${filepath} not found.`);
};
};
fetchDocument();
}, [document, setDocument, filepath])
return (
<>
<Message
{...{
sx: {
display: 'flex',
flexDirection: 'column',
p: 1,
m: 0,
flexGrow: 0,
},
message: { role: 'content', title: title, content: document },
connectionBase,
submitQuery,
setSnack,
expanded,
disableCopy,
onExpand,
}} />
{/* <Box sx={{ display: "flex", flexGrow: 1, p: 0, m: 0 }} /> */}
</>
);
};
export {
Document
};

frontend/src/Mermaid.tsx Normal file
View File

@ -0,0 +1,63 @@
import React, { useEffect, useRef, useState, useCallback } from 'react';
import mermaid, { MermaidConfig } from 'mermaid';
import { SxProps } from '@mui/material/styles';
import { Box } from '@mui/material';
import { useResizeObserverAndMutationObserver } from './useAutoScrollToBottom';
const defaultMermaidConfig : MermaidConfig = {
startOnLoad: true,
securityLevel: 'loose',
fontFamily: 'Fira Code',
};
interface MermaidProps {
chart: string;
sx?: SxProps;
className?: string;
mermaidConfig?: MermaidConfig;
}
const Mermaid: React.FC<MermaidProps> = (props: MermaidProps) => {
const { chart, sx, className, mermaidConfig } = props;
const [ visible, setVisible] = useState<boolean>(false);
const containerRef = useRef<HTMLDivElement>(null);
const checkVisible = useCallback(() => {
if (containerRef.current) {
const { width, height } = containerRef.current.getBoundingClientRect();
if (width > 0 && height > 0) {
setVisible(true);
}
}
}, [containerRef, setVisible]);
useEffect(() => {
const renderMermaid = async () => {
if (containerRef.current && visible && chart) {
try {
console.log("Rendering Mermaid");
await mermaid.initialize(mermaidConfig || defaultMermaidConfig);
await mermaid.run({ nodes: [containerRef.current] });
} catch (e) {
console.error("Mermaid render error:", e, containerRef.current);
}
}
}
renderMermaid();
}, [containerRef, mermaidConfig, visible, chart]);
// Observe container and TextField size, plus DOM changes
useResizeObserverAndMutationObserver(containerRef, null, checkVisible);
return <Box className={className || "Mermaid"} ref={containerRef} sx={{
display: "flex",
flexGrow: 1,
...sx
}}>
{chart}
</Box>;
};
export {
Mermaid
};

View File

@ -27,13 +27,15 @@ import { StyledMarkdown } from './StyledMarkdown';
import { VectorVisualizer } from './VectorVisualizer';
import { SetSnackType } from './Snack';
import { CopyBubble } from './CopyBubble';
import { Scrollable } from './Scrollable';
type MessageRoles = 'info' | 'user' | 'assistant' | 'system' | 'status' | 'error' | 'content' | 'thinking' | 'processing';
type MessageRoles = 'info' | 'user' | 'assistant' | 'system' | 'status' | 'error' | 'content' | 'thinking' | 'processing' | 'streaming';
type MessageData = {
role: MessageRoles,
content: string,
status?: string, // streaming, done, error...
response?: string,
disableCopy?: boolean,
user?: string,
title?: string,
@ -41,6 +43,7 @@ type MessageData = {
display?: string, /* Messages generated on the server for filler should not be shown */
id?: string,
isProcessing?: boolean,
actions?: string[],
metadata?: MessageMetaData
};
@ -63,13 +66,16 @@ interface MessageMetaData {
setSnack: SetSnackType,
}
type MessageSubmitQuery = (text: string) => void;
type MessageList = MessageData[];
interface MessageProps {
sx?: SxProps<Theme>,
message: MessageData,
isFullWidth?: boolean,
submitQuery?: (text: string) => void,
expanded?: boolean,
onExpand?: () => void,
submitQuery?: MessageSubmitQuery,
sessionId?: string,
connectionBase: string,
setSnack: SetSnackType,
@ -101,11 +107,6 @@ const MessageMeta = (props: MessageMetaProps) => {
return (<>
{
prompt_eval_duration !== 0 && eval_duration !== 0 && <>
<Box sx={{ fontSize: "0.8rem", mb: 1 }}>
Below is the LLM performance of this query. Note that if tools are called, the
entire context is processed for each separate tool request by the LLM. This
can dramatically increase the total time for a response.
</Box>
<TableContainer component={Card} className="PromptStats" sx={{ mb: 1 }}>
<Table aria-label="prompt stats" size="small">
<TableHead>
@ -224,7 +225,8 @@ const MessageMeta = (props: MessageMetaProps) => {
};
const Message = (props: MessageProps) => {
const { message, submitQuery, isFullWidth, sx, className } = props;
const { message, submitQuery, sx, className, onExpand } = props;
const messageExpanded = props.expanded;
const [expanded, setExpanded] = useState<boolean>(false);
const textFieldRef = useRef(null);
@ -241,14 +243,15 @@ const Message = (props: MessageProps) => {
return (<></>);
}
const formattedContent = message.content.trim();
const formattedContent = message.content.trim() || "Waiting for LLM to spool up...";
return (
<ChatBubble
className={className || "Message"}
isFullWidth={isFullWidth}
role={message.role}
title={message.title}
expanded={messageExpanded}
onExpand={onExpand}
sx={{
display: "flex",
flexDirection: "column",
@ -261,10 +264,21 @@ const Message = (props: MessageProps) => {
}}>
<CardContent ref={textFieldRef} sx={{ position: "relative", display: "flex", flexDirection: "column", overflowX: "auto", m: 0, p: 0, paddingBottom: '0px !important' }}>
{message.role !== 'user' ?
<StyledMarkdown
<Scrollable
className="MessageContent"
sx={{ display: "flex", color: 'text.secondary' }}
{...{ content: formattedContent, submitQuery }} />
autoscroll
fallbackThreshold={0.5}
sx={{
p: 0,
m: 0,
maxHeight: (message.role === "streaming") ? "20rem" : "unset",
display: "flex",
flexGrow: 1,
overflow: "auto", /* Handles scrolling for the div */
}}
>
<StyledMarkdown {...{ content: formattedContent, submitQuery }} />
</Scrollable>
:
<Typography
className="MessageContent"
@ -276,7 +290,7 @@ const Message = (props: MessageProps) => {
}
</CardContent>
<CardActions disableSpacing sx={{ display: "flex", flexDirection: "row", justifyContent: "space-between", alignItems: "center", width: "100%", p: 0, m: 0 }}>
{message.disableCopy === undefined && ["assistant", "content"].includes(message.role) && <CopyBubble content={message.content} />}
{(message.disableCopy === undefined || message.disableCopy === false) && ["assistant", "content"].includes(message.role) && <CopyBubble content={message.content} />}
{message.metadata && (
<Box sx={{ display: "flex", alignItems: "center", gap: 1 }}>
<Button variant="text" onClick={handleExpandClick} sx={{ color: "darkgrey", p: 0 }}>
@ -308,11 +322,12 @@ export type {
MessageProps,
MessageList,
MessageData,
MessageRoles
MessageRoles,
MessageSubmitQuery
};
export {
Message,
MessageMeta
MessageMeta,
};

View File

@ -4,7 +4,6 @@ import {
Tab,
Box,
} from '@mui/material';
import { useTheme } from '@mui/material/styles';
import { SxProps, Theme } from '@mui/material';
import { ChatQuery } from './ChatQuery';
@ -36,7 +35,6 @@ const ResumeBuilder: React.FC<ResumeBuilderProps> = ({
const [hasJobDescription, setHasJobDescription] = useState<boolean>(false);
const [hasResume, setHasResume] = useState<boolean>(false);
const [hasFacts, setHasFacts] = useState<boolean>(false);
const theme = useTheme();
const jobConversationRef = useRef<any>(null);
const resumeConversationRef = useRef<any>(null);
const factsConversationRef = useRef<any>(null);
@ -69,45 +67,49 @@ const ResumeBuilder: React.FC<ResumeBuilderProps> = ({
if (messages === undefined || messages.length === 0) {
return [];
}
console.log("filterJobDescriptionMessages disabled")
console.log("filterJobDescriptionMessages disabled", messages)
if (messages.length > 1) {
setHasResume(true);
}
messages[0].role = 'content';
messages[0].title = 'Job Description';
messages[0].disableCopy = false;
return messages;
let reduced = messages.filter((m, i) => {
const keep = (m.metadata?.origin || m.origin || "no origin") === 'job_description';
if ((m.metadata?.origin || m.origin || "no origin") === 'resume') {
setHasResume(true);
}
// if (!keep) {
// console.log(`filterJobDescriptionMessages: ${i + 1} filtered:`, m);
// } else {
// console.log(`filterJobDescriptionMessages: ${i + 1}:`, m);
// }
// let reduced = messages.filter((m, i) => {
// const keep = (m.metadata?.origin || m.origin || "no origin") === 'job_description';
// if ((m.metadata?.origin || m.origin || "no origin") === 'resume') {
// setHasResume(true);
// }
// // if (!keep) {
// // console.log(`filterJobDescriptionMessages: ${i + 1} filtered:`, m);
// // } else {
// // console.log(`filterJobDescriptionMessages: ${i + 1}:`, m);
// // }
return keep;
});
// return keep;
// });
/* If Resume hasn't occurred yet and there is still more than one message,
* resume has been generated. */
if (!hasResume && reduced.length > 1) {
setHasResume(true);
}
// /* If Resume hasn't occurred yet and there is still more than one message,
// * resume has been generated. */
// if (!hasResume && reduced.length > 1) {
// setHasResume(true);
// }
if (reduced.length > 0) {
// First message is always 'content'
reduced[0].title = 'Job Description';
reduced[0].role = 'content';
setHasJobDescription(true);
}
// if (reduced.length > 0) {
// // First message is always 'content'
// reduced[0].title = 'Job Description';
// reduced[0].role = 'content';
// setHasJobDescription(true);
// }
/* Filter out any messages which the server injected for state management */
reduced = reduced.filter(m => m.display !== "hide");
// /* Filter out any messages which the server injected for state management */
// reduced = reduced.filter(m => m.display !== "hide");
return reduced;
}, [setHasJobDescription, setHasResume, hasResume]);
// return reduced;
}, [setHasResume/*, setHasJobDescription, hasResume*/]);
const filterResumeMessages = useCallback((messages: MessageList): MessageList => {
if (messages === undefined || messages.length === 0) {
@ -119,47 +121,47 @@ const ResumeBuilder: React.FC<ResumeBuilderProps> = ({
}
return messages;
let reduced = messages.filter((m, i) => {
const keep = (m.metadata?.origin || m.origin || "no origin") === 'resume';
if ((m.metadata?.origin || m.origin || "no origin") === 'fact_check') {
setHasFacts(true);
}
if (!keep) {
console.log(`filterResumeMessages: ${i + 1} filtered:`, m);
} else {
console.log(`filterResumeMessages: ${i + 1}:`, m);
}
return keep;
});
// let reduced = messages.filter((m, i) => {
// const keep = (m.metadata?.origin || m.origin || "no origin") === 'resume';
// if ((m.metadata?.origin || m.origin || "no origin") === 'fact_check') {
// setHasFacts(true);
// }
// if (!keep) {
// console.log(`filterResumeMessages: ${i + 1} filtered:`, m);
// } else {
// console.log(`filterResumeMessages: ${i + 1}:`, m);
// }
// return keep;
// });
/* If there is more than one message, it is user: "...JOB_DESCRIPTION...", assistant: "...RESUME..."
* which means a resume has been generated. */
if (reduced.length > 1) {
/* Remove the assistant message from the UI */
if (reduced[0].role === "user") {
reduced.splice(0, 1);
}
}
// /* If there is more than one message, it is user: "...JOB_DESCRIPTION...", assistant: "...RESUME..."
// * which means a resume has been generated. */
// if (reduced.length > 1) {
// /* Remove the assistant message from the UI */
// if (reduced[0].role === "user") {
// reduced.splice(0, 1);
// }
// }
/* If Fact Check hasn't occurred yet and there is still more than one message,
* facts have been generated. */
if (!hasFacts && reduced.length > 1) {
setHasFacts(true);
}
// /* If Fact Check hasn't occurred yet and there is still more than one message,
// * facts have been generated. */
// if (!hasFacts && reduced.length > 1) {
// setHasFacts(true);
// }
/* Filter out any messages which the server injected for state management */
reduced = reduced.filter(m => m.display !== "hide");
// /* Filter out any messages which the server injected for state management */
// reduced = reduced.filter(m => m.display !== "hide");
/* If there are any messages, there is a resume */
if (reduced.length > 0) {
// First message is always 'content'
reduced[0].title = 'Resume';
reduced[0].role = 'content';
setHasResume(true);
}
// /* If there are any messages, there is a resume */
// if (reduced.length > 0) {
// // First message is always 'content'
// reduced[0].title = 'Resume';
// reduced[0].role = 'content';
// setHasResume(true);
// }
return reduced;
}, [setHasResume, hasFacts, setHasFacts]);
// return reduced;
}, [/*setHasResume, hasFacts,*/ setHasFacts]);
const filterFactsMessages = useCallback((messages: MessageList): MessageList => {
if (messages === undefined || messages.length === 0) {
@ -170,41 +172,46 @@ const ResumeBuilder: React.FC<ResumeBuilderProps> = ({
// messages.forEach((m, i) => console.log(`filterFactsMessages: ${i + 1}:`, m))
const reduced = messages.filter(m => {
return (m.metadata?.origin || m.origin || "no origin") === 'fact_check';
});
// const reduced = messages.filter(m => {
// return (m.metadata?.origin || m.origin || "no origin") === 'fact_check';
// });
/* If there is more than one message, it is user: "Fact check this resume...", assistant: "...FACT CHECK..."
* which means facts have been generated. */
if (reduced.length > 1) {
/* Remove the user message from the UI */
if (reduced[0].role === "user") {
reduced.splice(0, 1);
}
// First message is always 'content'
reduced[0].title = 'Fact Check';
reduced[0].role = 'content';
// /* If there is more than one message, it is user: "Fact check this resume...", assistant: "...FACT CHECK..."
// * which means facts have been generated. */
// if (reduced.length > 1) {
// /* Remove the user message from the UI */
// if (reduced[0].role === "user") {
// reduced.splice(0, 1);
// }
// // First message is always 'content'
// reduced[0].title = 'Fact Check';
// reduced[0].role = 'content';
// setHasFacts(true);
// }
// return reduced;
}, [/*setHasFacts*/]);
const jobResponse = useCallback(async (message: MessageData) => {
console.log('onJobResponse', message);
if (message.actions && message.actions.includes("resume_generated")) {
await resumeConversationRef.current.fetchHistory();
setHasResume(true);
setActiveTab(1); // Switch to Resume tab
}
if (message.actions && message.actions.includes("facts_checked")) {
await factsConversationRef.current.fetchHistory();
setHasFacts(true);
}
}, [setHasFacts, setHasResume, setActiveTab]);
return reduced;
}, [setHasFacts]);
const jobResponse = useCallback((message: MessageData): MessageData => {
console.log('onJobResponse', message);
setHasResume(true);
return message;
}, []);
const resumeResponse = useCallback((message: MessageData): MessageData => {
const resumeResponse = useCallback((message: MessageData): void => {
console.log('onResumeResponse', message);
setHasFacts(true);
return message;
}, [setHasFacts]);
const factsResponse = useCallback((message: MessageData): MessageData => {
const factsResponse = useCallback((message: MessageData): void => {
console.log('onFactsResponse', message);
return message;
}, []);
const resetJobDescription = useCallback(() => {

View File

@ -21,13 +21,13 @@ const Scrollable = (props: ScrollableProps) => {
return (
<Box
className={className || "Scrollable"}
className={`Scrollable ${className || ""}`}
sx={{
display: 'flex',
margin: '0 auto',
flexGrow: 1,
overflow: 'auto',
backgroundColor: '#F5F5F5',
// backgroundColor: '#F5F5F5',
...sx,
}}
ref={autoscroll !== undefined && autoscroll !== false ? scrollRef : undefined}

View File

@ -0,0 +1,15 @@
pre:not(.MessageContent) {
overflow: initial;
max-height: initial;
}
.MessageContent pre {
border: none;
border-left: 1px solid white;
padding-left: 1rem;
height: fit-content; /* Natural height */
font-family: monospace; /* Ensure monospace font */
display: flex;
flex-grow: 1;
overflow: visible;
}

View File

@ -1,49 +1,79 @@
import React from 'react';
import { MuiMarkdown } from 'mui-markdown';
import { useTheme } from '@mui/material/styles';
import { SxProps, useTheme } from '@mui/material/styles';
import { Link } from '@mui/material';
import { ChatQuery, QueryOptions } from './ChatQuery';
import Box from '@mui/material/Box';
import { Mermaid } from './Mermaid';
import './StyledMarkdown.css';
interface StyledMarkdownProps {
className?: string,
content: string,
sx?: SxProps,
submitQuery?: (prompt: string, tunables?: QueryOptions) => void,
[key: string]: any, // For any additional props
};
const StyledMarkdown: React.FC<StyledMarkdownProps> = ({ className, content, submitQuery, ...props }) => {
const StyledMarkdown: React.FC<StyledMarkdownProps> = (props: StyledMarkdownProps) => {
const { className, content, submitQuery, sx } = props;
const theme = useTheme();
let options: any = {
overrides: {
a: {
component: Link,
props: {
sx: {
wordBreak: "break-all",
color: theme.palette.secondary.main,
textDecoration: 'none',
'&:hover': {
color: theme.palette.custom.highlight,
textDecoration: 'underline',
}
const overrides: any = {
pre: {
component: (element: any) => {
const { className } = element.children.props;
const chart = element.children?.props?.children || "";
if (className === "lang-mermaid") {
console.log(`StyledMarkdown pre: ${className}`);
return <Mermaid className="Mermaid" chart={chart} />;
}
return <pre><code className={className}>{element.children}</code></pre>;
},
},
a: {
component: Link,
props: {
sx: {
wordBreak: "break-all",
color: theme.palette.secondary.main,
textDecoration: 'none',
'&:hover': {
color: theme.palette.custom.highlight,
textDecoration: 'underline',
}
}
},
ChatQuery: undefined
}
},
chatQuery: undefined
};
if (submitQuery) {
options.overrides.ChatQuery = {
overrides.ChatQuery = {
component: ChatQuery,
props: {
submitQuery,
},
};
}
return <MuiMarkdown className={className} {...options} children={content} {...props} />;
return <Box
className={`MuiMarkdown ${className || ""}`}
sx={{
display: "flex",
m: 0,
p: 0,
boxSizing: "border-box",
flexGrow: 1,
height: "auto",
...sx
}}>
<MuiMarkdown
overrides={overrides}
children={content}
/>
</Box>;
};
export { StyledMarkdown };

View File

@ -1,7 +1,6 @@
import React, { useEffect, useState, useRef } from 'react';
import Box from '@mui/material/Box';
import Card from '@mui/material/Card';
import Typography from '@mui/material/Typography';
import Plot from 'react-plotly.js';
import TextField from '@mui/material/TextField';
import Tooltip from '@mui/material/Tooltip';
@ -10,10 +9,16 @@ import SendIcon from '@mui/icons-material/Send';
import FormControlLabel from '@mui/material/FormControlLabel';
import Switch from '@mui/material/Switch';
import { SxProps, Theme } from '@mui/material';
import useMediaQuery from '@mui/material/useMediaQuery';
import { useTheme } from '@mui/material/styles';
import JsonView from '@uiw/react-json-view';
import { SetSnackType } from './Snack';
import { Scrollable } from './Scrollable';
import { StyledMarkdown } from './StyledMarkdown';
import './VectorVisualizer.css';
import { calculatePoint } from 'mermaid/dist/utils';
interface Metadata {
doc_type?: string;
@ -120,6 +125,8 @@ const VectorVisualizer: React.FC<VectorVisualizerProps> = (props: VectorVisualiz
background: string,
color: string,
} | null>(null);
const theme = useTheme();
const isMobile = useMediaQuery(theme.breakpoints.down('md'));
// Get the collection to visualize
useEffect(() => {
@ -273,7 +280,6 @@ const VectorVisualizer: React.FC<VectorVisualizerProps> = (props: VectorVisualiz
return null;
}
const handleKeyPress = (event: any) => {
if (event.key === 'Enter') {
sendQuery(newQuery);
@ -309,7 +315,7 @@ const VectorVisualizer: React.FC<VectorVisualizerProps> = (props: VectorVisualiz
);
return (
<Box className="VectorVisualizer"
<Card className="VectorVisualizer"
sx={{
display: 'flex',
position: 'relative',
@ -319,87 +325,125 @@ const VectorVisualizer: React.FC<VectorVisualizerProps> = (props: VectorVisualiz
}}>
{
!inline &&
<Card sx={{ display: 'flex', flexDirection: 'column', flexGrow: 1, minHeight: '2.5rem', justifyContent: 'center', alignItems: 'center', m: 0, p: 0, mb: 1 }}>
<Typography variant="h6" sx={{ p: 1, pt: 0 }}>
RAG Vector Visualization
</Typography>
</Card>
}
<FormControlLabel
sx={{
display: "flex",
position: "relative",
width: "fit-content",
ml: 1,
mb: '-2.5rem',
zIndex: 100,
flexBasis: 0,
flexGrow: 0
}}
control={<Switch checked={!view2D} />} onChange={() => setView2D(!view2D)} label="3D" />
<Plot
ref={plotlyRef}
onClick={(event: any) => {
const point = event.points[0];
console.log('Point:', point);
const type = point.customdata.type;
const text = point.customdata.doc;
const emoji = emojiMap[type] || '❓';
setTooltip({
visible: true,
background: point['marker.color'],
color: getTextColorForBackground(point['marker.color']),
content: `${emoji} ${type.toUpperCase()}\n${text}`,
});
}}
data={[plotData.data]}
useResizeHandler={true}
config={{
responsive: true,
// displayModeBar: false,
displaylogo: false,
showSendToCloud: false,
staticPlot: false,
}}
style={{
display: "flex",
flexGrow: 1,
minHeight: '240px',
padding: 0,
margin: 0,
width: "100%",
height: "100%",
}}
layout={plotData.layout}
/>
{!inline &&
<Card sx={{
display: 'flex',
flexDirection: 'column',
flexGrow: 1,
mt: 1,
p: 0.5,
color: tooltip?.color || '#2E2E2E',
background: tooltip?.background || '#FFFFFF',
whiteSpace: 'pre-line',
zIndex: 1000,
overflow: 'auto',
maxHeight: '20vh',
minHeight: '20vh',
overflowWrap: 'break-all',
wordBreak: 'break-all',
}}
>
<Typography variant="body2" sx={{ p: 1, pt: 0 }}>
{tooltip?.content}
</Typography>
<Card sx={{ display: 'flex', flexDirection: 'column', flexGrow: 0, minHeight: '2.5rem', maxHeight: '2.5rem', height: '2.5rem', justifyContent: 'center', alignItems: 'center', m: 0, p: 0, mb: 1 }}>
RAG Vector Visualization
</Card>
}
<Card sx={{ p: 0, m: 0, display: "flex", flexGrow: 1, position: "relative", flexDirection: isMobile ? "column" : "row" }}>
<Box sx={{ p: 0, m: 0, display: "flex", flexGrow: 1, position: "relative", flexDirection: "column" }}>
<Box sx={{
borderBottom: "1px solid #2E2E2E",
p: 0, m: 0,
display: "flex",
flexGrow: 0,
height: isMobile ? "auto" : "320px",
minHeight: isMobile ? "auto" : "320px",
maxHeight: isMobile ? "auto" : "320px",
position: "relative",
flexDirection: "column"
}}>
<FormControlLabel
sx={{
display: "flex",
position: "relative",
width: "fit-content",
ml: 1,
mb: '-2.5rem',
zIndex: 100,
flexBasis: 0,
flexGrow: 0
}}
control={<Switch checked={!view2D} />} onChange={() => setView2D(!view2D)} label="3D" />
<Plot
ref={plotlyRef}
onClick={(event: any) => {
const point = event.points[0];
console.log('Point:', point);
const type = point.customdata.type;
const text = point.customdata.doc;
const emoji = emojiMap[type] || '❓';
setTooltip({
visible: true,
background: point['marker.color'],
color: getTextColorForBackground(point['marker.color']),
content: `${emoji} ${type.toUpperCase()}\n${text}`,
});
}}
data={[plotData.data]}
useResizeHandler={true}
config={{
responsive: true,
// displayModeBar: false,
displaylogo: false,
showSendToCloud: false,
staticPlot: false,
}}
style={{
display: "flex",
flexGrow: 1,
minHeight: '240px',
padding: 0,
margin: 0,
width: "100%",
height: "100%",
}}
layout={plotData.layout}
/>
</Box>
{
!inline && newQueryEmbedding && <Scrollable sx={{
display: "flex",
position: "relative",
width: "100%",
flexGrow: 1,
height: "max-content",
backgroundColor: "white",
}}>
<JsonView
displayDataTypes={false}
objectSortKeys={true}
collapsed={1}
value={newQueryEmbedding}
style={{
fontSize: "0.8rem",
overflow: "hidden",
width: "100%",
minHeight: "max-content",
}} />
</Scrollable>
}
{
!inline && !newQueryEmbedding && <Box sx={{ p: 1 }}>Enter query below to view distances.</Box>
}
</Box>
{!inline &&
<Scrollable sx={{
borderLeft: isMobile ? "none" : "1px solid #2E2E2E",
display: 'flex',
flexDirection: 'column',
flexGrow: isMobile ? 1 : 0.5,
width: isMobile ? "100%" : "600px",
maxWidth: isMobile ? "100%" : "600px",
// height: "calc(100vh - 72px - 144px)",
mt: 0,
p: 0.5,
color: tooltip?.color || '#2E2E2E',
background: tooltip?.background || '#FFFFFF',
whiteSpace: 'pre-line',
zIndex: 1000,
overflow: 'auto',
overflowWrap: 'break-all',
wordBreak: 'break-all',
}}
>
<StyledMarkdown sx={{ p: 1, pt: 0 }} content={tooltip?.content || "Select a node in the visualization."} />
</Scrollable>
}
</Card>
{!inline && newQueryEmbedding !== undefined &&
<Card sx={{ display: 'flex', flexDirection: 'column', justifyContent: 'center', alignItems: 'center', mt: 1, pb: 0 }}>
<Typography variant="h6" sx={{ p: 1, pt: 0, maxHeight: '5rem', overflow: 'auto' }}>
Query: {newQueryEmbedding.query}
</Typography>
<Card sx={{ display: 'flex', flexDirection: 'column', justifyContent: 'center', flexGrow: 0, minHeight: '2.5rem', maxHeight: '2.5rem', height: '2.5rem', alignItems: 'center', mt: 1, pb: 0 }}>
Query: {newQueryEmbedding.query}
</Card>
}
@ -421,7 +465,7 @@ const VectorVisualizer: React.FC<VectorVisualizerProps> = (props: VectorVisualiz
</Tooltip>
</Box>
}
</Box>
</Card>
);
};

View File

@ -1,16 +1,58 @@
import { useEffect, useRef, RefObject, useCallback } from 'react';
const debug: boolean = false;
type ResizeCallback = () => void;
// Define the debounce function with cancel capability
function debounce<T extends (...args: any[]) => void>(func: T, wait: number) {
let timeout: NodeJS.Timeout | null = null;
let lastCall: number = 0;
const debounced = function (...args: Parameters<T>) {
const now = Date.now();
// Execute immediately if wait time has passed since last call
if (now - lastCall >= wait) {
lastCall = now;
// Clear any existing timeout to prevent stale executions
if (timeout) {
clearTimeout(timeout);
timeout = null;
}
func(...args);
return;
}
// Schedule for remaining time if no timeout is pending
if (!timeout) {
timeout = setTimeout(() => {
lastCall = Date.now();
func(...args);
timeout = null;
}, wait - (now - lastCall));
}
};
// Add cancel method to clear pending timeout
debounced.cancel = function () {
if (timeout) {
clearTimeout(timeout);
timeout = null;
}
};
return debounced;
}
const useResizeObserverAndMutationObserver = (
targetRef: RefObject<HTMLElement | null>,
scrollToRef: RefObject<HTMLElement | null>,
scrollToRef: RefObject<HTMLElement | null> | null,
callback: ResizeCallback
) => {
const callbackRef = useRef(callback);
const resizeObserverRef = useRef<ResizeObserver | null>(null);
const mutationObserverRef = useRef<MutationObserver | null>(null);
const debounceTimeout = useRef<NodeJS.Timeout | null>(null);
useEffect(() => {
callbackRef.current = callback;
@ -18,18 +60,16 @@ const useResizeObserverAndMutationObserver = (
useEffect(() => {
const container = targetRef.current;
const scrollTo = scrollToRef.current;
const scrollTo = scrollToRef?.current;
if (!container) return;
const debouncedCallback = (entries: ResizeObserverEntry[] | undefined) => {
if (debounceTimeout.current) clearTimeout(debounceTimeout.current);
debounceTimeout.current = setTimeout(() => {
requestAnimationFrame(() => callbackRef.current());
}, 50);
};
const debouncedCallback = debounce((target: string) => {
debug && console.debug(`"debouncedCallback(${target})`);
requestAnimationFrame(() => callbackRef.current());
}, 500);
const resizeObserver = new ResizeObserver(debouncedCallback);
const mutationObserver = new MutationObserver(() => { debouncedCallback(undefined); });
const resizeObserver = new ResizeObserver((e: any) => { debouncedCallback("resize"); });
const mutationObserver = new MutationObserver((e: any) => { debouncedCallback("mutation"); });
// Observe container size
resizeObserver.observe(container);
@ -49,7 +89,7 @@ const useResizeObserverAndMutationObserver = (
mutationObserverRef.current = mutationObserver;
return () => {
if (debounceTimeout.current) clearTimeout(debounceTimeout.current);
debouncedCallback.cancel();
resizeObserver.disconnect();
mutationObserver.disconnect();
};
@ -60,7 +100,7 @@ const useResizeObserverAndMutationObserver = (
* Auto-scroll hook for scrollable containers.
* Scrolls to the bottom of the container on paste or when TextField is fully/partially visible.
*/
export const useAutoScrollToBottom = (
const useAutoScrollToBottom = (
scrollToRef: RefObject<HTMLElement | null>,
smooth: boolean = true,
fallbackThreshold: number = 0.33,
@ -91,10 +131,9 @@ export const useAutoScrollToBottom = (
// Scroll on paste or if TextField is visible and user isn't scrolling up
shouldScroll = (isPasteEvent || isTextFieldVisible) && !isUserScrollingUpRef.current;
if (shouldScroll) {
requestAnimationFrame(() => {
console.debug('Scrolling to container bottom:', {
debug && console.debug('Scrolling to container bottom:', {
scrollHeight: container.scrollHeight,
scrollToHeight: scrollToRect.height,
containerHeight: container.clientHeight,
@ -102,10 +141,10 @@ export const useAutoScrollToBottom = (
isTextFieldVisible,
isUserScrollingUp: isUserScrollingUpRef.current,
});
container.scrollTo({
top: container.scrollHeight,
behavior: smooth ? 'smooth' : 'auto',
});
container.scrollTo({
top: container.scrollHeight,
behavior: smooth ? 'smooth' : 'auto',
});
});
}
} else {
@ -118,7 +157,7 @@ export const useAutoScrollToBottom = (
if (shouldScroll) {
requestAnimationFrame(() => {
console.debug('Scrolling to container bottom (fallback):', { scrollHeight });
debug && console.debug('Scrolling to container bottom (fallback):', { scrollHeight });
container.scrollTo({
top: container.scrollHeight,
behavior: smooth ? 'smooth' : 'auto',
@ -136,11 +175,13 @@ export const useAutoScrollToBottom = (
const handleScroll = () => {
const currentScrollTop = container.scrollTop;
isUserScrollingUpRef.current = currentScrollTop < lastScrollTop.current;
debug && console.debug(`Scrolling up: ${isUserScrollingUpRef.current}`);
lastScrollTop.current = currentScrollTop;
if (scrollTimeout.current) clearTimeout(scrollTimeout.current);
scrollTimeout.current = setTimeout(() => {
isUserScrollingUpRef.current = false;
debug && console.debug(`Scrolling up: ${isUserScrollingUpRef.current}`);
}, 500);
};
@ -170,4 +211,9 @@ export const useAutoScrollToBottom = (
useResizeObserverAndMutationObserver(containerRef, scrollToRef, checkAndScrollToBottom);
return containerRef;
};
};
export {
useResizeObserverAndMutationObserver,
useAutoScrollToBottom
}

prometheus.yml (new file, 20 lines)
@ -0,0 +1,20 @@
global:

scrape_configs:
  - job_name: 'backstory'
    scrape_interval: 5s
    metrics_path: /metrics
    scheme: https
    static_configs:
      - targets: ['backstory:8911']
    tls_config:
      insecure_skip_verify: true

  - job_name: 'backstory-prod'
    scrape_interval: 30s
    metrics_path: /metrics
    scheme: https
    static_configs:
      - targets: ['backstory-prod:8911']
    tls_config:
      insecure_skip_verify: true
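
The scrape jobs above expect the Backstory backend to expose Prometheus metrics over HTTPS on port 8911 at /metrics. That wiring lives on the application side rather than in this file; a minimal sketch, assuming prometheus_client and FastAPI (the counter name and mount call are illustrative, not taken from this commit):

```python
# Minimal sketch: serve the Prometheus exposition format at /metrics so the
# scrape config above has something to pull. Only the /metrics path and the
# 8911 port come from this config; everything else here is an assumption.
from fastapi import FastAPI
from prometheus_client import Counter, make_asgi_app

app = FastAPI()

# Example counter in the style of the per-agent metrics incremented elsewhere
# in this commit (agent.metrics.tokens_prompt.labels(agent=...).inc(...)).
tokens_prompt = Counter("tokens_prompt", "Prompt tokens consumed", ["agent"])

# Mount the exposition endpoint; TLS termination (scheme: https) is handled
# in front of this app and is not shown here.
app.mount("/metrics", make_asgi_app())

# e.g. uvicorn app:app --host 0.0.0.0 --port 8911
```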


@ -1,3 +1,5 @@
LLM_TIMEOUT=600
from utils import logger
from typing import AsyncGenerator
@ -19,8 +21,10 @@ import warnings
from typing import Any
from collections import deque
from datetime import datetime
import inspect
from uuid import uuid4
import time
import traceback
def try_import(module_name, pip_name=None):
try:
@ -327,7 +331,6 @@ class WebServer:
except Exception as e:
logger.error(f"put_umap error: {str(e)}")
import traceback
logger.error(traceback.format_exc())
return JSONResponse({"error": str(e)}, 500)
@ -392,20 +395,6 @@ class WebServer:
match reset_operation:
case "system_prompt":
logger.info(f"Resetting {reset_operation}")
# match agent_type:
# case "chat":
# prompt = system_message
# case "job_description":
# prompt = system_generate_resume
# case "resume":
# prompt = system_generate_resume
# case "fact_check":
# prompt = system_message
# case _:
# prompt = system_message
# agent.system_prompt = prompt
# response["system_prompt"] = { "system_prompt": prompt }
case "rags":
logger.info(f"Resetting {reset_operation}")
context.rags = rags.copy()
@ -537,95 +526,106 @@ class WebServer:
try:
context = self.upsert_context(context_id)
agent = context.get_agent(agent_type)
except Exception as e:
logger.info(f"Attempt to create agent type: {agent_type} failed", e)
return JSONResponse({"error": f"{agent_type} is not recognized or context {context_id} is invalid "}, status_code=404)
try:
agent = context.get_agent(agent_type)
except Exception as e:
logger.info(f"Attempt to create agent type: {agent_type} failed", e)
return JSONResponse({"error": f"{agent_type} is not recognized", "context": context.id}, status_code=404)
try:
query = await request.json()
prompt = query["prompt"]
if not isinstance(prompt, str) or len(prompt) == 0:
logger.info(f"Prompt is empty")
return JSONResponse({"error": "Prompt cannot be empty"}, status_code=400)
except Exception as e:
logger.info(f"Attempt to parse request: {str(e)}.")
return JSONResponse({"error": f"Attempt to parse request: {str(e)}."}, status_code=400)
try:
options = Tunables(**query["options"]) if "options" in query else None
except Exception as e:
logger.info(f"Attempt to set tunables failed: {query['options']}.", e)
return JSONResponse({"error": f"Invalid options: {query['options']}"}, status_code=400)
if not agent:
match agent_type:
case "job_description":
logger.info(f"Agent {agent_type} not found. Returning empty history.")
agent = context.get_or_create_agent("job_description", job_description=prompt)
case _:
logger.info(f"Invalid agent creation sequence for {agent_type}. Returning error.")
return JSONResponse({"error": f"{agent_type} is not recognized", "context": context.id}, status_code=404)
try:
options = Tunables(**query["options"]) if "options" in query else None
except Exception as e:
logger.info(f"Attempt to set tunables failed: {query['options']}.", e)
return JSONResponse({"error": f"Invalid options: {query['options']}"}, status_code=400)
if not agent:
match agent_type:
case "job_description":
logger.info(f"Agent {agent_type} not found. Returning empty history.")
agent = context.get_or_create_agent("job_description", job_description=prompt)
case _:
logger.info(f"Invalid agent creation sequence for {agent_type}. Returning error.")
return JSONResponse({"error": f"{agent_type} is not recognized", "context": context.id}, status_code=404)
try:
async def flush_generator():
logger.info(f"Message starting. Streaming partial results.")
# Create a cancellable task to manage the generator
loop = asyncio.get_running_loop()
stop_event = asyncio.Event()
async def process_generator():
try:
async for message in self.generate_response(context=context, agent=agent, prompt=prompt, options=options):
if stop_event.is_set():
logger.info("Stopping generator due to client disconnection.")
return
if message.status != "done":
logger.info(f"{agent.agent_type} - {inspect.stack()[0].function}")
try:
start_time = time.perf_counter()
async for message in self.generate_response(context=context, agent=agent, prompt=prompt, options=options):
if message.status != "done":
if message.status == "streaming":
result = {
"status": message.status,
"response": message.response
"status": "streaming",
"chunk": message.chunk,
"remaining_time": LLM_TIMEOUT - (time.perf_counter() - start_time)
}
else:
logger.info(f"Message complete. Providing full response.")
try:
result = message.model_dump(by_alias=True, mode='json')
except Exception as e:
result = {"status": "error", "response": str(e)}
yield json.dumps(result) + "\n"
return
start_time = time.perf_counter()
result = {
"status": message.status,
"response": message.response,
"remaining_time": LLM_TIMEOUT
}
else:
logger.info(f"Message complete. Providing full response.")
try:
message.response = message.response
result = message.model_dump(by_alias=True, mode='json')
except Exception as e:
result = {"status": "error", "response": str(e)}
yield json.dumps(result) + "\n"
return
# Convert to JSON and add newline
result = json.dumps(result) + "\n"
message.network_packets += 1
message.network_bytes += len(result)
yield result
# Allow the event loop to process the write
await asyncio.sleep(0)
except Exception as e:
logger.error(f"Error in process_generator: {e}")
yield json.dumps({"status": "error", "response": str(e)}) + "\n"
finally:
# Save context on completion or error
self.save_context(context_id)
# Create a generator iterator
gen = process_generator()
try:
async for result in gen:
# Check if client has disconnected
# Convert to JSON and add newline
result = json.dumps(result) + "\n"
message.network_packets += 1
message.network_bytes += len(result)
if await request.is_disconnected():
logger.info("Client disconnected, stopping generator.")
stop_event.set() # Signal the generator to stop
logger.info("Disconnect detected. Aborting generation.")
context.processing = False
# Save context on completion or error
message.prompt = prompt
message.status = "error"
message.response = "Client disconnected during generation."
agent.conversation.add(message)
self.save_context(context_id)
return
yield result
current_time = time.perf_counter()
if current_time - start_time > LLM_TIMEOUT:
message.status = "error"
message.response = f"Processing time ({LLM_TIMEOUT}s) exceeded for single LLM inference (likely due to LLM getting stuck.) You will need to retry your query."
message.partial_response = message.response
logger.info(message.response + " Ending session")
result = message.model_dump(by_alias=True, mode='json')
result = json.dumps(result) + "\n"
yield result
if message.status == "error":
context.processing = False
return
# Allow the event loop to process the write
await asyncio.sleep(0)
except Exception as e:
logger.error(f"Error in flush_generator: {e}")
context.processing = False
logger.error(f"Error in process_generator: {e}")
logger.error(traceback.format_exc())
yield json.dumps({"status": "error", "response": str(e)}) + "\n"
finally:
stop_event.set() # Ensure generator stops if not already stopped
# Ensure generator is fully closed
try:
await gen.aclose()
except Exception as e:
logger.warning(f"Error closing generator: {e}")
# Save context on completion or error
self.save_context(context_id)
# Return StreamingResponse with appropriate headers
return StreamingResponse(
@ -638,6 +638,7 @@ class WebServer:
}
)
except Exception as e:
context.processing = False
logger.error(f"Error in post_chat_endpoint: {e}")
return JSONResponse({"error": str(e)}, status_code=500)
@ -649,7 +650,6 @@ class WebServer:
return JSONResponse({ "id": context.id })
except Exception as e:
logger.error(f"get_history error: {str(e)}")
import traceback
logger.error(traceback.format_exc())
return JSONResponse({"error": str(e)}, status_code=404)
@ -872,12 +872,14 @@ class WebServer:
return
if message.status != "done":
yield message
async for message in agent.process_message(self.llm, self.model, message):
if message.status == "error":
yield message
return
if message.status != "done":
yield message
if message.status == "error":
return
logger.info(f"{agent_type}.process_message: {message.status} {f'...{message.response[-20:]}' if len(message.response) > 20 else message.response}")
if message.metadata["eval_count"]:
agent.metrics.tokens_prompt.labels(agent=agent.agent_type).inc(message.metadata["prompt_eval_count"])
@ -886,416 +888,6 @@ class WebServer:
yield message
return
if self.processing:
logger.info("TODO: Implement delay queing; busy for same agent, otherwise return queue size and estimated wait time")
yield {"status": "error", "message": "Busy processing another request."}
return
self.processing = True
conversation : Conversation = agent.conversation
message = Message(prompt=content)
del content # Prevent accidental use of content
# Default to not using tools
enable_tools = False
# Default to using RAG if there is content to check
if message.prompt:
enable_rag = True
else:
enable_rag = False
# RAG is disabled when asking questions about the resume
if agent.get_agent_type() == "resume":
enable_rag = False
# The first time through each agent agent_type a content_seed may be set for
# future chat agents; use it once, then clear it
message.preamble = agent.get_and_reset_content_seed()
system_prompt = agent.system_prompt
# After the first time a particular agent agent_type is used, it is handled as a chat.
# The number of messages indicating the agent is ready for chat varies based on
# the agent_type of agent
process_type = agent.get_agent_type()
match process_type:
case "job_description":
logger.info(f"job_description user_history len: {len(conversation.messages)}")
if len(conversation.messages) >= 2: # USER, ASSISTANT
process_type = "chat"
case "resume":
logger.info(f"resume user_history len: {len(conversation.messages)}")
if len(conversation.messages) >= 3: # USER, ASSISTANT, FACT_CHECK
process_type = "chat"
case "fact_check":
process_type = "chat" # Fact Check is always a chat agent
match process_type:
# Normal chat interactions with context history
case "chat":
if not message.prompt:
yield {"status": "error", "message": "No query provided for chat."}
logger.info(f"user_history len: {len(conversation.messages)}")
self.processing = False
return
enable_tools = True
# Generate RAG content if enabled, based on the content
rag_context = ""
if enable_rag:
# Initialize metadata["rag"] to None or a default value
message.metadata["rag"] = None
for value in self.generate_rag_results(context, message.prompt):
if "status" in value:
yield value
else:
if value.get("documents") or value.get("rag") is not None:
message.metadata["rag"] = value
if message.metadata["rag"]:
for doc in message.metadata["rag"]["documents"]:
rag_context += f"{doc}\n"
if rag_context:
message.preamble = f"""
<|context|>
{rag_context}
"""
if context.user_resume:
message.preamble += f"""
<|resume|>
{context.user_resume}
"""
message.preamble += """
<|rules|>
- If there is information in the <|context|> or <|resume|> sections to enhance the answer, incorporate it seamlessly and refer to it using natural language instead of mentioning '<|context|>' or '<|resume|>' or quoting it directly.
- Avoid phrases like 'According to the <|context|>' or similar references to the <|context|> or <|resume|>.
<|question|>
Use that information to respond to:"""
# Use the mode specific system_prompt instead of 'chat'
system_prompt = agent.system_prompt
# On first entry, a single job_description is provided ("user")
# Generate a resume to append to RESUME history
case "job_description":
# Generate RAG content if enabled, based on the content
rag_context = ""
if enable_rag:
# Initialize metadata["rag"] to None or a default value
message.metadata["rag"] = None
for value in self.generate_rag_results(context, message.prompt):
if "status" in value:
yield value
else:
if value.get("documents") or value.get("rag") is not None:
message.metadata["rag"] = value
if message.metadata["rag"]:
for doc in message.metadata["rag"]["documents"]:
rag_context += f"{doc}\n"
message.preamble = ""
if rag_context:
message.preamble += f"""
<|context|>
{rag_context}
"""
if context.user_resume:
message.preamble += f"""
<|resume|>
{context.user_resume}
"""
message.preamble += f"""
<|job_description|>
{message.prompt}
"""
tmp = context.get_agent("job_description")
if not tmp:
raise Exception(f"Job description agent not found.")
# Set the content seed for the job_description agent
tmp.set_content_seed(message.preamble + "<|question|>\nUse the above information to respond to this prompt: ")
message.preamble += f"""
<|rules|>
1. Use the above <|resume|> and <|context|> to create the resume for the <|job_description|>.
2. Do not use content from the <|job_description|> in the response unless the <|context|> or <|resume|> mentions them.
<|question|>
Use the above information to respond to this prompt:
"""
# For all future calls to job_description, use the system_job_description
agent.system_prompt = system_job_description
# Seed the history for job_description
stuffingMessage = Message(prompt=message.prompt)
stuffingMessage.response = "Job description stored to use in future queries."
stuffingMessage.metadata["origin"] = "job_description"
stuffingMessage.metadata["display"] = "hide"
conversation.add(stuffingMessage)
message.add_action("generate_resume")
logger.info("TODO: Convert these to generators, eg generate_resume() and then manually add results into agent'resume'")
logger.info("TODO: For subsequent runs, have the Agent handler generate the follow up prompts so they can have correct context preamble")
# Switch to resume agent for LLM responses
# message.metadata["origin"] = "resume"
# agent = context.get_or_create_agent("resume")
# system_prompt = agent.system_prompt
# llm_history = agent.llm_history = []
# user_history = agent.user_history = []
# Ignore the passed in content and invoke Fact Check
case "resume":
if len(context.get_or_create_agent("resume").conversation.messages) < 2: # USER, **ASSISTANT**
raise Exception(f"No resume found in user history.")
resume = context.get_or_create_agent("resume").conversation.messages[1]
# Generate RAG content if enabled, based on the content
rag_context = ""
if enable_rag:
# Initialize metadata["rag"] to None or a default value
message.metadata["rag"] = None
for value in self.generate_rag_results(context, resume["content"]):
if "status" in value:
yield value
else:
if value.get("documents") or value.get("rag") is not None:
message.metadata["rag"] = value
if message.metadata["rag"]:
for doc in message.metadata["rag"]["documents"]:
rag_context += f"{doc}\n"
# This is being passed to Fact Check, so do not provide the <|job_description|>
message.preamble = f""
if rag_context:
message.preamble += f"""
<|context|>
{rag_context}
"""
if context.user_resume:
# Do not prefix the resume with <|resume|>; just add to the <|context|>
message.preamble += f"""
{context.user_resume}
"""
message.preamble += f"""
<|resume|>
{resume['content']}
<|rules|>
1. Do not invent or assume any information not explicitly present in the <|context|>.
2. Analyze the <|resume|> to identify any discrepancies or inaccuracies based on the <|context|>.
<|question|>
"""
context.get_or_create_agent("resume").set_content_seed(f"""
<|resume|>
{resume["content"]}
<|question|>
Use the above <|resume|> and <|job_description|> to answer this query:
""")
message.prompt = "Fact check the resume and report discrepancies."
# Seed the history for resume
messages = [ {
"role": "user", "content": "Fact check resume", "origin": "resume", "display": "hide"
}, {
"role": "assistant", "content": "Resume fact checked.", "origin": "resume", "display": "hide"
} ]
# Do not add this to the LLM history; it is only used for UI presentation
stuffingMessage = Message(prompt="Fact check resume")
stuffingMessage.response = "Resume fact checked."
stuffingMessage.metadata["origin"] = "resume"
stuffingMessage.metadata["display"] = "hide"
stuffingMessage.actions = [ "fact_check" ]
logger.info("TODO: Switch this to use actions to keep the UI from showingit")
conversation.add(stuffingMessage)
# For all future calls to job_description, use the system_job_description
logger.info("TODO: Create a system_resume_QA prompt to use for the resume agent")
agent.system_prompt = system_prompt
# Switch to fact_check agent for LLM responses
message.metadata["origin"] = "fact_check"
agent = context.get_or_create_agent("fact_check", system_prompt=system_fact_check)
llm_history = agent.llm_history = []
user_history = agent.user_history = []
case _:
raise Exception(f"Invalid chat agent_type: {agent_type}")
conversation.add(message)
# llm_history.append({"role": "user", "content": message.preamble + content})
# user_history.append({"role": "user", "content": content, "origin": message.metadata["origin"]})
# message.metadata["full_query"] = llm_history[-1]["content"]
# Uses cached system_prompt as agent.system_prompt may have been updated for follow up questions
messages = create_system_message(system_prompt)
if context.message_history_length:
to_add = conversation.messages[-context.message_history_length:]
else:
to_add = conversation.messages
for m in to_add:
messages.extend([ {
"role": "user",
"content": m.content,
}, {
"role": "assistant",
"content": m.response,
} ])
message.content = message.preamble + message.prompt
# To send to the LLM
messages.append({
"role": "user",
"content": message.content
})
# Add the system message to the beginning of the messages list
message.content = f"""
<|system_prompt|>
{system_prompt}
{message.preamble}
{message.prompt}"""
# Estimate token length of new messages
ctx_size = self.get_optimal_ctx_size(context.get_or_create_agent(process_type).context_tokens, messages=message.prompt)
if len(conversation.messages) > 2:
processing_message = f"Processing {'RAG augmented ' if enable_rag else ''}query..."
else:
match agent.get_agent_type():
case "job_description":
processing_message = f"Generating {'RAG augmented ' if enable_rag else ''}resume..."
case "resume":
processing_message = f"Fact Checking {'RAG augmented ' if enable_rag else ''}resume..."
case _:
processing_message = f"Processing {'RAG augmented ' if enable_rag else ''}query..."
yield {"status": "processing", "message": processing_message, "num_ctx": ctx_size}
# Use the async generator in an async for loop
try:
if enable_tools:
response = self.llm.chat(model=self.model, messages=messages, tools=llm_tools(context.tools), options={ "num_ctx": ctx_size })
else:
response = self.llm.chat(model=self.model, messages=messages, options={ "num_ctx": ctx_size })
except Exception as e:
logger.exception({ "model": self.model, "error": str(e) })
yield {"status": "error", "message": f"An error occurred communicating with LLM"}
self.processing = False
return
message.metadata["eval_count"] += response["eval_count"]
message.metadata["eval_duration"] += response["eval_duration"]
message.metadata["prompt_eval_count"] += response["prompt_eval_count"]
message.metadata["prompt_eval_duration"] += response["prompt_eval_duration"]
agent.context_tokens = response["prompt_eval_count"] + response["eval_count"]
tools_used = []
yield {"status": "processing", "message": "Initial response received..."}
if "tool_calls" in response.get("message", {}):
yield {"status": "processing", "message": "Processing tool calls..."}
tool_message = response["message"]
tool_result = None
# Process all yielded items from the handler
async for item in self.handle_tool_calls(tool_message):
if isinstance(item, tuple) and len(item) == 2:
# This is the final result tuple (tool_result, tools_used)
tool_result, tools_used = item
else:
# This is a status update, forward it
yield item
message_dict = {
"role": tool_message.get("role", "assistant"),
"content": tool_message.get("content", "")
}
if "tool_calls" in tool_message:
message_dict["tool_calls"] = [
{"function": {"name": tc["function"]["name"], "arguments": tc["function"]["arguments"]}}
for tc in tool_message["tool_calls"]
]
pre_add_index = len(messages)
messages.append(message_dict)
if isinstance(tool_result, list):
messages.extend(tool_result)
else:
if tool_result:
messages.append(tool_result)
message.metadata["tools"] = tools_used
# Estimate token length of new messages
ctx_size = self.get_optimal_ctx_size(agent.context_tokens, messages=messages[pre_add_index:])
yield {"status": "processing", "message": "Generating final response...", "num_ctx": ctx_size }
# Decrease creativity when processing tool call requests
response = self.llm.chat(model=self.model, messages=messages, stream=False, options={ "num_ctx": ctx_size }) #, "temperature": 0.5 })
message.metadata["eval_count"] += response["eval_count"]
message.metadata["eval_duration"] += response["eval_duration"]
message.metadata["prompt_eval_count"] += response["prompt_eval_count"]
message.metadata["prompt_eval_duration"] += response["prompt_eval_duration"]
agent.context_tokens = response["prompt_eval_count"] + response["eval_count"]
reply = response["message"]["content"]
message.response = reply
message.metadata["origin"] = agent.get_agent_type()
# final_message = {"role": "assistant", "content": reply }
# # history is provided to the LLM and should not have additional metadata
# llm_history.append(final_message)
# user_history is provided to the REST API and does not include CONTEXT
# It does include metadata
# final_message["metadata"] = message.metadata
# user_history.append({**final_message, "origin": message.metadata["origin"]})
# Return the REST API with metadata
yield {
"status": "done",
"message": {
**message.model_dump(mode='json'),
}
}
# except Exception as e:
# logger.exception({ "model": self.model, "origin": agent_type, "content": content, "error": str(e) })
# yield {"status": "error", "message": f"An error occurred: {str(e)}"}
# finally:
# self.processing = False
self.processing = False
return
def run(self, host="0.0.0.0", port=WEB_PORT, **kwargs):
try:
if self.ssl_enabled:


@ -271,9 +271,12 @@ class Agent(BaseModel, ABC):
):
# logger.info(f"LLM::Tools: {'done' if response.done else 'processing'} - {response.message}")
message.status = "streaming"
message.response += response.message.content
message.chunk = response.message.content
message.response += message.chunk
if not response.done:
yield message
if response.done:
message.metadata["eval_count"] += response.eval_count
message.metadata["eval_duration"] += response.eval_duration
@ -351,6 +354,7 @@ class Agent(BaseModel, ABC):
},
stream=False # No need to stream the probe
)
end_time = time.perf_counter()
message.metadata["timers"]["tool_check"] = f"{(end_time - start_time):.4f}"
if not response.message.tool_calls:
@ -378,6 +382,7 @@ class Agent(BaseModel, ABC):
},
stream=False
)
end_time = time.perf_counter()
message.metadata["timers"]["non_streaming"] = f"{(end_time - start_time):.4f}"
@ -429,7 +434,8 @@ class Agent(BaseModel, ABC):
return
message.status = "streaming"
message.response += response.message.content
message.chunk = response.message.content
message.response += message.chunk
if not response.done:
yield message
@ -485,7 +491,7 @@ class Agent(BaseModel, ABC):
message.status = "thinking"
yield message
async for message in self.generate_llm_response(llm, model, message):
async for message in self.generate_llm_response(llm=llm, model=model, message=message):
# logger.info(f"LLM: {message.status} - {f'...{message.response[-20:]}' if len(message.response) > 20 else message.response}")
if message.status == "error":
yield message


@ -6,6 +6,8 @@ import inspect
import re
import json
import traceback
import asyncio
import time
from . base import Agent, agent_registry, LLMMessage
from .. conversation import Conversation
@ -167,29 +169,9 @@ class JobDescription(Agent):
async for message in super().prepare_message(message):
if message.status != "done":
yield message
# Always add the job description, user resume, and question
message.preamble["job_description"] = self.job_description
message.preamble["resume"] = self.context.user_resume
excluded = {"job_description"}
preamble_types = [f"<|{p}|>" for p in message.preamble.keys() if p not in excluded]
preamble_types_AND = " and ".join(preamble_types)
preamble_types_OR = " or ".join(preamble_types)
# message.preamble["rules"] = f"""\
# - Create your response based on the information provided in the {preamble_types_AND} sections by incorporating it seamlessly and refer to it using natural language instead of mentioning {preamble_types_OR} or quoting it directly.
# - If there is no information in these sections, answer based on your knowledge, or use any available tools.
# - Avoid phrases like 'According to the {preamble_types[0]}' or similar references to the {preamble_types_OR}.
# """
resume_agent = self.context.get_agent(agent_type="resume")
if resume_agent:
message.preamble["question"] = "Respond to:"
else:
# message.preamble["question"] = "Write a professional resume for the <|job_description|>:"
# message.prompt = "Following the <|rules|>, generate a resume given the <|context|>, <|resume|> and <|job_description|>."
message.prompt = ""
yield message
return
@ -201,26 +183,52 @@ class JobDescription(Agent):
# Generating a resume should not use any tools
message.tunables.enable_tools = False
async for message in super().process_message(llm, model, message):
original_prompt = message.prompt
async for message in super().process_message(llm=llm, model=model, message=message):
if message.status != "done":
yield message
if message.status == "error":
return
self.system_prompt = system_user_qualifications
resume_agent = self.context.get_agent(agent_type="resume")
fact_check_agent = self.context.get_agent(agent_type="fact_check")
if not resume_agent:
# Switch agent from "Create Resume from Job Description" mode
# to "Answer Questions about Job Description"
self.system_prompt = system_job_description
if "generate_factual_tailored_resume" in message.metadata and "analyze_candidate_qualifications" in message.metadata["generate_factual_tailored_resume"]:
resume_agent = self.context.get_or_create_agent(agent_type="resume", resume=message.response)
resume_message = Message(prompt="Show candidate qualifications")
qualifications = message.metadata["generate_factual_tailored_resume"]["analyze_candidate_qualifications"]["results"]
resume_message.response = f"# Candidate qualifications\n\n```json\n\n{json.dumps(qualifications, indent=2)}\n```\n"
resume_message.status = "done"
resume_agent.conversation.add(resume_message)
# Instantiate the "resume" agent, and seed (or reset) its conversation
# with this message.
resume_agent = self.context.get_or_create_agent(agent_type="resume", resume=message.response)
first_resume_message = message.model_copy()
first_resume_message.prompt = "Generate a resume for the job description."
resume_agent.conversation.add(first_resume_message)
message.response = "Resume generated."
resume_message = message.model_copy()
resume_message.prompt = "Show generated resume"
resume_message.response = message.response
resume_message.status = "done"
resume_agent.conversation.add(resume_message)
message.response = "Resume generated."
message.actions.append("resume_generated")
if "generate_factual_tailored_resume" in message.metadata and "verify_resume" in message.metadata["generate_factual_tailored_resume"]:
if "second_pass" in message.metadata["generate_factual_tailored_resume"]["verify_resume"]:
verification = message.metadata["generate_factual_tailored_resume"]["verify_resume"]["second_pass"]["results"]
else:
verification = message.metadata["generate_factual_tailored_resume"]["verify_resume"]["first_pass"]["results"]
fact_check_agent = self.context.get_or_create_agent(agent_type="fact_check", facts=json.dumps(verification, indent=2))
fact_check_message = message.model_copy()
fact_check_message.prompt = "Show verification"
fact_check_message.response = f"# Resume verification\n\n```json\n\n{json.dumps(verification, indent=2)}\n```\n"
fact_check_message.status = "done"
fact_check_agent.conversation.add(fact_check_message)
message.prompt = original_prompt
message.response = "Resume generated and verified."
message.actions.append("facts_checked")
# Return the final message
yield message
return
@ -289,6 +297,7 @@ class JobDescription(Agent):
# Stage 1A: Job Analysis Implementation
def create_job_analysis_prompt(self, job_description: str) -> tuple[str, str]:
"""Create the prompt for job requirements analysis."""
logger.info(f"{self.agent_type} - {inspect.stack()[0].function}")
system_prompt = """
You are an objective job requirements analyzer. Your task is to extract and categorize the specific skills,
experiences, and qualifications required in a job description WITHOUT any reference to any candidate.
@ -332,12 +341,15 @@ class JobDescription(Agent):
prompt = f"Job Description:\n{job_description}"
return system_prompt, prompt
async def analyze_job_requirements(self, message, job_description: str) -> AsyncGenerator[Message, None]:
async def analyze_job_requirements(self, message, job_description: str, metadata: Dict[str, Any]) -> AsyncGenerator[Message, None]:
"""Analyze job requirements from job description."""
logger.info(f"{self.agent_type} - {inspect.stack()[0].function}")
try:
system_prompt, prompt = self.create_job_analysis_prompt(job_description)
async for message in self.call_llm(message, system_prompt, prompt):
if message.status != "done":
metadata["system_prompt"] = system_prompt
metadata["prompt"] = prompt
async for message in self.call_llm(message=message, system_prompt=system_prompt, prompt=prompt):
if message.status != "done":
yield message
if message.status == "error":
return
@ -347,6 +359,8 @@ class JobDescription(Agent):
job_requirements = json.loads(json_str)
self.validate_job_requirements(job_requirements)
metadata["results"] = job_requirements["job_requirements"]
message.status = "done"
message.response = json_str
@ -354,39 +368,97 @@ class JobDescription(Agent):
return
except Exception as e:
message.status = "error"
message.response = f"Error in job requirements analysis: {str(e)}"
logger.error(message.response)
logger.error(traceback.format_exc())
yield message
metadata["error"] = message.response
raise
# Stage 1B: Candidate Analysis Implementation
def create_candidate_analysis_prompt(self, resume: str, context: str) -> tuple[str, str]:
"""Create the prompt for candidate qualifications analysis."""
# system_prompt = """
# You are an objective resume analyzer. Your task is to catalog ALL skills, experiences, and qualifications
# present in a candidate's materials WITHOUT any reference to any job description.
# ## INSTRUCTIONS:
# 1. Analyze ONLY the candidate's resume and context provided.
# 2. Create a comprehensive inventory of the candidate's actual qualifications.
# 3. DO NOT consider any job requirements - this is a pure candidate analysis task.
# 4. For each qualification, cite exactly where in the materials it appears.
# 5. DO NOT duplicate or repeat time periods or skills once listed.
# ## OUTPUT FORMAT:
# ```json
# {
# "candidate_qualifications": {
# "technical_skills": [
# {
# "skill": "skill name",
# "evidence": "exact quote from materials",
# "source": "resume or context",
# "expertise_level": "explicit level mentioned or 'unspecified'"
# }
# ],
# "work_experience": [
# {
# "role": "job title",
# "company": "company name",
# "duration": "time period",
# "responsibilities": ["resp1", "resp2"],
# "technologies_used": ["tech1", "tech2"],
# "achievements": ["achievement1", "achievement2"]
# }
# ],
# "education": [
# {
# "degree": "degree name",
# "institution": "institution name",
# "completed": true/false,
# "evidence": "exact quote from materials"
# }
# ],
# "projects": [
# {
# "name": "project name",
# "description": "brief description",
# "technologies_used": ["tech1", "tech2"],
# "evidence": "exact quote from materials"
# }
# ],
# "soft_skills": [
# {
# "skill": "skill name",
# "evidence": "exact quote or inference basis",
# "source": "resume or context"
# }
# ]
# }
# }
# ```
# Be thorough and precise. Include ONLY skills and experiences explicitly mentioned in the materials.
# For each entry, provide the exact text evidence from the materials that supports its inclusion.
# Do not make assumptions about skills based on job titles or project names - only include skills explicitly mentioned.
# """
system_prompt = """
You are an objective resume analyzer. Your task is to catalog ALL skills, experiences, and qualifications
present in a candidate's materials WITHOUT any reference to any job description.
## INSTRUCTIONS:
1. Analyze ONLY the candidate's resume and context provided.
2. Create a comprehensive inventory of the candidate's actual qualifications.
3. DO NOT consider any job requirements - this is a pure candidate analysis task.
4. For each qualification, cite exactly where in the materials it appears.
5. DO NOT duplicate or repeat time periods or skills once listed.
## OUTPUT FORMAT:
You are an objective resume analyzer. Create a comprehensive inventory of all skills, experiences, and qualifications present in the candidate's materials.
CORE PRINCIPLES:
- Analyze ONLY the candidate's resume and provided context
- Focus ONLY on the candidate's actual qualifications
- Do not reference any job requirements
- Include only explicitly mentioned information
OUTPUT FORMAT:
```json
{
"candidate_qualifications": {
"technical_skills": [
{
"skill": "skill name",
"evidence": "exact quote from materials",
"source": "resume or context",
"expertise_level": "explicit level mentioned or 'unspecified'"
"evidence_location": "where in resume this appears",
"expertise_level": "stated level or 'unspecified'"
}
],
"work_experience": [
@ -404,37 +476,32 @@ class JobDescription(Agent):
"degree": "degree name",
"institution": "institution name",
"completed": true/false,
"evidence": "exact quote from materials"
"graduation_date": "date or 'ongoing'"
}
],
"projects": [
{
"name": "project name",
"description": "brief description",
"technologies_used": ["tech1", "tech2"],
"evidence": "exact quote from materials"
"technologies_used": ["tech1", "tech2"]
}
],
"soft_skills": [
{
"skill": "skill name",
"evidence": "exact quote or inference basis",
"source": "resume or context"
"context": "brief mention of where this appears"
}
]
}
}
```
Be thorough and precise. Include ONLY skills and experiences explicitly mentioned in the materials.
For each entry, provide the exact text evidence from the materials that supports its inclusion.
Do not make assumptions about skills based on job titles or project names - only include skills explicitly mentioned.
"""
prompt = f"Resume:\n{resume}\n\nAdditional Context:\n{context}"
return system_prompt, prompt
async def call_llm(self, message: Message, system_prompt, prompt, temperature=0.7):
logger.info(f"{self.agent_type} - {inspect.stack()[0].function}")
messages : List[LLMMessage] = [
LLMMessage(role="system", content=system_prompt),
LLMMessage(role="user", content=prompt)
@ -448,6 +515,8 @@ class JobDescription(Agent):
message.status = "streaming"
yield message
last_chunk_time = 0
message.chunk = ""
message.response = ""
for response in self.llm.chat(
model=self.model,
@ -457,34 +526,42 @@ class JobDescription(Agent):
},
stream=True,
):
if not response:
message.status = "error"
message.response = "No response from LLM."
yield message
return
if not response:
message.status = "error"
message.response = "No response from LLM."
yield message
return
message.status = "streaming"
message.response += response.message.content
message.status = "streaming"
message.chunk += response.message.content
message.response += response.message.content
if not response.done:
yield message
if not response.done:
now = time.perf_counter()
if now - last_chunk_time > 0.25:
yield message
last_chunk_time = now
message.chunk = ""
if response.done:
message.metadata["eval_count"] += response.eval_count
message.metadata["eval_duration"] += response.eval_duration
message.metadata["prompt_eval_count"] += response.prompt_eval_count
message.metadata["prompt_eval_duration"] += response.prompt_eval_duration
self.context_tokens = response.prompt_eval_count + response.eval_count
message.status = "done"
yield message
if response.done:
message.metadata["eval_count"] += response.eval_count
message.metadata["eval_duration"] += response.eval_duration
message.metadata["prompt_eval_count"] += response.prompt_eval_count
message.metadata["prompt_eval_duration"] += response.prompt_eval_duration
self.context_tokens = response.prompt_eval_count + response.eval_count
message.chunk = ""
message.status = "done"
yield message
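
The rewritten loop above batches streamed tokens and yields roughly every 0.25 seconds, clearing message.chunk after each yield so the client is not flooded with per-token packets. The same throttling pattern in isolation, as a sketch (names are illustrative):

```python
# Standalone illustration of the chunk-throttling idea used in call_llm.
import time
from typing import Iterable, Iterator

def throttle_chunks(tokens: Iterable[str], interval: float = 0.25) -> Iterator[str]:
    chunk = ""
    last_emit = 0.0
    for token in tokens:
        chunk += token
        now = time.perf_counter()
        if now - last_emit > interval:
            yield chunk      # flush everything accumulated since the last emit
            chunk = ""
            last_emit = now
    if chunk:                # flush whatever is left when the stream ends
        yield chunk
```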
async def analyze_candidate_qualifications(self, message: Message, resume: str, context: str) -> AsyncGenerator[Message, None]:
async def analyze_candidate_qualifications(self, message: Message, resume: str, additional_context: str, metadata: Dict[str, Any]) -> AsyncGenerator[Message, None]:
"""Analyze candidate qualifications from resume and context."""
try:
system_prompt, prompt = self.create_candidate_analysis_prompt(resume, context)
system_prompt, prompt = self.create_candidate_analysis_prompt(resume, additional_context)
metadata["system_prompt"] = system_prompt
metadata["prompt"] = prompt
async for message in self.call_llm(message, system_prompt, prompt):
if message.status != "done":
yield message
yield message
if message.status == "error":
return
@ -494,15 +571,14 @@ class JobDescription(Agent):
# Validate structure
self.validate_candidate_qualifications(candidate_qualifications)
metadata["results"] = candidate_qualifications["candidate_qualifications"]
message.status = "done"
message.response = json.dumps(candidate_qualifications)
return
except Exception as e:
message.status = "error"
message.response = f"Error in candidate qualifications analysis: {str(e)}"
logger.error(message.response)
logger.error(traceback.format_exc())
metadata["error"] = message.response
yield message
raise
@ -579,13 +655,17 @@ class JobDescription(Agent):
prompt += f"Candidate Qualifications:\n{json.dumps(candidate_qualifications, indent=2)}"
return system_prompt, prompt
async def create_skills_mapping(self, message, job_requirements: Dict, candidate_qualifications: Dict) -> AsyncGenerator[Message, None]:
async def create_skills_mapping(self, message, job_requirements: Dict, candidate_qualifications: Dict, metadata: Dict[str, Any]) -> AsyncGenerator[Message, None]:
"""Create mapping between job requirements and candidate qualifications."""
json_str = ""
try:
system_prompt, prompt = self.create_mapping_analysis_prompt(job_requirements, candidate_qualifications)
metadata["system_prompt"] = system_prompt
metadata["prompt"] = prompt
async for message in self.call_llm(message, system_prompt, prompt):
if message != "done":
yield message
if message != "done":
yield message
if message.status == "error":
return
@ -595,6 +675,8 @@ class JobDescription(Agent):
# Validate structure
self.validate_skills_mapping(skills_mapping)
metadata["skills_mapping"] = skills_mapping["skills_mapping"]
message.status = "done"
message.response = json_str
@ -602,10 +684,7 @@ class JobDescription(Agent):
return
except Exception as e:
message.status = "error"
message.response = f"Error in skills mapping analysis: {str(e)}"
logger.error(message.response)
logger.error(traceback.format_exc())
metadata["error"] = json_str
yield message
raise
@ -665,27 +744,19 @@ class JobDescription(Agent):
prompt += f"Original Resume Header:\n{original_header}"
return system_prompt, prompt
async def generate_tailored_resume(self, message, skills_mapping: Dict, candidate_qualifications: Dict, original_header: str) -> AsyncGenerator[Message, None]:
async def generate_tailored_resume(self, message, skills_mapping: Dict, candidate_qualifications: Dict, original_header: str, metadata: Dict[str, Any]) -> AsyncGenerator[Message, None]:
"""Generate a tailored resume based on skills mapping."""
try:
system_prompt, prompt = self.create_resume_generation_prompt(skills_mapping, candidate_qualifications, original_header)
async for message in self.call_llm(message, system_prompt, prompt, temperature=0.4): # Slightly higher temperature for better writing
if message.status != "done":
yield message
if message.status == "error":
return
message.status = "done"
system_prompt, prompt = self.create_resume_generation_prompt(skills_mapping, candidate_qualifications, original_header)
metadata["system_prompt"] = system_prompt
metadata["prompt"] = prompt
async for message in self.call_llm(message, system_prompt, prompt, temperature=0.4): # Slightly higher temperature for better writing
if message.status != "done":
yield message
if message.status == "error":
return
except Exception as e:
message.status = "error"
message.response = f"Error in resume generation: {str(e)}"
logger.error(message.response)
logger.error(traceback.format_exc())
yield message
raise
metadata["results"] = message.response
yield message
return
# Stage 3: Verification Implementation
def create_verification_prompt(self, generated_resume: str, skills_mapping: Dict, candidate_qualifications: Dict) -> tuple[str, str]:
@ -769,30 +840,31 @@ class JobDescription(Agent):
prompt += f"Candidate Qualifications:\n{json.dumps(candidate_qualifications, indent=2)}"
return system_prompt, prompt
async def verify_resume(self, message: Message, generated_resume: str, skills_mapping: Dict, candidate_qualifications: Dict) -> AsyncGenerator[Message, None]:
async def verify_resume(self, message: Message, generated_resume: str, skills_mapping: Dict, candidate_qualifications: Dict, metadata: Dict[str, Any]) -> AsyncGenerator[Message, None]:
"""Verify the generated resume for accuracy against original materials."""
try:
system_prompt, prompt = self.create_verification_prompt(generated_resume, skills_mapping, candidate_qualifications)
metadata["system_prompt"] = system_prompt
metadata["prompt"] = prompt
async for message in self.call_llm(message, system_prompt, prompt):
if message.status != "done":
yield message
if message.status == "error":
return
# Extract JSON from response
json_str = self.extract_json_from_text(message.response)
metadata["results"] = json.loads(json_str)["verification_results"]
message.status = "done"
message.response = json_str
yield message
return
except Exception as e:
message.status = "error"
message.response = f"Error in resume verification: {str(e)}"
logger.error(message.response)
logger.error(traceback.format_exc())
metadata["error"] = message.response
yield message
raise
async def correct_resume_issues(self, message: Message, generated_resume: str, verification_results: Dict, skills_mapping: Dict, candidate_qualifications: Dict, original_header: str) -> AsyncGenerator[Message, None]:
async def correct_resume_issues(self, message: Message, generated_resume: str, verification_results: Dict, skills_mapping: Dict, candidate_qualifications: Dict, original_header: str, metadata: Dict[str, Any]) -> AsyncGenerator[Message, None]:
"""Correct issues in the resume based on verification results."""
if verification_results["verification_results"]["overall_assessment"] == "APPROVED":
message.status = "done"
@ -810,7 +882,10 @@ class JobDescription(Agent):
2. Ensure all corrections maintain factual accuracy based on the skills mapping
3. Do not introduce any new claims or skills not present in the verification data
4. Maintain the original format and structure of the resume
5. DO NOT directly list the verification report or skills mapping
6. Provide ONLY the fully corrected resume
7. DO NOT provide Verification Results or other additional information beyond the corrected resume
## PROCESS:
1. For each issue in the verification report:
@ -832,195 +907,206 @@ class JobDescription(Agent):
prompt += f"Candidate Qualifications:\n{json.dumps(candidate_qualifications, indent=2)}\n\n"
prompt += f"Original Resume Header:\n{original_header}"
try:
async for message in self.call_llm(message, system_prompt, prompt, temperature=0.3):
if message.status != "done":
yield message
yield message
if message.status == "error":
return
metadata["system_prompt"] = system_prompt
metadata["prompt"] = prompt
except Exception as e:
message.status = "error"
message.response = f"Error in resume correction: {str(e)}"
logger.error(message.response)
logger.error(traceback.format_exc())
async for message in self.call_llm(message, system_prompt, prompt, temperature=0.3):
if message.status != "done":
yield message
raise
if message.status == "error":
return
metadata["results"] = message.response
yield message
async def generate_factual_tailored_resume(self, message: Message, job_description: str, resume: str, additional_context: str = "") -> AsyncGenerator[Message, None]:
"""
Main function to generate a factually accurate tailored resume.
Args:
job_description: The job description text
resume: The candidate's original resume text
additional_context: Any additional context about the candidate (optional)
Returns:
Dict containing the generated resume and supporting analysis
"""
try:
message.status = "thinking"
logger.info(message.response)
yield message
"""
Main function to generate a factually accurate tailored resume.
Args:
job_description: The job description text
resume: The candidate's original resume text
additional_context: Any additional context about the candidate (optional)
Returns:
Dict containing the generated resume and supporting analysis
"""
message.status = "thinking"
logger.info(message.response)
yield message
# Stage 1A: Analyze job requirements
message.response = "Multi-stage RAG resume generation process: Stage 1A: Analyzing job requirements"
logger.info(message.response)
yield message
message.metadata["generate_factual_tailored_resume"] = {}
metadata = message.metadata["generate_factual_tailored_resume"]
# Stage 1A: Analyze job requirements
message.response = "Multi-stage RAG resume generation process: Stage 1A: Analyzing job requirements"
logger.info(message.response)
yield message
metadata["job_requirements"] = {}
async for message in self.analyze_job_requirements(message, job_description, metadata["job_requirements"]):
if message.status != "done":
yield message
if message.status == "error":
return
job_requirements = json.loads(message.response)
async for message in self.analyze_job_requirements(message, job_description):
if message.status != "done":
yield message
if message.status == "error":
return
# Stage 1B: Analyze candidate qualifications
message.status = "thinking"
message.response = "Multi-stage RAG resume generation process: Stage 1B: Analyzing candidate qualifications"
logger.info(message.response)
yield message
metadata["analyze_candidate_qualifications"] = {
"additional_context": additional_context
}
async for message in self.analyze_candidate_qualifications(
message=message,
resume=resume,
additional_context=additional_context,
metadata=metadata["analyze_candidate_qualifications"]):
if message.status != "done":
yield message
if message.status == "error":
return
candidate_qualifications = json.loads(message.response)
# Stage 1C: Create skills mapping
message.status = "thinking"
message.response = "Multi-stage RAG resume generation process: Stage 1C: Creating skills mapping"
logger.info(message.response)
yield message
metadata["skills_mapping"] = {}
async for message in self.create_skills_mapping(
message=message,
job_requirements=job_requirements,
candidate_qualifications=candidate_qualifications,
metadata=metadata["skills_mapping"]):
if message.status != "done":
yield message
if message.status == "error":
return
skills_mapping = json.loads(message.response)
# Extract header from original resume
original_header = self.extract_header_from_resume(resume)
# Stage 2: Generate tailored resume
message.status = "thinking"
message.response = "Multi-stage RAG resume generation process: Stage 2: Generating tailored resume"
logger.info(message.response)
yield message
metadata["generate_tailored_resume"] = {}
async for message in self.generate_tailored_resume(
message=message,
skills_mapping=skills_mapping,
candidate_qualifications=candidate_qualifications,
original_header=original_header,
metadata=metadata["generate_tailored_resume"]):
if message.status != "done":
yield message
if message.status == "error":
return
generated_resume = message.response
job_requirements = json.loads(message.response)
message.metadata["job_requirements"] = job_requirements
# Stage 3: Verify resume
message.status = "thinking"
message.response = "Multi-stage RAG resume generation process: Stage 3: Verifying resume for accuracy"
logger.info(message.response)
yield message
metadata["verify_resume"] = {
"first_pass": {}
}
async for message in self.verify_resume(
message=message,
generated_resume=generated_resume,
skills_mapping=skills_mapping,
candidate_qualifications=candidate_qualifications,
metadata=metadata["verify_resume"]["first_pass"]):
if message.status != "done":
yield message
if message.status == "error":
return
verification_results = json.loads(message.response)
# Handle corrections if needed
if verification_results["verification_results"]["overall_assessment"] == "NEEDS REVISION":
message.status = "thinking"
message.response = "Correcting issues found in verification"
logger.info(message.response)
yield message
# Stage 1B: Analyze candidate qualifications
message.status = "thinking"
message.response = "Multi-stage RAG resume generation process: Stage 1B: Analyzing candidate qualifications"
logger.info(message.response)
yield message
async for message in self.analyze_candidate_qualifications(message, resume, additional_context):
if message.status != "done":
yield message
if message.status == "error":
return
candidate_qualifications = json.loads(message.response)
message.metadata["candidate_qualifications"] = job_requirements
# Stage 1C: Create skills mapping
message.status = "thinking"
message.response = "Multi-stage RAG resume generation process: Stage 1C: Creating skills mapping"
logger.info(message.response)
yield message
async for message in self.create_skills_mapping(message, job_requirements, candidate_qualifications):
if message.status != "done":
yield message
if message.status == "error":
return
skills_mapping = json.loads(message.response)
message.metadata["skills_mapping"] = skills_mapping
# Extract header from original resume
original_header = self.extract_header_from_resume(resume)
# Stage 2: Generate tailored resume
message.status = "thinking"
message.response = "Multi-stage RAG resume generation process: Stage 2: Generating tailored resume"
logger.info(message.response)
yield message
async for message in self.generate_tailored_resume(message, skills_mapping, candidate_qualifications, original_header):
if message.status != "done":
yield message
if message.status == "error":
return
generated_resume = message.response
message.metadata["generated_resume"] = {
"first_pass": generated_resume
}
# Stage 3: Verify resume
message.status = "thinking"
message.response = "Multi-stage RAG resume generation process: Stage 3: Verifying resume for accuracy"
logger.info(message.response)
yield message
async for message in self.verify_resume(message, generated_resume, skills_mapping, candidate_qualifications):
if message.status != "done":
yield message
if message.status == "error":
return
verification_results = json.loads(message.response)
message.metadata["verification_results"] = {
"first_pass": verification_results
}
# Handle corrections if needed
if verification_results["verification_results"]["overall_assessment"] == "NEEDS REVISION":
message.status = "thinking"
message.response = "Correcting issues found in verification"
logger.info(message.response)
yield message
async for message in self.correct_resume_issues(
message=message,
generated_resume=generated_resume,
verification_results=verification_results,
skills_mapping=skills_mapping,
candidate_qualifications=candidate_qualifications,
original_header=original_header
):
if message.status != "done":
yield message
if message.status == "error":
return
generated_resume = message.response
message.metadata["generated_resume"]["second_pass"] = generated_resume
# Re-verify after corrections
message.status = "thinking"
message.response = "Re-verifying corrected resume"
logger.info(message.response)
async for message in self.verify_resume(
message=message,
generated_resume=generated_resume,
skills_mapping=skills_mapping,
candidate_qualifications=candidate_qualifications):
if message.status != "done":
yield message
if message.status == "error":
return
verification_results = json.loads(message.response)
message.metadata["verification_results"]["second_pass"] = verification_results
# Return the final results
message.status = "done"
message.response = generated_resume
yield message
logger.info("Resume generation process completed successfully")
metadata["correct_resume_issues"] = {}
async for message in self.correct_resume_issues(
message=message,
generated_resume=generated_resume,
verification_results=verification_results,
skills_mapping=skills_mapping,
candidate_qualifications=candidate_qualifications,
original_header=original_header,
metadata=metadata["correct_resume_issues"]
):
if message.status != "done":
yield message
if message.status == "error":
return
except Exception as e:
message.status = "error"
logger.info(message.response)
message.response = f"Error in resume generation process: {str(e)}"
logger.error(message.response)
logger.error(traceback.format_exc())
yield message
raise
generated_resume = message.response
# Re-verify after corrections
message.status = "thinking"
message.response = "Re-verifying corrected resume"
yield message
logger.info(message.response)
metadata["verify_resume"]["second_pass"] = {}
async for message in self.verify_resume(
message=message,
generated_resume=generated_resume,
skills_mapping=skills_mapping,
candidate_qualifications=candidate_qualifications,
metadata=metadata["verify_resume"]["second_pass"]):
if message.status != "done":
yield message
if message.status == "error":
return
verification_results = json.loads(message.response)
# Return the final results
message.status = "done"
message.response = generated_resume
yield message
logger.info("Resume generation process completed successfully")
return
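
For reference, a rough sketch of how message.metadata["generate_factual_tailored_resume"] ends up laid out after a successful run of the stages above (values are placeholders; "second_pass" is only present when the first verification requests a revision):

```python
# Placeholder values; keys mirror the metadata dictionaries populated above.
metadata_example = {
    "job_requirements": {"system_prompt": "...", "prompt": "...", "results": {}},
    "analyze_candidate_qualifications": {
        "additional_context": "...",
        "system_prompt": "...",
        "prompt": "...",
        "results": {},
    },
    "skills_mapping": {"system_prompt": "...", "prompt": "...", "skills_mapping": {}},
    "generate_tailored_resume": {"system_prompt": "...", "prompt": "...", "results": "tailored resume markdown"},
    "verify_resume": {
        "first_pass": {"system_prompt": "...", "prompt": "...", "results": {}},
        "second_pass": {"system_prompt": "...", "prompt": "...", "results": {}},  # only after revision
    },
    "correct_resume_issues": {"system_prompt": "...", "prompt": "...", "results": "corrected resume markdown"},
}
```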
# Main orchestration function
async def generate_llm_response(self, llm: Any, model: str, message: Message, temperature=0.7) -> AsyncGenerator[Message, None]:
logger.info(f"{self.agent_type} - {inspect.stack()[0].function}")
original_message = message.model_copy()
self.llm = llm
self.model = model
self.metrics.generate_count.labels(agent=self.agent_type).inc()
with self.metrics.generate_duration.labels(agent=self.agent_type).time():
job_description = message.preamble["job_description"]
resume = message.preamble["resume"]
additional_context = message.preamble["context"]
job_description = message.preamble["job_description"]
resume = message.preamble["resume"]
additional_context = message.preamble["context"]
try:
async for message in self.generate_factual_tailored_resume(message=message, job_description=job_description, resume=resume, additional_context=additional_context):
if message.status != "done":
if message.status != "done":
yield message
message.prompt = original_message.prompt
yield message
return
except Exception as e:
message.status = "error"
message.response = f"Error in resume generation process: {str(e)}"
logger.error(message.response)
logger.error(traceback.format_exc())
yield message
return
# Register the base agent
agent_registry.register(JobDescription._agent_type, JobDescription)


@ -4,6 +4,8 @@ ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
#model = "deepseek-r1:7b" # Tool calls don"t work
#model="mistral:7b" # Tool calls don"t work
#model = "llama3.2"
#model = "qwen3:8b" # Requires newer ollama
#model = "gemma3:4b" # Requires newer ollama
model = os.getenv("MODEL_NAME", "qwen2.5:7b")
embedding_model = os.getenv("EMBEDDING_MODEL_NAME", "mxbai-embed-large")
persist_directory = os.getenv("PERSIST_DIR", "/opt/backstory/chromadb")


@ -1,6 +1,7 @@
from pydantic import BaseModel, Field # type: ignore
from typing import Dict, List, Optional, Any
from datetime import datetime, timezone
from asyncio import Event
class Tunables(BaseModel):
enable_rag : bool = Field(default=True) # Enable RAG collection chromadb matching
@ -8,6 +9,7 @@ class Tunables(BaseModel):
enable_context : bool = Field(default=True) # Add <|context|> field to message
class Message(BaseModel):
model_config = {"arbitrary_types_allowed": True} # Allow Event
# Required
prompt: str # Query to be answered
@ -32,7 +34,9 @@ class Message(BaseModel):
network_bytes: int = 0 # Total bytes sent while streaming packets
actions: List[str] = [] # Other session modifying actions performed while processing the message
timestamp: datetime = datetime.now(timezone.utc)
chunk: str = Field(default="") # This needs to be serialized so it will be sent in responses
partial_response: str = Field(default="") # This needs to be serialized so it will be sent in responses on timeout
def add_action(self, action: str | list[str]) -> None:
"""Add a actions(s) to the message."""
if isinstance(action, str):