Pre claude rewrite

James Ketr 2025-04-17 15:04:10 -07:00
parent c00f3068fa
commit eb2629bcce
15 changed files with 2203 additions and 257 deletions

.gitignore vendored

@ -3,3 +3,5 @@ cache/**
jupyter/**
ollama/**
sessions/**
chromadb/**
chromadb-prod/**


@ -139,14 +139,7 @@ RUN python setup.py clean --all bdist_wheel --linux
# * ollama-ipex-llm
# * src/server.py - model server supporting RAG and fine-tuned models
#
# Agents using server:
# * src/web-ui.py - REACT server (backstory.ketrenos.com)
# * src/irc.py - IRC backend (irc.libera.chat #backstory-test)
# * src/cli.py - Command line chat
#
# Utilities:
# * src/training-fine-tune.py - Perform fine-tuning on curated documents
FROM ubuntu:oracular AS backstory
FROM ubuntu:oracular AS llm-base
COPY --from=python-build /opt/python /opt/python
@ -184,10 +177,22 @@ RUN apt-get update \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
WORKDIR /opt/backstory
RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
libncurses6 \
rsync \
jq \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
RUN dpkg -i /opt/ze-monitor-*deb && rm /opt/ze-monitor-*deb
RUN usermod -aG ze-monitor root
WORKDIR /opt/backstory
# Setup the ollama python virtual environment
RUN python3 -m venv --system-site-packages /opt/backstory/venv
@ -204,7 +209,6 @@ RUN { \
# Activate the pip environment on all shell calls
SHELL [ "/opt/backstory/shell" ]
# From https://pytorch-extension.intel.com/installation?platform=gpu&version=v2.6.10%2Bxpu&os=linux%2Fwsl2&package=pip
RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu
RUN pip install intel-extension-for-pytorch==2.6.10+xpu oneccl_bind_pt==2.6.0+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
@ -243,29 +247,17 @@ RUN pip3 install 'bigdl-core-xe-all>=2.6.0b'
# NOTE: IPEX includes the oneAPI components... not sure if they still need to be installed separately with a oneAPI env
RUN pip install einops diffusers # Required for IPEX optimize(), which is required to convert from Params4bit
# Needed by src/utils/chroma.py
RUN pip install watchdog
# Install packages needed for stock.py
RUN pip install yfinance pyzt geopy PyHyphen nltk
# While running in development mode via bind mounts, don't copy
# the source, or follow-on containers will always rebuild whenever
# the source changes.
#COPY /src/ /opt/backstory/src/
FROM llm-base AS backstory
COPY /src/requirements.txt /opt/backstory/src/requirements.txt
RUN pip install -r /opt/backstory/src/requirements.txt
SHELL [ "/bin/bash", "-c" ]
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
libncurses6 \
rsync \
jq \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
RUN dpkg -i /opt/ze-monitor-*deb && rm /opt/ze-monitor-*deb
RUN usermod -aG ze-monitor root
COPY /src/ /opt/backstory/src/
SHELL [ "/bin/bash", "-c" ]
@ -288,6 +280,14 @@ RUN { \
echo ' exec /bin/bash'; \
echo ' fi' ; \
echo 'else'; \
echo ' if [[ "${PRODUCTION}" -eq 0 ]]; then'; \
echo ' while true; do'; \
echo ' cd /opt/backstory/frontend'; \
echo ' echo "Launching Backstory React Frontend..."'; \
echo ' npm start "${@}" || echo "Backstory frontend died. Restarting in 3 seconds."'; \
echo ' sleep 3'; \
echo ' done &' ; \
echo ' fi' ; \
echo ' while true; do'; \
echo ' echo "Launching Backstory server..."'; \
echo ' python src/server.py "${@}" || echo "Backstory server died. Restarting in 3 seconds."'; \
@ -332,9 +332,8 @@ RUN apt-get update \
WORKDIR /opt/ollama
# Download the nightly ollama release from ipex-llm
#ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250226-ubuntu.tgz
#ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-ipex-llm-2.2.0b20250313-ubuntu.tgz
ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0/ollama-ipex-llm-2.2.0-ubuntu.tgz
#ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.3.0-nightly/ollama-ipex-llm-2.3.0b20250415-ubuntu.tgz
RUN wget -qO - ${OLLAMA_VERSION} | \
tar --strip-components=1 -C . -xzv
@ -414,7 +413,7 @@ ENV PATH=/opt/ollama:${PATH}
ENTRYPOINT [ "/entrypoint.sh" ]
FROM backstory AS jupyter
FROM llm-base AS jupyter
SHELL [ "/opt/backstory/shell" ]
@ -425,6 +424,8 @@ RUN pip install \
&& jupyter lab build --dev-build=False --minimize=False
# END setup Jupyter
COPY /src/requirements.txt /opt/backstory/src/requirements.txt
RUN pip install -r /opt/backstory/src/requirements.txt
SHELL [ "/bin/bash", "-c" ]


@ -1,12 +1,57 @@
# Backstory
Backstory is an AI Resume agent that provides context into a diverse career narrative.
Backstory is an AI Resume agent that provides context into a diverse career narrative. Backstory will take a collection of documents about a person and provide:
This project provides an AI chat client. While it can run a variety of LLM models, it is currently running Qwen2.5:7b. In addition to the standard model, it is enhanced with a RAG expert system that will chunk and embed any text files in `./docs`. It also exposes several utility tools for the LLM to use to obtain real-time data.
* Through the use of several custom Language Processing Modules (LPMs), develop a comprehensive set of test and validation data based on the input documents. While manual review of content should be performed to ensure accuracy, several LLM techniques are employed in the LPMs in order to isolate and remove hallucinations and inaccuracies in the test and validation data.
* Utilizing quantized low-rank adaptation (QLoRA) and parameter-efficient fine-tuning (PEFT), provide a hyperparameter-tuned and customized LLM for use in chat and content creation scenarios with expert knowledge about the individual (a minimal configuration sketch follows this list).
* Post-training, utilize additional RAG content to further enhance the information domain used in conversations and content generation.
* An integrated document publishing workflow that will transform a "Job Description" into a customized "Resume" for the person the LLM has been trained on.
* "Fact Check" the resulting resume against the RAG content directly provided by the user in order to remove hallucinations.
Internally, it is built using PyTorch 2.6 and Python 3.11 (several pip packages were not yet available for the Python 3.12 shipped with Ubuntu Oracular 24.10, which these containers are based on).
While it can run a variety of LLM models, Backstory is currently running Qwen2.5:7b. In addition to the standard model, the chat pipeline also exposes several utility tools for the LLM to use to obtain real-time data.
NOTE: If running on an Intel Arc A-series graphics processor, fp64 is not supported and may need to either be emulated or have the model quantized. It has been a while since I've had an A-series GPU to test on, so if you run into problems please file an [issue](https://github.com/jketreno/backstory/issues)--I have some routines I can put in, but don't have a way to test them.
Internally, Backstory is built using PyTorch 2.6 and Python 3.11 (several pip packages were not yet available for the Python 3.12 shipped with Ubuntu Oracular 24.10, which these containers are based on).
This system was built to run on commodity hardware, for example the Intel Arc B580 GPU with 12 GB of VRAM.
# Zero to Hero
Before you spend too much time learning how to customize Backstory, you may want to see it in action with your own information. Fine-tuning the LLM with your data can take a while, so you might want to see what the system can do just by utilizing retrieval-augmented generation.
Backstory works by generating a set of facts about you. Those facts can be exposed to the LLM via RAG, or baked into the LLM by fine-tuning. In either scenario, Backstory needs to know your relationship with a given fact.
To facilitate this, Backstory expects the documents it reads to be marked with information that highlights your role in relation to the document. That information is either stored within each document as [Front Matter (YAML)](https://jekyllrb.com/docs/front-matter/) or as a YAML sidecar file (a file with the same name as the content, plus the extension `.yml`).
The two key items expected in the front matter / sidecar are:
```
---
person:
role:
---
```
For example, a file `resume.md` could have the following either as front matter or in the file `resume.md.yml`:
```
---
person: James Ketrenos
role: This resume is about James Ketrenos and refers to his work history.
---
```
A document from a project you worked on, in my case `backstory`, could have the following front matter:
```
---
person: James Ketrenos
role: Designed, built, and deployed the application described in this document.
---
```
During both RAG extraction and during fine-tuning, that context information is provided to the LLM so it can better respond to queries about the user and that user's specific roles.
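As a rough illustration of how that per-document context can be read, the sketch below resolves the `person` and `role` fields from either a `.yml` sidecar or embedded front matter. It assumes PyYAML is available; the helper name and return shape are hypothetical, not Backstory's actual loader.
```
# Sketch: resolve a document's person/role metadata from a sidecar or front matter.
# Assumes PyYAML; load_doc_context() is a hypothetical helper, not Backstory's API.
import os
import yaml

def load_doc_context(path: str) -> dict:
    sidecar = path + ".yml"
    if os.path.exists(sidecar):                  # sidecar: same name plus .yml
        with open(sidecar, "r", encoding="utf-8") as f:
            return yaml.safe_load(f) or {}

    with open(path, "r", encoding="utf-8") as f:
        text = f.read()
    if text.startswith("---"):                   # embedded YAML front matter
        parts = text.split("---", 2)
        if len(parts) >= 3:
            return yaml.safe_load(parts[1]) or {}
    return {}

# e.g. load_doc_context("docs/resume.md") -> {"person": "James Ketrenos", "role": "..."}
```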
This project is seeded with a minimal resume and a document about Backstory. Those are present in the `docs/` directory, which is where you will place your content. If you do not replace anything and run the system as-is, Backstory will be able to provide information about me via RAG (there is fine-tuned data provided in this project archive).
# Installation
@ -14,12 +59,6 @@ This project uses docker containers to build. As this was originally written to
NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/)
## Want to run under WSL2? No can do...
https://www.intel.com/content/www/us/en/support/articles/000093216/graphics/processor-graphics.html
The A- and B-series discrete GPUs do not support SR-IOV, required for the GPU partitioning that Microsoft Windows uses in order to support GPU acceleration in WSL.
## Building
NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/)

chromadb-prod/.keep Normal file

chromadb/.keep Normal file


@ -8,6 +8,43 @@ services:
restart: "no"
env_file:
- .env
environment:
- PRODUCTION=0
- MODEL_NAME=${MODEL_NAME:-qwen2.5:3b}
devices:
- /dev/dri:/dev/dri
depends_on:
- ollama
networks:
- internal
ports:
- 8912:8911 # Flask React server
- 3000:3000 # REACT expo while developing frontend
volumes:
- ./cache:/root/.cache # Persist all models and GPU kernel cache
- ./sessions:/opt/backstory/sessions:rw # Persist sessions
- ./chromadb:/opt/backstory/chromadb:rw # Persist ChromaDB
- ./docs:/opt/backstory/docs:ro # Live mount of RAG content
- ./src:/opt/backstory/src:rw # Live mount server src
- ./frontend:/opt/backstory/frontend:rw # Live mount frontend src
cap_add: # used for running ze-monitor within container
- CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
- CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
- CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
backstory-prod:
build:
context: .
dockerfile: Dockerfile
target: backstory
image: backstory
container_name: backstory-prod
restart: "always"
env_file:
- .env
environment:
- PRODUCTION=1
- MODEL_NAME=${MODEL_NAME:-qwen2.5:7b}
devices:
- /dev/dri:/dev/dri
depends_on:
@ -16,12 +53,11 @@ services:
- internal
ports:
- 8911:8911 # Flask React server
- 3000:3000 # REACT expo while developing frontend
volumes:
- ./cache:/root/.cache # Persist all models and GPU kernel cache
- ./chromadb-prod:/opt/backstory/chromadb:rw # Persist ChromaDB
- ./sessions:/opt/backstory/sessions:rw # Persist sessions
- ./docs:/opt/backstory/docs:ro # Live mount of RAG content
- ./src:/opt/backstory/src:rw # Live mount server src
- ./frontend:/opt/backstory/frontend:rw # Live mount frontend src
cap_add: # used for running ze-monitor within container
- CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
@ -34,7 +70,7 @@ services:
dockerfile: Dockerfile
target: ollama
image: ollama
restart: "no"
restart: "always"
env_file:
- .env
environment:
@ -60,6 +96,8 @@ services:
dockerfile: Dockerfile
target: jupyter
image: jupyter
container_name: jupyter
restart: "always"
env_file:
- .env
devices:
@ -95,6 +133,7 @@ services:
dockerfile: Dockerfile
target: miniircd
image: miniircd
restart: "no"
env_file:
- .env
devices:

File diff suppressed because it is too large.


@ -8,6 +8,9 @@
"@fontsource/roboto": "^5.2.5",
"@mui/icons-material": "^7.0.1",
"@mui/material": "^7.0.1",
"@tensorflow/tfjs": "^4.22.0",
"@tensorflow/tfjs-backend-webgl": "^4.22.0",
"@tensorflow/tfjs-tsne": "^0.2.0",
"@testing-library/dom": "^10.4.0",
"@testing-library/jest-dom": "^6.6.3",
"@testing-library/react": "^16.2.0",
@ -26,6 +29,7 @@
"rehype-katex": "^7.0.1",
"remark-gfm": "^4.0.1",
"remark-math": "^6.0.0",
"tsne-js": "^1.0.3",
"typescript": "^4.9.5",
"web-vitals": "^2.1.4"
},
@ -52,5 +56,8 @@
"last 1 firefox version",
"last 1 safari version"
]
},
"devDependencies": {
"@types/plotly.js": "^2.35.5"
}
}


@ -36,6 +36,7 @@ import { Message, MessageList } from './Message';
import { MessageData } from './MessageMeta';
import { SeverityType } from './Snack';
import { ContextStatus } from './ContextStatus';
import { VectorVisualizer, ResultData } from './VectorVisualizer';
import './App.css';
@ -103,13 +104,11 @@ type SystemInfo = {
"CPU": string
};
const getConnectionBase = (loc: any): string => {
if (!loc.host.match(/.*battle-linux.*/)) {
return loc.protocol + "//" + loc.host;
} else {
return loc.protocol + "//battle-linux.ketrenos.com:8911";
return loc.protocol + "//battle-linux.ketrenos.com:8912";
}
}
@ -320,7 +319,7 @@ const App = () => {
const conversationRef = useRef<any>(null);
const [processing, setProcessing] = useState(false);
const [sessionId, setSessionId] = useState<string | undefined>(undefined);
const [loc,] = useState<Location>(window.location)
const [connectionBase,] = useState<string>(getConnectionBase(window.location))
const [mobileOpen, setMobileOpen] = useState(false);
const [isClosing, setIsClosing] = useState(false);
const [snackOpen, setSnackOpen] = useState(false);
@ -344,6 +343,7 @@ const App = () => {
const [resume, setResume] = useState<MessageData | undefined>(undefined);
const [facts, setFacts] = useState<MessageData | undefined>(undefined);
const timerRef = useRef<any>(null);
const [result, setResult] = useState<ResultData | undefined>(undefined);
const startCountdown = (seconds: number) => {
if (timerRef.current) clearInterval(timerRef.current);
@ -406,7 +406,7 @@ const App = () => {
if (systemInfo !== undefined || sessionId === undefined) {
return;
}
fetch(getConnectionBase(loc) + `/api/system-info/${sessionId}`, {
fetch(connectionBase + `/api/system-info/${sessionId}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
@ -420,7 +420,32 @@ const App = () => {
console.error('Error obtaining system information:', error);
setSnack("Unable to obtain system information.", "error");
});
}, [systemInfo, setSystemInfo, loc, setSnack, sessionId])
}, [systemInfo, setSystemInfo, connectionBase, setSnack, sessionId])
// Get the collection to visualize
useEffect(() => {
if (result !== undefined || sessionId === undefined) {
return;
}
const fetchCollection = async () => {
try {
const response = await fetch(connectionBase + `/api/tsne/${sessionId}`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ dimensions: 3 }),
});
const data = await response.json();
setResult(data);
} catch (error) {
console.error('Error obtaining collection information:', error);
setSnack("Unable to obtain collection information.", "error");
};
};
fetchCollection();
}, [result, setResult, connectionBase, setSnack, sessionId])
// Get the About markdown
useEffect(() => {
@ -451,7 +476,7 @@ const App = () => {
// Update the context status
const updateContextStatus = useCallback(() => {
fetch(getConnectionBase(loc) + `/api/context-status/${sessionId}`, {
fetch(connectionBase + `/api/context-status/${sessionId}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
@ -465,14 +490,14 @@ const App = () => {
console.error('Error getting context status:', error);
setSnack("Unable to obtain context status.", "error");
});
}, [setContextStatus, loc, setSnack, sessionId]);
}, [setContextStatus, connectionBase, setSnack, sessionId]);
// Set the initial chat history to "loading" or the welcome message if loaded.
useEffect(() => {
if (sessionId === undefined) {
setConversation([loadingMessage]);
} else {
fetch(getConnectionBase(loc) + `/api/history/${sessionId}`, {
fetch(connectionBase + `/api/history/${sessionId}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
@ -492,17 +517,17 @@ const App = () => {
});
updateContextStatus();
}
}, [sessionId, setConversation, updateContextStatus, loc, setSnack]);
}, [sessionId, setConversation, updateContextStatus, connectionBase, setSnack]);
// Extract the sessionId from the URL if present, otherwise
// request a sessionId from the server.
useEffect(() => {
const url = new URL(loc.href);
const url = new URL(window.location.href);
const pathParts = url.pathname.split('/').filter(Boolean);
if (!pathParts.length) {
console.log("No session id -- creating a new session")
fetch(getConnectionBase(loc) + `/api/context`, {
fetch(connectionBase + `/api/context`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
@ -520,7 +545,7 @@ const App = () => {
setSessionId(pathParts[0]);
}
}, [setSessionId, loc]);
}, [setSessionId, connectionBase]);
// If the systemPrompt has not been set, fetch it from the server
useEffect(() => {
@ -529,7 +554,7 @@ const App = () => {
}
const fetchTunables = async () => {
// Make the fetch request with proper headers
const response = await fetch(getConnectionBase(loc) + `/api/tunables/${sessionId}`, {
const response = await fetch(connectionBase + `/api/tunables/${sessionId}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
@ -544,7 +569,7 @@ const App = () => {
}
fetchTunables();
}, [sessionId, serverSystemPrompt, setServerSystemPrompt, loc]);
}, [sessionId, serverSystemPrompt, setServerSystemPrompt, connectionBase]);
// If the tools have not been set, fetch them from the server
useEffect(() => {
@ -554,7 +579,7 @@ const App = () => {
const fetchTools = async () => {
try {
// Make the fetch request with proper headers
const response = await fetch(getConnectionBase(loc) + `/api/tools/${sessionId}`, {
const response = await fetch(connectionBase + `/api/tools/${sessionId}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
@ -573,7 +598,7 @@ const App = () => {
}
fetchTools();
}, [sessionId, tools, setTools, setSnack, loc]);
}, [sessionId, tools, setTools, setSnack, connectionBase]);
// If the jobDescription and resume have not been set, fetch them from the server
useEffect(() => {
@ -586,7 +611,7 @@ const App = () => {
const fetchResume = async () => {
try {
// Make the fetch request with proper headers
const response = await fetch(getConnectionBase(loc) + `/api/resume/${sessionId}`, {
const response = await fetch(connectionBase + `/api/resume/${sessionId}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
@ -615,7 +640,7 @@ const App = () => {
}
fetchResume();
}, [sessionId, resume, jobDescription, setResume, setJobDescription, setSnack, loc]);
}, [sessionId, resume, jobDescription, setResume, setJobDescription, setSnack, connectionBase]);
// If the RAGs have not been set, fetch them from the server
useEffect(() => {
@ -625,7 +650,7 @@ const App = () => {
const fetchRags = async () => {
try {
// Make the fetch request with proper headers
const response = await fetch(getConnectionBase(loc) + `/api/rags/${sessionId}`, {
const response = await fetch(connectionBase + `/api/rags/${sessionId}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
@ -644,7 +669,7 @@ const App = () => {
}
fetchRags();
}, [sessionId, rags, setRags, setSnack, loc]);
}, [sessionId, rags, setRags, setSnack, connectionBase]);
// If context status changes, show a warning if necessary. If it drops
// back below the threshold, clear the warning trigger
@ -664,7 +689,7 @@ const App = () => {
const toggleRag = async (tool: Tool) => {
tool.enabled = !tool.enabled
try {
const response = await fetch(getConnectionBase(loc) + `/api/rags/${sessionId}`, {
const response = await fetch(connectionBase + `/api/rags/${sessionId}`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
@ -686,7 +711,7 @@ const App = () => {
const toggleTool = async (tool: Tool) => {
tool.enabled = !tool.enabled
try {
const response = await fetch(getConnectionBase(loc) + `/api/tools/${sessionId}`, {
const response = await fetch(connectionBase + `/api/tools/${sessionId}`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
@ -711,7 +736,7 @@ const App = () => {
}
const sendSystemPrompt = async (prompt: string) => {
try {
const response = await fetch(getConnectionBase(loc) + `/api/tunables/${sessionId}`, {
const response = await fetch(connectionBase + `/api/tunables/${sessionId}`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
@ -735,7 +760,7 @@ const App = () => {
sendSystemPrompt(systemPrompt);
}, [systemPrompt, setServerSystemPrompt, serverSystemPrompt, loc, sessionId, setSnack]);
}, [systemPrompt, setServerSystemPrompt, serverSystemPrompt, connectionBase, sessionId, setSnack]);
useEffect(() => {
if (sessionId === undefined) {
@ -743,7 +768,7 @@ const App = () => {
}
const sendMessageHistoryLength = async (length: number) => {
try {
const response = await fetch(getConnectionBase(loc) + `/api/tunables/${sessionId}`, {
const response = await fetch(connectionBase + `/api/tunables/${sessionId}`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
@ -766,11 +791,11 @@ const App = () => {
sendMessageHistoryLength(messageHistoryLength);
}, [messageHistoryLength, setMessageHistoryLength, loc, sessionId, setSnack]);
}, [messageHistoryLength, setMessageHistoryLength, connectionBase, sessionId, setSnack]);
const reset = async (types: ("rags" | "tools" | "history" | "system-prompt" | "message-history-length")[], message: string = "Update successful.") => {
try {
const response = await fetch(getConnectionBase(loc) + `/api/reset/${sessionId}`, {
const response = await fetch(connectionBase + `/api/reset/${sessionId}`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
@ -883,7 +908,7 @@ const App = () => {
}
// Make the fetch request with proper headers
const response = await fetch(getConnectionBase(loc) + `/api/chat/${sessionId}`, {
const response = await fetch(connectionBase + `/api/chat/${sessionId}`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
@ -1076,6 +1101,7 @@ const App = () => {
onChange={handleTabChange} aria-label="Backstory navigation">
<Tab label="Backstory" icon={<Avatar sx={{ width: 24, height: 24 }} variant="rounded" alt="Backstory logo" src="/logo192.png" />} iconPosition="start" />
<Tab label="Resume Builder"/>
<Tab label="Visualizer" />
<Tab label="About"/>
</Tabs>
</Box>}
@ -1183,10 +1209,18 @@ const App = () => {
</CustomTabPanel>
<CustomTabPanel tab={tab} index={1}>
<ResumeBuilder {...{ isScrolledToBottom, scrollToBottom, facts, setFacts, resume, setResume, jobDescription, processing, setProcessing, setSnack, connectionBase: getConnectionBase(loc), sessionId }} />
<ResumeBuilder {...{ isScrolledToBottom, scrollToBottom, facts, setFacts, resume, setResume, jobDescription, processing, setProcessing, setSnack, connectionBase: connectionBase, sessionId }} />
</CustomTabPanel>
<CustomTabPanel tab={tab} index={2}>
<Box className="ChatBox">
<Box className="Conversation">
{result !== undefined && <VectorVisualizer {...{ result, connectionBase, sessionId }} />}
</Box>
</Box>
</CustomTabPanel>
<CustomTabPanel tab={tab} index={3}>
<Box className="ChatBox">
<Box className="Conversation">
<Message {...{ message: { role: 'assistant', content: about }, submitQuery }} />


@ -0,0 +1,224 @@
import React, { useEffect, useState } from 'react';
import Box from '@mui/material/Box';
import Plot from 'react-plotly.js';
import TextField from '@mui/material/TextField';
import Tooltip from '@mui/material/Tooltip';
import Button from '@mui/material/Button';
import SendIcon from '@mui/icons-material/Send';
interface Metadata {
type?: string;
[key: string]: any;
}
interface ResultData {
embeddings: number[][] | number[][][];
documents: string[];
metadatas: Metadata[];
}
interface PlotData {
x: number[];
y: number[];
z?: number[];
colors: string[];
text: string[];
sizes: number[];
symbols: string[];
}
interface VectorVisualizerProps {
result: ResultData;
connectionBase: string;
sessionId?: string;
}
interface ChromaResult {
distances: number[];
documents: string[];
ids: string[];
metadatas: Metadata[];
query_embedding: number[];
query?: string;
vector_embedding?: number[];
}
const VectorVisualizer: React.FC<VectorVisualizerProps> = ({ result, connectionBase, sessionId }) => {
const [plotData, setPlotData] = useState<PlotData | null>(null);
const [query, setQuery] = useState<string>('');
const [queryEmbedding, setQueryEmbedding] = useState<ChromaResult | undefined>(undefined);
useEffect(() => {
if (!result || !result.embeddings) return;
if (result.embeddings.length === 0) return;
const vectors: number[][] = [...result.embeddings as number[][]];
const documents = [...result.documents || []];
const metadatas = [...result.metadatas || []];
if (queryEmbedding !== undefined && queryEmbedding.vector_embedding !== undefined) {
metadatas.unshift({ type: 'query' });
documents.unshift(queryEmbedding.query || '');
vectors.unshift(queryEmbedding.vector_embedding);
}
const is2D = vectors.every((v: number[]) => v.length === 2);
const is3D = vectors.every((v: number[]) => v.length === 3);
if (!is2D && !is3D) {
console.error('Vectors are neither 2D nor 3D');
return;
}
console.log('Vectors:', vectors);
// Placeholder color assignment
const colorMap: Record<string, string> = {
'query': '#00ff00',
};
const sizeMap: Record<string, number> = {
'query': 10,
};
const symbolMap: Record<string, string> = {
'query': 'circle',
};
const doc_types = metadatas.map(m => m.type || 'unknown');
const sizes = doc_types.map(type => {
if (!sizeMap[type]) {
sizeMap[type] = 5;
}
return sizeMap[type];
});
const symbols = doc_types.map(type => {
if (!symbolMap[type]) {
symbolMap[type] = 'circle';
}
return symbolMap[type];
});
const colors = doc_types.map(type => {
if (!colorMap[type]) {
colorMap[type] = '#ff0000';
}
return colorMap[type];
});
const x = vectors.map((v: number[]) => v[0]);
const y = vectors.map((v: number[]) => v[1]);
const text = documents.map((doc, i) => `Type: ${doc_types[i]}<br>Text: ${doc.slice(0, 100)}...`);
if (is3D) {
const z = vectors.map((v: number[]) => v[2]);
setPlotData({
x: x,
y: y,
z: z,
colors: colors,
sizes: sizes,
symbols: symbols,
text: text
});
} else {
setPlotData({
x: x,
y: y,
colors: colors,
sizes: sizes,
symbols: symbols,
text: text
});
}
}, [result, queryEmbedding]);
const handleKeyPress = (event: any) => {
if (event.key === 'Enter') {
sendQuery(query);
}
};
const sendQuery = async (query: string) => {
if (!query.trim()) return;
setQuery('');
const response = await fetch(`${connectionBase}/api/similarity/${sessionId}`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
query: query,
})
});
const chroma: ChromaResult = await response.json();
console.log('Chroma:', chroma);
setQueryEmbedding(chroma);
};
if (!plotData || sessionId === undefined) return (
<Box sx={{ display: 'flex', flexGrow: 1, justifyContent: 'center', alignItems: 'center' }}>
<div>Loading visualization...</div>
</Box>
);
return (
<>
<Box sx={{ display: 'flex', flexGrow: 1, justifyContent: 'center', alignItems: 'center' }}>
<Plot
data={[
{
x: plotData.x,
y: plotData.y,
z: plotData.z,
mode: 'markers',
marker: {
size: plotData.sizes,
symbol: plotData.symbols,
color: plotData.colors,
opacity: 0.8,
},
text: plotData.text,
hoverinfo: 'text',
type: plotData.z?.length ? 'scatter3d' : 'scatter',
},
]}
useResizeHandler={true}
config={{ responsive: true }}
style={{ width: '100%', height: '100%' }}
layout={{
autosize: true,
title: 'Vector Store Visualization',
xaxis: { title: 'x' },
yaxis: { title: 'y' },
zaxis: { title: 'z' },
margin: { r: 20, b: 10, l: 10, t: 40 },
}}
/>
</Box>
{ queryEmbedding !== undefined &&
<Box sx={{ display: 'flex', flexDirection: 'column', p: 1 }}>
<Box sx={{ fontSize: '0.8rem', mb: 1 }}>
Query: {queryEmbedding.query}
</Box>
</Box>
}
<Box className="Query" sx={{ display: "flex", flexDirection: "row", p: 1 }}>
<TextField
variant="outlined"
fullWidth
type="text"
value={query}
onChange={(e) => setQuery(e.target.value)}
onKeyDown={handleKeyPress}
placeholder="Enter query to find related documents..."
id="QueryInput"
/>
<Tooltip title="Send">
<Button sx={{ m: 1 }} variant="contained" onClick={() => { sendQuery(query); }}><SendIcon /></Button>
</Tooltip>
</Box>
</>
);
};
export type { VectorVisualizerProps, ResultData, Metadata };
export {
VectorVisualizer
};

frontend/src/declarations.d.ts vendored Normal file

@ -0,0 +1,2 @@
declare module 'tsne-js';
declare module 'react-plotly.js';


@ -1,44 +1,343 @@
python-dotenv
jupyterlab
ipywidgets
requests
numpy
pandas
scipy
scikit-learn
matplotlib
gensim
torch
transformers
tqdm
openai
gradio
langchain
tiktoken
faiss-cpu
langchain-openai
langchain_experimental
langchain_chroma
langchain[docarray]
datasets
sentencepiece
matplotlib
google-generativeai
anthropic
scikit-learn
unstructured
chromadb
plotly
jupyter-dash
beautifulsoup4
pydub
modal
ollama
accelerate
sentencepiece
bitsandbytes
psutil
setuptools
speedtest-cli
sentence_transformers
feedparser
accelerate==1.6.0
aiofiles==24.1.0
aiohappyeyeballs==2.6.1
aiohttp==3.11.16
aiosignal==1.3.2
annotated-types==0.7.0
ansi2html==1.9.2
anthropic==0.49.0
anyio==4.9.0
appdirs==1.4.4
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asgiref==3.8.1
asttokens==3.0.0
async-lru==2.0.5
attrs==25.3.0
babel==2.17.0
backoff==2.2.1
bcrypt==4.3.0
beautifulsoup4==4.13.4
bigdl-core-xe-all==2.7.0b20250416
bitsandbytes @ https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl#sha256=66deda2b99cee0d4e52a183d9bac5c8e8618cd9b4d4933ccf23b908622d6b879
bleach==6.2.0
blinker==1.9.0
bs4==0.0.2
build==1.2.2.post1
cachetools==5.5.2
certifi==2025.1.31
cffi==1.17.1
chardet==5.2.0
charset-normalizer==3.4.1
chroma-hnswlib==0.7.6
chromadb==0.6.3
click==8.1.8
coloredlogs==15.0.1
comm==0.2.2
contourpy==1.3.2
cryptography==44.0.2
cycler==0.12.1
dash==3.0.3
dataclasses-json==0.6.7
datasets==3.5.0
debugpy==1.8.14
decorator==5.2.1
defusedxml==0.7.1
Deprecated==1.2.18
diffusers==0.33.1
dill==0.3.8
distro==1.9.0
dpcpp-cpp-rt==2025.0.4
durationpy==0.9
einops==0.8.1
emoji==2.14.1
eval_type_backport==0.2.2
executing==2.2.0
faiss-cpu==1.10.0
fastapi==0.115.9
fastjsonschema==2.21.1
feedparser==6.0.11
ffmpy==0.5.0
filelock==3.13.1
filetype==1.2.0
Flask==3.0.3
flask-cors==5.0.1
flask-sock==0.7.0
flatbuffers==25.2.10
fonttools==4.57.0
fqdn==1.5.1
frozendict==2.4.6
frozenlist==1.5.0
fsspec==2024.6.1
gensim==4.3.3
geographiclib==2.0
geopy==2.4.1
google-ai-generativelanguage==0.6.15
google-api-core==2.24.2
google-api-python-client==2.167.0
google-auth==2.39.0
google-auth-httplib2==0.2.0
google-generativeai==0.8.4
googleapis-common-protos==1.70.0
gradio==5.25.2
gradio_client==1.8.0
greenlet==3.2.0
groovy==0.1.2
grpcio==1.71.0
grpcio-status==1.71.0
grpclib==0.4.7
h11==0.14.0
h2==4.2.0
hpack==4.1.0
html5lib==1.1
httpcore==1.0.8
httplib2==0.22.0
httptools==0.6.4
httpx==0.28.1
httpx-sse==0.4.0
huggingface-hub==0.30.2
humanfriendly==10.0
hyperframe==6.1.0
idna==3.10
impi-devel==2021.14.1
impi-rt==2021.14.1
importlib_metadata==8.6.1
importlib_resources==6.5.2
intel-cmplr-lib-rt==2025.0.2
intel-cmplr-lib-ur==2025.0.2
intel-cmplr-lic-rt==2025.0.2
intel-opencl-rt==2025.0.4
intel-openmp==2025.0.4
intel-pti==0.10.0
intel-sycl-rt==2025.0.2
intel_extension_for_pytorch==2.6.10+xpu
ipex-llm @ file:///opt/wheels/ipex_llm-2.2.0.dev0-py3-none-any.whl#sha256=5023ff4dc9799838486b4d160d5f3dcd5f6d3bb9ac8a2c6cabaf90034b540ba3
ipykernel==6.29.5
ipython==9.1.0
ipython_pygments_lexers==1.1.1
ipywidgets==8.1.6
isoduration==20.11.0
itsdangerous==2.2.0
jedi==0.19.2
Jinja2==3.1.4
jiter==0.9.0
joblib==1.4.2
json5==0.12.0
jsonpatch==1.33
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2024.10.1
jupyter-dash==0.4.2
jupyter-events==0.12.0
jupyter-lsp==2.2.5
jupyter_client==8.6.3
jupyter_core==5.7.2
jupyter_server==2.15.0
jupyter_server_terminals==0.5.3
jupyterlab==4.4.0
jupyterlab_pygments==0.3.0
jupyterlab_server==2.27.3
jupyterlab_widgets==3.0.14
kiwisolver==1.4.8
kubernetes==32.0.1
langchain==0.3.23
langchain-chroma==0.2.3
langchain-community==0.3.21
langchain-core==0.3.52
langchain-experimental==0.3.4
langchain-ollama==0.3.2
langchain-openai==0.3.13
langchain-text-splitters==0.3.8
langdetect==1.0.9
langsmith==0.3.31
llvmlite==0.44.0
lxml==5.3.2
markdown-it-py==3.0.0
MarkupSafe==2.1.5
marshmallow==3.26.1
matplotlib==3.10.1
matplotlib-inline==0.1.7
mdurl==0.1.2
mistune==3.1.3
mkl==2025.0.1
mkl-dpcpp==2025.0.1
mmh3==5.1.0
modal==0.74.4
monotonic==1.6
mpmath==1.3.0
multidict==6.4.3
multiprocess==0.70.16
multitasking==0.0.11
mypy-extensions==1.0.0
narwhals==1.35.0
nbclient==0.10.2
nbconvert==7.16.6
nbformat==5.10.4
nest-asyncio==1.6.0
networkx==3.3
nltk==3.9.1
notebook_shim==0.2.4
numba==0.61.2
numpy==1.26.4
oauthlib==3.2.2
olefile==0.47
ollama==0.4.8
oneccl==2021.14.1
oneccl-bind-pt==2.6.0+xpu
oneccl-devel==2021.14.1
onemkl-sycl-blas==2025.0.1
onemkl-sycl-datafitting==2025.0.1
onemkl-sycl-dft==2025.0.1
onemkl-sycl-lapack==2025.0.1
onemkl-sycl-rng==2025.0.1
onemkl-sycl-sparse==2025.0.1
onemkl-sycl-stats==2025.0.1
onemkl-sycl-vm==2025.0.1
onnxruntime==1.21.0
openai==1.75.0
opentelemetry-api==1.32.1
opentelemetry-exporter-otlp-proto-common==1.32.1
opentelemetry-exporter-otlp-proto-grpc==1.32.1
opentelemetry-instrumentation==0.53b1
opentelemetry-instrumentation-asgi==0.53b1
opentelemetry-instrumentation-fastapi==0.53b1
opentelemetry-proto==1.32.1
opentelemetry-sdk==1.32.1
opentelemetry-semantic-conventions==0.53b1
opentelemetry-util-http==0.53b1
orjson==3.10.16
overrides==7.7.0
packaging==24.1
pandas==2.2.3
pandocfilters==1.5.1
parso==0.8.4
peewee==3.17.9
peft==0.15.2
pexpect==4.9.0
pillow==11.0.0
platformdirs==4.3.7
plotly==6.0.1
posthog==3.25.0
prometheus_client==0.21.1
prompt_toolkit==3.0.51
propcache==0.3.1
proto-plus==1.26.1
protobuf==5.29.4
psutil==7.0.0
ptyprocess==0.7.0
pure_eval==0.2.3
pyarrow==19.0.1
pyasn1==0.6.1
pyasn1_modules==0.4.2
pycparser==2.22
pydantic==2.11.3
pydantic-settings==2.8.1
pydantic_core==2.33.1
pydub==0.25.1
Pygments==2.19.1
PyHyphen==4.0.4
pynndescent==0.5.13
pyparsing==3.2.3
pypdf==5.4.0
PyPika==0.48.9
pyproject_hooks==1.2.0
python-dateutil==2.9.0.post0
python-dotenv==1.1.0
python-iso639==2025.2.18
python-json-logger==3.3.0
python-magic==0.4.27
python-multipart==0.0.20
python-oxmsg==0.0.2
pytorch-triton-xpu==3.2.0
pytz==2025.2
PyYAML==6.0.2
pyzmq==26.4.0
pyzt==0.0.2
RapidFuzz==3.13.0
referencing==0.36.2
regex==2024.11.6
requests==2.32.3
requests-oauthlib==2.0.0
requests-toolbelt==1.0.0
retrying==1.3.4
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rich==14.0.0
rpds-py==0.24.0
rsa==4.9.1
ruamel.yaml==0.18.10
ruamel.yaml.clib==0.2.12
ruff==0.11.5
safehttpx==0.1.6
safetensors==0.5.3
scikit-learn==1.6.1
scipy==1.13.1
semantic-version==2.10.0
Send2Trash==1.8.3
sentence-transformers==3.4.0
sentencepiece==0.2.0
sgmllib3k==1.0.0
shellingham==1.5.4
sigtools==4.0.1
simple-websocket==1.1.0
six==1.17.0
smart-open==7.1.0
sniffio==1.3.1
soupsieve==2.6
speedtest-cli==2.1.3
SQLAlchemy==2.0.40
stack-data==0.6.3
starlette==0.45.3
sympy==1.13.1
synchronicity==0.9.11
tbb==2022.1.0
tcmlib==1.2.0
tenacity==9.1.2
terminado==0.18.1
threadpoolctl==3.6.0
tiktoken==0.9.0
tinycss2==1.4.0
tokenizers==0.21.1
toml==0.10.2
tomlkit==0.13.2
torch==2.6.0+xpu
torchaudio==2.6.0+xpu
torchvision==0.21.0+xpu
tornado==6.4.2
tqdm==4.67.1
traitlets==5.14.3
transformers==4.51.3
typer==0.15.2
types-certifi==2021.10.8.3
types-python-dateutil==2.9.0.20241206
types-toml==0.10.8.20240310
typing-inspect==0.9.0
typing-inspection==0.4.0
typing_extensions==4.12.2
tzdata==2025.2
umap-learn==0.5.7
umf==0.9.1
unstructured==0.17.2
unstructured-client==0.32.3
uri-template==1.3.0
uritemplate==4.1.1
urllib3==2.4.0
uvicorn==0.34.1
uvloop==0.21.0
watchdog==6.0.0
watchfiles==1.0.5
wcwidth==0.2.13
webcolors==24.11.1
webencodings==0.5.1
websocket-client==1.8.0
websockets==15.0.1
Werkzeug==3.0.6
widgetsnbextension==4.0.14
wrapt==1.17.2
wsproto==1.2.0
xxhash==3.5.0
yarl==1.19.0
yfinance==0.2.55
zipp==3.21.0
zstandard==0.23.0


@ -24,13 +24,21 @@ try_import('ollama')
try_import('requests')
try_import('bs4', 'beautifulsoup4')
try_import('fastapi')
try_import('uvicorn')
try_import('sklearn')
try_import('numpy')
try_import('umap')
import ollama
import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, Request
from fastapi import FastAPI, Request, BackgroundTasks
from fastapi.responses import JSONResponse, StreamingResponse, FileResponse, RedirectResponse
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
import numpy as np
#from sklearn.manifold import TSNE
import umap
from utils import (
rag as Rag,
@ -354,14 +362,15 @@ async def handle_tool_calls(message):
# %%
class WebServer:
def __init__(self, logging, client, collection, model=MODEL_NAME):
def __init__(self, logging, client, model=MODEL_NAME):
self.logging = logging
self.app = FastAPI()
self.contexts = {}
self.client = client
self.model = model
self.processing = False
self.collection = collection
self.file_watcher = None
self.observer = None
self.app.add_middleware(
CORSMiddleware,
@ -371,6 +380,26 @@ class WebServer:
allow_headers=["*"],
)
@self.app.on_event("startup")
async def startup_event():
# Start the file watcher
self.observer, self.file_watcher = Rag.start_file_watcher(
llm=client,
watch_directory=defines.doc_dir,
initialize=True, # Only loads documents if no hash state exists
recreate=False # Don't recreate if exists
)
print(f"API started with {self.file_watcher.collection.count()} documents in the collection")
@self.app.on_event("shutdown")
async def shutdown_event():
if self.observer:
self.observer.stop()
self.observer.join()
print("File watcher stopped")
self.setup_routes()
def setup_routes(self):
@ -381,6 +410,136 @@ class WebServer:
return RedirectResponse(url=f"/{context['id']}", status_code=307)
#return JSONResponse({"redirect": f"/{context['id']}"})
@self.app.get("/api/query")
async def query_documents(query: str, top_k: int = 3):
if not self.file_watcher:
return
"""Query the RAG system with the given prompt."""
results = self.file_watcher.find_similar(query, top_k=top_k)
return {
"query": query,
"results": [
{
"content": doc,
"metadata": meta,
"distance": dist
}
for doc, meta, dist in zip(
results["documents"],
results["metadatas"],
results["distances"]
)
]
}
@self.app.post("/api/refresh/{file_path:path}")
async def refresh_document(file_path: str, background_tasks: BackgroundTasks):
if not self.file_watcher:
return
"""Manually refresh a specific document in the collection."""
full_path = os.path.join(defines.doc_dir, file_path)
if not os.path.exists(full_path):
return {"status": "error", "message": "File not found"}
# Schedule the update in the background
background_tasks.add_task(
self.file_watcher.process_file_update, full_path
)
return {
"status": "success",
"message": f"Document refresh scheduled for {file_path}"
}
# @self.app.post("/api/refresh-all")
# async def refresh_all_documents():
# if not self.file_watcher:
# return
# """Refresh all documents in the collection."""
# # Re-initialize file hashes and process all files
# self.file_watcher._initialize_file_hashes()
# # Schedule updates for all files
# file_paths = self.file_watcher.file_hashes.keys()
# tasks = [self.file_watcher.process_file_update(path) for path in file_paths]
# # Wait for all updates to complete
# await asyncio.gather(*tasks)
# return {
# "status": "success",
# "message": f"Refreshed {len(file_paths)} documents",
# "document_count": file_watcher.collection.count()
# }
@self.app.put('/api/tsne/{context_id}')
async def put_tsne(context_id: str, request: Request):
if not self.file_watcher:
return
if not is_valid_uuid(context_id):
logging.warning(f"Invalid context_id: {context_id}")
return JSONResponse({"error": "Invalid context_id"}, status_code=400)
context = self.upsert_context(context_id)
try:
data = await request.json()
dimensions = data.get('dimensions', 2)
except:
dimensions = 2
try:
result = self.file_watcher.collection.get(include=['embeddings', 'documents', 'metadatas'])
vectors = np.array(result['embeddings'])
umap_model = umap.UMAP(n_components=dimensions, random_state=42)
embedding = umap_model.fit_transform(vectors)
context['umap_model'] = umap_model
result['embeddings'] = embedding.tolist()
return JSONResponse(result)
except Exception as e:
logging.error(e)
return JSONResponse({"error": str(e)}, 500)
@self.app.put('/api/similarity/{context_id}')
async def put_similarity(context_id: str, request: Request):
if not self.file_watcher:
return
if not is_valid_uuid(context_id):
logging.warning(f"Invalid context_id: {context_id}")
return JSONResponse({"error": "Invalid context_id"}, status_code=400)
context = self.upsert_context(context_id)
if not context.get("umap_model"):
return JSONResponse({"error": "No umap_model found in context"}, status_code=404)
try:
data = await request.json()
query = data.get('query', '')
except:
query = ''
if not query:
return JSONResponse({"error": "No query provided"}, status_code=400)
try:
chroma_results = self.file_watcher.find_similar(query=query, top_k=10)
if not chroma_results:
return JSONResponse({"error": "No results found"}, status_code=404)
chroma_embedding = chroma_results["query_embedding"]
normalized = (chroma_embedding - chroma_embedding.min()) / (chroma_embedding.max() - chroma_embedding.min())
vector_embedding = context["umap_model"].transform([normalized])[0].tolist()
return JSONResponse({ **chroma_results, "query": query, "vector_embedding": vector_embedding })
except Exception as e:
logging.error(e)
#return JSONResponse({"error": str(e)}, 500)
@self.app.put('/api/reset/{context_id}')
async def put_reset(context_id: str, request: Request):
if not is_valid_uuid(context_id):
@ -634,8 +793,6 @@ class WebServer:
self.logging.info(f"Serve index.html for {path}")
return FileResponse(os.path.join(defines.static_content, 'index.html'))
import requests
def save_context(self, session_id):
"""
Serialize a Python dictionary to a file in the sessions directory.
@ -656,10 +813,14 @@ class WebServer:
# Create the full file path
file_path = os.path.join(defines.session_dir, session_id)
umap_model = context.get("umap_model")
if umap_model:
del context["umap_model"]
# Serialize the data to JSON and write to file
with open(file_path, 'w') as f:
json.dump(context, f)
if umap_model:
context["umap_model"] = umap_model
return session_id
def load_context(self, session_id):
@ -719,6 +880,9 @@ class WebServer:
return self.load_context(context_id)
async def chat(self, context, content):
if not self.file_watcher:
return
content = content.strip()
if not content:
yield {"status": "error", "message": "Invalid request"}
@ -744,7 +908,7 @@ class WebServer:
for rag in context["rags"]:
if rag["enabled"] and rag["name"] == "JPK": # Only support JPK rag right now...
yield {"status": "processing", "message": f"Checking RAG context {rag['name']}..."}
chroma_results = Rag.find_similar(llm=self.client, collection=self.collection, query=content, top_k=10)
chroma_results = self.file_watcher.find_similar(query=content, top_k=10)
if chroma_results:
rag_docs.extend(chroma_results["documents"])
metadata["rag"] = { "name": rag["name"], **chroma_results }
@ -852,6 +1016,9 @@ class WebServer:
self.processing = False
async def generate_resume(self, context, content):
if not self.file_watcher:
return
content = content.strip()
if not content:
yield {"status": "error", "message": "Invalid request"}
@ -880,21 +1047,21 @@ class WebServer:
"prompt_eval_duration": 0,
}
rag_docs = []
resume_doc = open(defines.resume_doc, 'r').read()
rag_docs.append(resume_doc)
for rag in context["rags"]:
if rag["enabled"] and rag["name"] == "JPK": # Only support JPK rag right now...
yield {"status": "processing", "message": f"Checking RAG context {rag['name']}..."}
chroma_results = Rag.find_similar(llm=self.client, collection=self.collection, query=content, top_k=10)
chroma_results = self.file_watcher.find_similar(query=content, top_k=10)
if chroma_results:
rag_docs.extend(chroma_results["documents"])
metadata["rag"] = { "name": rag["name"], **chroma_results }
preamble = f"The current time is {DateTime()}\n"
if len(rag_docs):
preamble = f"""[WORK HISTORY]:\n"""
for doc in rag_docs:
preamble += doc
resume["rag"] += f"{doc}\n"
preamble += f"\n[/WORK HISTORY]\n"
preamble = f"""[WORK HISTORY]:\n"""
for doc in rag_docs:
preamble += f"{doc}\n"
resume["rag"] += f"{doc}\n"
preamble += f"\n[/WORK HISTORY]\n"
content = f"{preamble}\nUse the above WORK HISTORY to create the resume for this JOB DESCRIPTION. Do not use the JOB DESCRIPTION skills as skills the user posseses unless listed in WORK HISTORY:\n[JOB DESCRIPTION]\n{content}\n[/JOB DESCRIPTION]\n"
@ -987,8 +1154,13 @@ class WebServer:
def run(self, host='0.0.0.0', port=WEB_PORT, **kwargs):
import uvicorn
uvicorn.run(self.app, host=host, port=port)
try:
uvicorn.run(self.app, host=host, port=port)
except KeyboardInterrupt:
if self.observer:
self.observer.stop()
if self.observer:
self.observer.join()
# %%
@ -1004,17 +1176,16 @@ def main():
client = ollama.Client(host=args.ollama_server)
model = args.ollama_model
documents = Rag.load_text_files(defines.doc_dir)
print(f"Documents loaded {len(documents)}")
collection = Rag.get_vector_collection()
chunks = Rag.create_chunks_from_documents(documents)
Rag.add_embeddings_to_collection(client, collection, chunks)
doc_types = set(chunk.metadata['doc_type'] for chunk in chunks)
print(f"Document types: {doc_types}")
print(f"Vectorstore created with {collection.count()} documents")
# documents = Rag.load_text_files(defines.doc_dir)
# print(f"Documents loaded {len(documents)}")
# chunks = Rag.create_chunks_from_documents(documents)
# doc_types = set(chunk.metadata['doc_type'] for chunk in chunks)
# print(f"Document types: {doc_types}")
# print(f"Vectorstore created with {collection.count()} documents")
web_server = WebServer(logging, client, collection, model)
web_server = WebServer(logging, client, model)
logging.info(f"Starting web server at http://{args.web_host}:{args.web_port}")
web_server.run(host=args.web_host, port=args.web_port, use_reloader=False)
main()


@ -1,11 +1,14 @@
import os
ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
#model = "deepseek-r1:7b" # Tool calls don't work
#model="mistral:7b" # Tool calls don't work
#model = "llama3.2"
model="qwen2.5:7b"
model = os.getenv('MODEL_NAME', 'qwen2.5:7b')
encoding_model="mxbai-embed-large"
persist_directory="/root/.cache/chromadb"
persist_directory = os.getenv('PERSIST_DIR', "/opt/backstory/chromadb")
max_context = 2048*8*2
doc_dir = "/opt/backstory/docs/"
session_dir = "/opt/backstory/sessions"
static_content = '/opt/backstory/frontend/deployed'
static_content = '/opt/backstory/frontend/deployed'
resume_doc = '/opt/backstory/docs/resume/generic.txt'


@ -1,118 +1,481 @@
__all__ = [
'load_text_files',
'create_chunks_from_documents',
'get_vector_collection',
'add_embeddings_to_collection',
'find_similar'
]
import os
import glob
import time
import hashlib
import asyncio
import logging
import os
import glob
import time
import hashlib
import asyncio
import json
import pickle
import numpy as np
import chromadb
import ollama
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document # Import the Document class
from langchain.schema import Document
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
# Import your existing modules
if __name__ == "__main__":
# When running directly, use absolute imports
import defines
# When running directly, use absolute imports
import defines
else:
# When imported as a module, use relative imports
from . import defines
# When imported as a module, use relative imports
from . import defines
def load_text_files(directory, encoding="utf-8"):
file_paths = glob.glob(os.path.join(directory, "**/*"), recursive=True)
documents = []
__all__ = [
'ChromaDBFileWatcher',
'start_file_watcher'
]
for file_path in file_paths:
if os.path.isfile(file_path): # Ensure it's a file, not a directory
class ChromaDBFileWatcher(FileSystemEventHandler):
def __init__(self, llm, watch_directory, loop, persist_directory=None, collection_name="documents",
chunk_size=1000, chunk_overlap=200, recreate=False):
self.llm = llm
self.watch_directory = watch_directory
self.persist_directory = persist_directory or defines.persist_directory
self.collection_name = collection_name
self.chunk_size = chunk_size
self.chunk_overlap = chunk_overlap
self.loop = loop
# Initialize ChromaDB collection
self.collection = self._get_vector_collection(recreate=recreate)
# Setup text splitter
self.text_splitter = CharacterTextSplitter(
chunk_size=chunk_size,
chunk_overlap=chunk_overlap
)
# Track file hashes and processing state
self.file_hashes: dict[str, str] = {}
self.update_lock = asyncio.Lock()
self.processing_files = set()
# Initialize file hashes
self.llm = llm
self.watch_directory = watch_directory
self.persist_directory = persist_directory or defines.persist_directory
self.collection_name = collection_name
self.chunk_size = chunk_size
self.chunk_overlap = chunk_overlap
# Path for storing file hash state
self.hash_state_path = os.path.join(self.persist_directory, f"{collection_name}_hash_state.json")
# Initialize ChromaDB collection
self.collection = self._get_vector_collection(recreate=recreate)
# Setup text splitter
self.text_splitter = CharacterTextSplitter(
chunk_size=chunk_size,
chunk_overlap=chunk_overlap
)
# Track file hashes and processing state
self.file_hashes = self._load_hash_state()
self.update_lock = asyncio.Lock()
self.processing_files = set()
# Only scan for new/changed files if we have previous hash state
if not self.file_hashes:
self._initialize_file_hashes()
else:
self._update_file_hashes()
def collection(self):
return self.collection
def _save_hash_state(self):
"""Save the current file hash state to disk."""
try:
# Create directory if it doesn't exist
os.makedirs(os.path.dirname(self.hash_state_path), exist_ok=True)
with open(self.hash_state_path, 'w') as f:
json.dump(self.file_hashes, f)
logging.info(f"Saved hash state with {len(self.file_hashes)} entries")
except Exception as e:
logging.error(f"Error saving hash state: {e}")
def _load_hash_state(self):
"""Load the file hash state from disk."""
if os.path.exists(self.hash_state_path):
try:
with open(file_path, "r", encoding=encoding) as f:
content = f.read()
# Extract top-level directory
rel_path = os.path.relpath(file_path, directory) # Relative to base directory
top_level_dir = rel_path.split(os.sep)[0] # Get the first directory in the path
documents.append(Document(
page_content=content, # Required format for LangChain
metadata={"doc_type": top_level_dir, "path": file_path}
))
with open(self.hash_state_path, 'r') as f:
hash_state = json.load(f)
logging.info(f"Loaded hash state with {len(hash_state)} entries")
return hash_state
except Exception as e:
print(f"Failed to load {file_path}: {e}")
logging.error(f"Error loading hash state: {e}")
return {}
def _update_file_hashes(self):
"""Update file hashes by checking for new or modified files."""
# Check for new or modified files
file_paths = glob.glob(os.path.join(self.watch_directory, "**/*"), recursive=True)
files_checked = 0
files_changed = 0
for file_path in file_paths:
if os.path.isfile(file_path):
files_checked += 1
current_hash = self._get_file_hash(file_path)
if not current_hash:
continue
return documents
# If file is new or changed
if file_path not in self.file_hashes or self.file_hashes[file_path] != current_hash:
self.file_hashes[file_path] = current_hash
files_changed += 1
# Schedule an update for this file
asyncio.run_coroutine_threadsafe(self.process_file_update(file_path), self.loop)
logging.info(f"File changed: {file_path}")
# Check for deleted files
deleted_files = []
for file_path in self.file_hashes:
if not os.path.exists(file_path):
deleted_files.append(file_path)
# Schedule removal
asyncio.run_coroutine_threadsafe(self.remove_file_from_collection(file_path), self.loop)
logging.info(f"File deleted: {file_path}")
# Remove deleted files from hash state
for file_path in deleted_files:
del self.file_hashes[file_path]
logging.info(f"Checked {files_checked} files: {files_changed} new/changed, {len(deleted_files)} deleted")
# Save the updated state
self._save_hash_state()
# ... rest of existing methods ...
async def process_file_update(self, file_path):
"""Process a file update event."""
# Skip if already being processed
if file_path in self.processing_files:
return
try:
self.processing_files.add(file_path)
# Wait a moment to ensure the file write is complete
await asyncio.sleep(0.5)
# Check if content changed via hash
current_hash = self._get_file_hash(file_path)
if not current_hash: # File might have been deleted or is inaccessible
return
if file_path in self.file_hashes and self.file_hashes[file_path] == current_hash:
# File hasn't actually changed in content
return
# Update file hash
self.file_hashes[file_path] = current_hash
# Process and update the file in ChromaDB
async with self.update_lock:
await self._update_document_in_collection(file_path)
# Save the hash state after successful update
self._save_hash_state()
except Exception as e:
logging.error(f"Error processing update for {file_path}: {e}")
finally:
self.processing_files.discard(file_path)
async def remove_file_from_collection(self, file_path):
"""Remove all chunks related to a deleted file."""
async with self.update_lock:
try:
# Find all documents with the specified path
results = self.collection.get(
where={"path": file_path}
)
if results and 'ids' in results and results['ids']:
self.collection.delete(ids=results['ids'])
logging.info(f"Removed {len(results['ids'])} chunks for deleted file: {file_path}")
# Remove from hash dictionary
if file_path in self.file_hashes:
del self.file_hashes[file_path]
# Save the updated hash state
self._save_hash_state()
except Exception as e:
logging.error(f"Error removing file from collection: {e}")
def get_vector_collection(path=defines.persist_directory, name="documents"):
# Initialize ChromaDB client
chroma_client = chromadb.PersistentClient(path=path, settings=chromadb.Settings(anonymized_telemetry=False))
def _get_vector_collection(self, recreate=False):
"""Get or create a ChromaDB collection."""
# Initialize ChromaDB client
chroma_client = chromadb.PersistentClient(
path=self.persist_directory,
settings=chromadb.Settings(anonymized_telemetry=False)
)
# Check if the collection exists and delete it
if os.path.exists(path):
try:
chroma_client.delete_collection(name=name)
except Exception as e:
print(f"Failed to delete existing collection: {e}")
# Check if the collection exists and delete it if recreate is True
if recreate and os.path.exists(self.persist_directory):
try:
chroma_client.delete_collection(name=self.collection_name)
except Exception as e:
logging.error(f"Failed to delete existing collection: {e}")
return chroma_client.get_or_create_collection(name=name)
return chroma_client.get_or_create_collection(
name=self.collection_name,
metadata={
"hnsw:space": "cosine"
})
    def load_text_files(self, directory=None, encoding="utf-8"):
        """Load all text files from a directory into Document objects."""
        directory = directory or self.watch_directory
        file_paths = glob.glob(os.path.join(directory, "**/*"), recursive=True)
        documents = []
        for file_path in file_paths:
            if os.path.isfile(file_path):  # Ensure it's a file, not a directory
                try:
                    with open(file_path, "r", encoding=encoding) as f:
                        content = f.read()
                    # Extract top-level directory
                    rel_path = os.path.relpath(file_path, directory)
                    top_level_dir = rel_path.split(os.sep)[0]
                    documents.append(Document(
                        page_content=content,
                        metadata={"doc_type": top_level_dir, "path": file_path}
                    ))
                except Exception as e:
                    logging.error(f"Failed to load {file_path}: {e}")
        return documents

    # (Pre-refactor module-level versions, replaced by the get_embedding and
    # add_embeddings_to_collection methods below:)
    # # Function to generate embeddings using Ollama
    # def get_embedding(llm, text):
    #     response = llm.embeddings(model=defines.model, prompt=text, options={'num_ctx': defines.max_context})
    #     return response["embedding"]
    #
    # def add_embeddings_to_collection(llm, collection, chunks):
    #     # Store documents in ChromaDB
    #     for i, text_or_doc in enumerate(chunks):
    #         # If input is a Document, extract the text content
    #         if isinstance(text_or_doc, Document):
    #             text = text_or_doc.page_content
    #             metadata = text_or_doc.metadata
    #         else:
    #             text = text_or_doc  # Assume it's already a string
    #             metadata = {"index": i}
    #         embedding = get_embedding(llm, text)
    #         collection.add(
    #             ids=[str(i)],
    #             documents=[text],
    #             embeddings=[embedding],
    #             metadatas=[metadata]
    #         )
    def create_chunks_from_documents(self, docs):
        """Split documents into chunks using the text splitter."""
        return self.text_splitter.split_documents(docs)
    def get_embedding(self, text):
        """Generate embeddings using Ollama."""
        response = self.llm.embeddings(
            model=defines.model,
            prompt=text,
            options={"num_ctx": defines.max_context}
        )
        return self._normalize_embeddings(response["embedding"])
    def add_embeddings_to_collection(self, chunks):
        """Add embeddings for chunks to the collection."""
        for i, chunk in enumerate(chunks):
            text = chunk.page_content
            metadata = chunk.metadata

            # Generate a more unique ID based on content and metadata
            content_hash = hashlib.md5(text.encode()).hexdigest()
            path_hash = ""
            if "path" in metadata:
                path_hash = hashlib.md5(metadata["path"].encode()).hexdigest()[:8]
            chunk_id = f"{path_hash}_{content_hash}_{i}"

            embedding = self.get_embedding(text)
            self.collection.add(
                ids=[chunk_id],
                documents=[text],
                embeddings=[embedding],
                metadatas=[metadata]
            )
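
    # A minimal sketch (not part of the original flow) of batching the add() call:
    # ChromaDB's collection.add() accepts parallel lists, so building the lists up
    # front and issuing a single call can be faster for large chunk sets. The helper
    # name `_add_embeddings_batched` is hypothetical.
    #
    #   def _add_embeddings_batched(self, chunks):
    #       ids, texts, embeddings, metadatas = [], [], [], []
    #       for i, chunk in enumerate(chunks):
    #           text = chunk.page_content
    #           content_hash = hashlib.md5(text.encode()).hexdigest()
    #           path_hash = hashlib.md5(chunk.metadata.get("path", "").encode()).hexdigest()[:8]
    #           ids.append(f"{path_hash}_{content_hash}_{i}")
    #           texts.append(text)
    #           embeddings.append(self.get_embedding(text))
    #           metadatas.append(chunk.metadata)
    #       if ids:
    #           self.collection.add(ids=ids, documents=texts, embeddings=embeddings, metadatas=metadatas)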
    def find_similar(self, query, top_k=3):
        """Find similar documents to the query."""
        query_embedding = self.get_embedding(query)
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=top_k,
            include=["documents", "metadatas", "distances"]
        )
        return {
            "query_embedding": query_embedding,
            "ids": results["ids"][0],
            "documents": results["documents"][0],
            "distances": results["distances"][0],
            "metadatas": results["metadatas"][0],
        }
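
    # A usage sketch (hypothetical caller, assuming a ChromaDBFileWatcher instance named
    # `watcher` and a reachable Ollama server). With the collection created using
    # "hnsw:space": "cosine", smaller distances mean more similar chunks:
    #
    #   results = watcher.find_similar("What GPU drivers has James worked on?", top_k=3)
    #   for doc, meta, dist in zip(results["documents"], results["metadatas"], results["distances"]):
    #       print(f"{dist:.3f}  {meta.get('path')}  {doc[:80]!r}")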
    def _initialize_file_hashes(self):
        """Initialize the hash dictionary for all files in the directory."""
        file_paths = glob.glob(os.path.join(self.watch_directory, "**/*"), recursive=True)
        for file_path in file_paths:
            if os.path.isfile(file_path):
                file_hash = self._get_file_hash(file_path)
                if file_hash:
                    self.file_hashes[file_path] = file_hash
    def _get_file_hash(self, file_path):
        """Calculate the MD5 hash of a file."""
        try:
            with open(file_path, 'rb') as f:
                return hashlib.md5(f.read()).hexdigest()
        except Exception as e:
            logging.error(f"Error hashing file {file_path}: {e}")
            return None
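
    # A minimal sketch (assumption: watched files may be large) of hashing in fixed-size
    # blocks rather than reading an entire file into memory at once. The name
    # `_get_file_hash_streaming` is hypothetical and not used elsewhere in this module.
    #
    #   def _get_file_hash_streaming(self, file_path, block_size=65536):
    #       md5 = hashlib.md5()
    #       with open(file_path, "rb") as f:
    #           for block in iter(lambda: f.read(block_size), b""):
    #               md5.update(block)
    #       return md5.hexdigest()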
def on_modified(self, event):
"""Handle file modification events."""
if event.is_directory:
return
file_path = event.src_path
# Schedule the update using asyncio
asyncio.run_coroutine_threadsafe(self.process_file_update(file_path), self.loop)
logging.info(f"File modified: {file_path}")
def on_created(self, event):
"""Handle file creation events."""
if event.is_directory:
return
file_path = event.src_path
# Schedule the update using asyncio
asyncio.run_coroutine_threadsafe(self.process_file_update(file_path), self.loop)
logging.info(f"File created: {file_path}")
def on_deleted(self, event):
"""Handle file deletion events."""
if event.is_directory:
return
file_path = event.src_path
asyncio.run_coroutine_threadsafe(self.remove_file_from_collection(file_path), self.loop)
logging.info(f"File deleted: {file_path}")
    def _normalize_embeddings(self, embeddings):
        """L2-normalize embeddings; works for a single 1-D embedding or a 2-D batch."""
        embeddings = np.asarray(embeddings)
        norms = np.linalg.norm(embeddings, axis=-1, keepdims=True)
        return embeddings / norms
    async def _update_document_in_collection(self, file_path):
        """Update a document in the ChromaDB collection."""
        try:
            # Remove existing entries for this file
            existing_results = self.collection.get(where={"path": file_path})
            if existing_results and 'ids' in existing_results and existing_results['ids']:
                self.collection.delete(ids=existing_results['ids'])

            # Create document object in LangChain format
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()

            # Extract top-level directory
            rel_path = os.path.relpath(file_path, self.watch_directory)
            top_level_dir = rel_path.split(os.sep)[0]

            document = Document(
                page_content=content,
                metadata={"doc_type": top_level_dir, "path": file_path}
            )

            # Create chunks
            chunks = self.text_splitter.split_documents([document])

            # Add chunks to collection
            self.add_embeddings_to_collection(chunks)
            logging.info(f"Updated {len(chunks)} chunks for file: {file_path}")
        except Exception as e:
            logging.error(f"Error updating document in collection: {e}")
    def initialize_collection(self):
        """Initialize the collection with all documents from the watch directory."""
        documents = self.load_text_files()
        logging.info(f"Documents loaded: {len(documents)}")
        chunks = self.create_chunks_from_documents(documents)
        self.add_embeddings_to_collection(chunks)
        logging.info(f"Vectorstore created with {self.collection.count()} documents")

        # Display document types
        doc_types = set(chunk.metadata['doc_type'] for chunk in chunks)
        logging.info(f"Document types: {doc_types}")
        return len(chunks)
# Function to start the file watcher
def start_file_watcher(llm, watch_directory, persist_directory=None,
                       collection_name="documents", initialize=False, recreate=False):
    """
    Start watching a directory for file changes.

    Args:
        llm: The language model client
        watch_directory: Directory to watch for changes
        persist_directory: Directory to persist ChromaDB and hash state
        collection_name: Name of the ChromaDB collection
        initialize: Whether to initialize the collection with all documents (only needed the first time)
        recreate: Whether to recreate the collection (deletes any existing collection)
    """
    loop = asyncio.get_event_loop()
    file_watcher = ChromaDBFileWatcher(
        llm,
        watch_directory,
        loop=loop,
        persist_directory=persist_directory,
        collection_name=collection_name,
        recreate=recreate
    )

    # Initialize collection if requested and no existing hash state
    if initialize and not file_watcher.file_hashes:
        file_watcher.initialize_collection()

    # Start observer
    observer = Observer()
    observer.schedule(file_watcher, watch_directory, recursive=True)
    observer.start()
    logging.info(f"Started watching directory: {watch_directory}")
    return observer, file_watcher


# (Pre-refactor module-level versions, replaced by the ChromaDBFileWatcher methods above:)
# def find_similar(llm, collection, query, top_k=3):
#     query_embedding = get_embedding(llm, query)
#     results = collection.query(
#         query_embeddings=[query_embedding],
#         n_results=top_k,
#         include=["documents", "metadatas", "distances"]
#     )
#     return {
#         "query_embedding": query_embedding,
#         "ids": results["ids"][0],
#         "documents": results["documents"][0],
#         "distances": results["distances"][0],
#         "metadatas": results["metadatas"][0],
#     }
#
# def create_chunks_from_documents(docs):
#     text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
#     return text_splitter.split_documents(docs)
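
# A usage sketch (hypothetical caller, not part of this module): run the watcher inside
# an asyncio application so the loop that file-change coroutines are scheduled onto is
# actually running. `defines.doc_dir` and `defines.ollama_api_url` follow the defaults
# used in the __main__ block below.
#
#   async def main():
#       llm = ollama.Client(host=defines.ollama_api_url)
#       observer, watcher = start_file_watcher(llm, defines.doc_dir, initialize=True)
#       try:
#           await asyncio.Event().wait()  # park until cancelled (e.g. Ctrl+C)
#       finally:
#           observer.stop()
#           observer.join()
#
#   # asyncio.run(main())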
if __name__ == "__main__":
    # When running directly, use absolute imports
    import defines

    # (Pre-refactor standalone test, replaced by the file-watcher driven version below:)
    # llm = ollama.Client(host=defines.ollama_api_url)
    # documents = load_text_files(defines.doc_dir)
    # print(f"Documents loaded {len(documents)}")
    # collection = get_vector_collection()
    # chunks = create_chunks_from_documents(documents)
    # add_embeddings_to_collection(llm, collection, chunks)
    # doc_types = set(chunk.metadata['doc_type'] for chunk in chunks)
    # print(f"Document types: {doc_types}")
    # print(f"Vectorstore created with {collection.count()} documents")
    # query = "Can you describe James Ketrenos' work history?"
    # top_docs = find_similar(llm, collection, query, top_k=3)
    # print(top_docs)

    # Initialize Ollama client
    llm = ollama.Client(host=defines.ollama_api_url)

    # Start the file watcher (with initialization)
    observer, file_watcher = start_file_watcher(
        llm,
        defines.doc_dir,
        recreate=True,    # Start fresh
        initialize=True   # Load all documents initially
    )

    # Example query
    query = "Can you describe James Ketrenos' work history?"
    top_docs = file_watcher.find_similar(query, top_k=3)
    logging.info(top_docs)

    try:
        # Keep the main thread running; the asyncio loop must be running for the
        # file-update coroutines scheduled via run_coroutine_threadsafe() to execute
        asyncio.get_event_loop().run_forever()
    except KeyboardInterrupt:
        observer.stop()
    observer.join()