Pre claude rewrite

James Ketr 2025-04-17 15:04:10 -07:00
parent c00f3068fa
commit eb2629bcce
15 changed files with 2203 additions and 257 deletions

.gitignore vendored

@ -3,3 +3,5 @@ cache/**
jupyter/**
ollama/**
sessions/**
chromadb/**
chromadb-prod/**


@ -139,14 +139,7 @@ RUN python setup.py clean --all bdist_wheel --linux
# * ollama-ipex-llm
# * src/server.py - model server supporting RAG and fine-tuned models
#
# Agents using server:
# * src/web-ui.py - REACT server (backstory.ketrenos.com)
# * src/irc.py - IRC backend (irc.libera.chat #backstory-test)
# * src/cli.py - Command line chat
#
# Utilities:
# * src/training-fine-tune.py - Perform fine-tuning on curated documents
FROM ubuntu:oracular AS backstory
FROM ubuntu:oracular AS llm-base
COPY --from=python-build /opt/python /opt/python
@ -184,10 +177,22 @@ RUN apt-get update \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
WORKDIR /opt/backstory
RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
libncurses6 \
rsync \
jq \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
RUN dpkg -i /opt/ze-monitor-*deb && rm /opt/ze-monitor-*deb
RUN usermod -aG ze-monitor root
WORKDIR /opt/backstory
# Setup the ollama python virtual environment
RUN python3 -m venv --system-site-packages /opt/backstory/venv
@ -204,7 +209,6 @@ RUN { \
# Activate the pip environment on all shell calls
SHELL [ "/opt/backstory/shell" ]
# From https://pytorch-extension.intel.com/installation?platform=gpu&version=v2.6.10%2Bxpu&os=linux%2Fwsl2&package=pip
RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu
RUN pip install intel-extension-for-pytorch==2.6.10+xpu oneccl_bind_pt==2.6.0+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
@ -243,29 +247,17 @@ RUN pip3 install 'bigdl-core-xe-all>=2.6.0b'
# NOTE: IPEX includes the oneAPI components... not sure if they still need to be installed separately with a oneAPI env
RUN pip install einops diffusers # Required for IPEX optimize(), which is required to convert from Params4bit
# Needed by src/utils/chroma.py
RUN pip install watchdog
# Install packages needed for stock.py
RUN pip install yfinance pyzt geopy PyHyphen nltk
# While running in development mode via bind mounts, don't copy
# the source, or follow-on containers will always rebuild whenever
# the source changes.
#COPY /src/ /opt/backstory/src/
FROM llm-base AS backstory
COPY /src/requirements.txt /opt/backstory/src/requirements.txt
RUN pip install -r /opt/backstory/src/requirements.txt
SHELL [ "/bin/bash", "-c" ]
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
libncurses6 \
rsync \
jq \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
RUN dpkg -i /opt/ze-monitor-*deb && rm /opt/ze-monitor-*deb
RUN usermod -aG ze-monitor root
COPY /src/ /opt/backstory/src/
SHELL [ "/bin/bash", "-c" ]
@ -288,6 +280,14 @@ RUN { \
echo ' exec /bin/bash'; \
echo ' fi' ; \
echo 'else'; \
echo ' if [[ "${PRODUCTION}" -eq 0 ]]; then'; \
echo ' while true; do'; \
echo ' cd /opt/backstory/frontend'; \
echo ' echo "Launching Backstory React Frontend..."'; \
echo ' npm start "${@}" || echo "Backstory frontend died. Restarting in 3 seconds."'; \
echo ' sleep 3'; \
echo ' done &' ; \
echo ' fi' ; \
echo ' while true; do'; \
echo ' echo "Launching Backstory server..."'; \
echo ' python src/server.py "${@}" || echo "Backstory server died. Restarting in 3 seconds."'; \
@ -332,9 +332,8 @@ RUN apt-get update \
WORKDIR /opt/ollama
# Download the nightly ollama release from ipex-llm
#ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250226-ubuntu.tgz
#ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-ipex-llm-2.2.0b20250313-ubuntu.tgz
ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0/ollama-ipex-llm-2.2.0-ubuntu.tgz
#ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.3.0-nightly/ollama-ipex-llm-2.3.0b20250415-ubuntu.tgz
RUN wget -qO - ${OLLAMA_VERSION} | \
tar --strip-components=1 -C . -xzv
@ -414,7 +413,7 @@ ENV PATH=/opt/ollama:${PATH}
ENTRYPOINT [ "/entrypoint.sh" ]
FROM backstory AS jupyter
FROM llm-base AS jupyter
SHELL [ "/opt/backstory/shell" ]
@ -425,6 +424,8 @@ RUN pip install \
&& jupyter lab build --dev-build=False --minimize=False
# END setup Jupyter
COPY /src/requirements.txt /opt/backstory/src/requirements.txt
RUN pip install -r /opt/backstory/src/requirements.txt
SHELL [ "/bin/bash", "-c" ]


@ -1,12 +1,57 @@
# Backstory
Backstory is an AI Resume agent that provides context into a diverse career narrative.
Backstory is an AI Resume agent that provides context into a diverse career narrative. Backstory will take a collection of documents about a person and provide:
This project provides an AI chat client. While it can run a variety of LLM models, it is currently running Qwen2.5:7b. In addition to the standard model, it is enhanced with a RAG expert system that will chunk and embed any text files in `./docs`. It also exposes several utility tools for the LLM to use to obtain real-time data.
* Through the use of several custom Language Processing Modules (LPMs), develop a comprehensive set of test and validation data based on the input documents. While manual review of content should be performed to ensure accuracy, several LLM techniques are employed in the LPMs in order to isolate and remove hallucinations and inaccuracies in the test and validation data.
* Utilizing quantized low-rank adaptation (QLoRA) and parameter-efficient fine-tuning (PEFT), provide a hyperparameter-tuned and customized LLM for use in chat and content creation scenarios with expert knowledge about the individual (a minimal configuration sketch follows this list).
* Post-training, utilize additional RAG content to further enhance the information domain used in conversations and content generation.
* An integrated document publishing workflow that will transform a "Job Description" into a customized "Resume" for the person the LLM has been trained on.
* "Fact Check" the resulting resume against the RAG content directly provided by the user in order to remove hallucinations.
Internally, it is built using PyTorch 2.6 and Python 3.11 (several pip packages were not yet available for the Python 3.12 shipped with Ubuntu Oracular 24.10, which these containers are based on).
While it can run a variety of LLM models, Backstory is currently running Qwen2.5:7b. In addition to the standard model, the chat pipeline also exposes several utility tools for the LLM to use to obtain real-time data.
NOTE: If running on an Intel Arc A-series graphics processor, fp64 is not supported and may need to either be emulated or have the model quantized. It has been a while since I've had an A-series GPU to test on, so if you run into problems please file an [issue](https://github.com/jketreno/backstory/issues)--I have some routines I can put in, but don't have a way to test them.
Internally, Backstory is built using PyTorch 2.6 and Python 3.11 (several pip packages were not yet available for the Python 3.12 shipped with Ubuntu Oracular 24.10, which these containers are based on).
This system was built to run on commodity hardware, for example the Intel Arc B580 GPU with 12 GB of VRAM.
# Zero to Hero
Before you spend too much time learning how to customize Backstory, you may want to see it in action with your own information. Fine-tuning the LLM with your data can take a while, so you might want to see what the system can do just by utilizing retrieval-augmented generation.
Backstory works by generating a set of facts about you. Those facts can be exposed to the LLM via RAG, or baked into the LLM by fine-tuning. In either scenario, Backstory needs to know your relationship with a given fact.
To facilitate this, Backstory expects the documents it reads to be marked with information that highlights your role in relation to the document. That information is either stored within each document as [Front Matter (YAML)](https://jekyllrb.com/docs/front-matter/) or as a YAML sidecar file (a file with the same name as the content, plus the extension `.yml`).
The two key items expected in the front matter / sidecar are:
```
---
person:
role:
---
```
For example, a file `resume.md` could have the following either as front matter or in the file `resume.md.yml`:
```
---
person: James Ketrenos
role: This resume is about James Ketrenos and refers to his work history.
---
```
A document from a project you worked on, in my case `backstory`, could have the following front matter:
```
---
person: James Ketrenos
role: Designed, built, and deployed the application described in this document.
---
```
During both RAG extraction and during fine-tuning, that context information is provided to the LLM so it can better respond to queries about the user and that user's specific roles.
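As a rough illustration of how that per-document context can be read, the sketch below resolves the `person` and `role` fields from either a `.yml` sidecar or embedded front matter. It assumes PyYAML is available; the helper name and return shape are hypothetical, not Backstory's actual loader.
```
# Sketch: resolve a document's person/role metadata from a sidecar or front matter.
# Assumes PyYAML; load_doc_context() is a hypothetical helper, not Backstory's API.
import os
import yaml

def load_doc_context(path: str) -> dict:
    sidecar = path + ".yml"
    if os.path.exists(sidecar):                  # sidecar: same name plus .yml
        with open(sidecar, "r", encoding="utf-8") as f:
            return yaml.safe_load(f) or {}

    with open(path, "r", encoding="utf-8") as f:
        text = f.read()
    if text.startswith("---"):                   # embedded YAML front matter
        parts = text.split("---", 2)
        if len(parts) >= 3:
            return yaml.safe_load(parts[1]) or {}
    return {}

# e.g. load_doc_context("docs/resume.md") -> {"person": "James Ketrenos", "role": "..."}
```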
This project is seeded with a minimal resume and a document about Backstory. Those are present in the `docs/` directory, which is where you will place your content. If you do not replace anything and run the system as-is, Backstory will be able to provide information about me via RAG (there is fine-tuned data provided in this project archive).
# Installation
@ -14,12 +59,6 @@ This project uses docker containers to build. As this was originally written to
NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/)
## Want to run under WSL2? No can do...
https://www.intel.com/content/www/us/en/support/articles/000093216/graphics/processor-graphics.html
The A- and B-series discrete GPUs do not support SR-IOV, required for the GPU partitioning that Microsoft Windows uses in order to support GPU acceleration in WSL.
## Building
NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/)

chromadb-prod/.keep Normal file

chromadb/.keep Normal file


@ -8,6 +8,43 @@ services:
restart: "no"
env_file:
- .env
environment:
- PRODUCTION=0
- MODEL_NAME=${MODEL_NAME:-qwen2.5:3b}
devices:
- /dev/dri:/dev/dri
depends_on:
- ollama
networks:
- internal
ports:
- 8912:8911 # Flask React server
- 3000:3000 # REACT expo while developing frontend
volumes:
- ./cache:/root/.cache # Persist all models and GPU kernel cache
- ./sessions:/opt/backstory/sessions:rw # Persist sessions
- ./chromadb:/opt/backstory/chromadb:rw # Persist ChromaDB
- ./docs:/opt/backstory/docs:ro # Live mount of RAG content
- ./src:/opt/backstory/src:rw # Live mount server src
- ./frontend:/opt/backstory/frontend:rw # Live mount frontend src
cap_add: # used for running ze-monitor within container
- CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
- CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
- CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
backstory-prod:
build:
context: .
dockerfile: Dockerfile
target: backstory
image: backstory
container_name: backstory-prod
restart: "always"
env_file:
- .env
environment:
- PRODUCTION=1
- MODEL_NAME=${MODEL_NAME:-qwen2.5:7b}
devices:
- /dev/dri:/dev/dri
depends_on:
@ -16,12 +53,11 @@ services:
- internal
ports:
- 8911:8911 # Flask React server
- 3000:3000 # REACT expo while developing frontend
volumes:
- ./cache:/root/.cache # Persist all models and GPU kernel cache
- ./chromadb-prod:/opt/backstory/chromadb:rw # Persist ChromaDB
- ./sessions:/opt/backstory/sessions:rw # Persist sessions
- ./docs:/opt/backstory/docs:ro # Live mount of RAG content
- ./src:/opt/backstory/src:rw # Live mount server src
- ./frontend:/opt/backstory/frontend:rw # Live mount frontend src
cap_add: # used for running ze-monitor within container
- CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
@ -34,7 +70,7 @@ services:
dockerfile: Dockerfile
target: ollama
image: ollama
restart: "no"
restart: "always"
env_file:
- .env
environment:
@ -60,6 +96,8 @@ services:
dockerfile: Dockerfile
target: jupyter
image: jupyter
container_name: jupyter
restart: "always"
env_file:
- .env
devices:
@ -95,6 +133,7 @@ services:
dockerfile: Dockerfile
target: miniircd
image: miniircd
restart: "no"
env_file:
- .env
devices:

File diff suppressed because it is too large.


@ -8,6 +8,9 @@
"@fontsource/roboto": "^5.2.5",
"@mui/icons-material": "^7.0.1",
"@mui/material": "^7.0.1",
"@tensorflow/tfjs": "^4.22.0",
"@tensorflow/tfjs-backend-webgl": "^4.22.0",
"@tensorflow/tfjs-tsne": "^0.2.0",
"@testing-library/dom": "^10.4.0",
"@testing-library/jest-dom": "^6.6.3",
"@testing-library/react": "^16.2.0",
@ -26,6 +29,7 @@
"rehype-katex": "^7.0.1",
"remark-gfm": "^4.0.1",
"remark-math": "^6.0.0",
"tsne-js": "^1.0.3",
"typescript": "^4.9.5",
"web-vitals": "^2.1.4"
},
@ -52,5 +56,8 @@
"last 1 firefox version",
"last 1 safari version"
]
},
"devDependencies": {
"@types/plotly.js": "^2.35.5"
}
}


@ -36,6 +36,7 @@ import { Message, MessageList } from './Message';
import { MessageData } from './MessageMeta';
import { SeverityType } from './Snack';
import { ContextStatus } from './ContextStatus';
import { VectorVisualizer, ResultData } from './VectorVisualizer';
import './App.css';
@ -103,13 +104,11 @@ type SystemInfo = {
"CPU": string
};
const getConnectionBase = (loc: any): string => {
if (!loc.host.match(/.*battle-linux.*/)) {
return loc.protocol + "//" + loc.host;
} else {
return loc.protocol + "//battle-linux.ketrenos.com:8911";
return loc.protocol + "//battle-linux.ketrenos.com:8912";
}
}
@ -320,7 +319,7 @@ const App = () => {
const conversationRef = useRef<any>(null);
const [processing, setProcessing] = useState(false);
const [sessionId, setSessionId] = useState<string | undefined>(undefined);
const [loc,] = useState<Location>(window.location)
const [connectionBase,] = useState<string>(getConnectionBase(window.location))
const [mobileOpen, setMobileOpen] = useState(false);
const [isClosing, setIsClosing] = useState(false);
const [snackOpen, setSnackOpen] = useState(false);
@ -344,6 +343,7 @@ const App = () => {
const [resume, setResume] = useState<MessageData | undefined>(undefined);
const [facts, setFacts] = useState<MessageData | undefined>(undefined);
const timerRef = useRef<any>(null);
const [result, setResult] = useState<ResultData | undefined>(undefined);
const startCountdown = (seconds: number) => {
if (timerRef.current) clearInterval(timerRef.current);
@ -406,7 +406,7 @@ const App = () => {
if (systemInfo !== undefined || sessionId === undefined) {
return;
}
fetch(getConnectionBase(loc) + `/api/system-info/${sessionId}`, {
fetch(connectionBase + `/api/system-info/${sessionId}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
@ -420,7 +420,32 @@ const App = () => {
console.error('Error obtaining system information:', error);
setSnack("Unable to obtain system information.", "error");
});
}, [systemInfo, setSystemInfo, loc, setSnack, sessionId])
}, [systemInfo, setSystemInfo, connectionBase, setSnack, sessionId])
// Get the collection to visualize
useEffect(() => {
if (result !== undefined || sessionId === undefined) {
return;
}
const fetchCollection = async () => {
try {
const response = await fetch(connectionBase + `/api/tsne/${sessionId}`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ dimensions: 3 }),
});
const data = await response.json();
setResult(data);
} catch (error) {
console.error('Error obtaining collection information:', error);
setSnack("Unable to obtain collection information.", "error");
};
};
fetchCollection();
}, [result, setResult, connectionBase, setSnack, sessionId])
// Get the About markdown
useEffect(() => {
@ -451,7 +476,7 @@ const App = () => {
// Update the context status
const updateContextStatus = useCallback(() => {
fetch(getConnectionBase(loc) + `/api/context-status/${sessionId}`, {
fetch(connectionBase + `/api/context-status/${sessionId}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
@ -465,14 +490,14 @@ const App = () => {
console.error('Error getting context status:', error);
setSnack("Unable to obtain context status.", "error");
});
}, [setContextStatus, loc, setSnack, sessionId]);
}, [setContextStatus, connectionBase, setSnack, sessionId]);
// Set the initial chat history to "loading" or the welcome message if loaded.
useEffect(() => {
if (sessionId === undefined) {
setConversation([loadingMessage]);
} else {
fetch(getConnectionBase(loc) + `/api/history/${sessionId}`, {
fetch(connectionBase + `/api/history/${sessionId}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
@ -492,17 +517,17 @@ const App = () => {
});
updateContextStatus();
}
}, [sessionId, setConversation, updateContextStatus, loc, setSnack]);
}, [sessionId, setConversation, updateContextStatus, connectionBase, setSnack]);
// Extract the sessionId from the URL if present, otherwise
// request a sessionId from the server.
useEffect(() => {
const url = new URL(loc.href);
const url = new URL(window.location.href);
const pathParts = url.pathname.split('/').filter(Boolean);
if (!pathParts.length) {
console.log("No session id -- creating a new session")
fetch(getConnectionBase(loc) + `/api/context`, {
fetch(connectionBase + `/api/context`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
@ -520,7 +545,7 @@ const App = () => {
setSessionId(pathParts[0]);
}
}, [setSessionId, loc]);
}, [setSessionId, connectionBase]);
// If the systemPrompt has not been set, fetch it from the server
useEffect(() => {
@ -529,7 +554,7 @@ const App = () => {
}
const fetchTunables = async () => {
// Make the fetch request with proper headers
const response = await fetch(getConnectionBase(loc) + `/api/tunables/${sessionId}`, {
const response = await fetch(connectionBase + `/api/tunables/${sessionId}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
@ -544,7 +569,7 @@ const App = () => {
}
fetchTunables();
}, [sessionId, serverSystemPrompt, setServerSystemPrompt, loc]);
}, [sessionId, serverSystemPrompt, setServerSystemPrompt, connectionBase]);
// If the tools have not been set, fetch them from the server
useEffect(() => {
@ -554,7 +579,7 @@ const App = () => {
const fetchTools = async () => {
try {
// Make the fetch request with proper headers
const response = await fetch(getConnectionBase(loc) + `/api/tools/${sessionId}`, {
const response = await fetch(connectionBase + `/api/tools/${sessionId}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
@ -573,7 +598,7 @@ const App = () => {
}
fetchTools();
}, [sessionId, tools, setTools, setSnack, loc]);
}, [sessionId, tools, setTools, setSnack, connectionBase]);
// If the jobDescription and resume have not been set, fetch them from the server
useEffect(() => {
@ -586,7 +611,7 @@ const App = () => {
const fetchResume = async () => {
try {
// Make the fetch request with proper headers
const response = await fetch(getConnectionBase(loc) + `/api/resume/${sessionId}`, {
const response = await fetch(connectionBase + `/api/resume/${sessionId}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
@ -615,7 +640,7 @@ const App = () => {
}
fetchResume();
}, [sessionId, resume, jobDescription, setResume, setJobDescription, setSnack, loc]);
}, [sessionId, resume, jobDescription, setResume, setJobDescription, setSnack, connectionBase]);
// If the RAGs have not been set, fetch them from the server
useEffect(() => {
@ -625,7 +650,7 @@ const App = () => {
const fetchRags = async () => {
try {
// Make the fetch request with proper headers
const response = await fetch(getConnectionBase(loc) + `/api/rags/${sessionId}`, {
const response = await fetch(connectionBase + `/api/rags/${sessionId}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
@ -644,7 +669,7 @@ const App = () => {
}
fetchRags();
}, [sessionId, rags, setRags, setSnack, loc]);
}, [sessionId, rags, setRags, setSnack, connectionBase]);
// If context status changes, show a warning if necessary. If it drops
// back below the threshold, clear the warning trigger
@ -664,7 +689,7 @@ const App = () => {
const toggleRag = async (tool: Tool) => {
tool.enabled = !tool.enabled
try {
const response = await fetch(getConnectionBase(loc) + `/api/rags/${sessionId}`, {
const response = await fetch(connectionBase + `/api/rags/${sessionId}`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
@ -686,7 +711,7 @@ const App = () => {
const toggleTool = async (tool: Tool) => {
tool.enabled = !tool.enabled
try {
const response = await fetch(getConnectionBase(loc) + `/api/tools/${sessionId}`, {
const response = await fetch(connectionBase + `/api/tools/${sessionId}`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
@ -711,7 +736,7 @@ const App = () => {
}
const sendSystemPrompt = async (prompt: string) => {
try {
const response = await fetch(getConnectionBase(loc) + `/api/tunables/${sessionId}`, {
const response = await fetch(connectionBase + `/api/tunables/${sessionId}`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
@ -735,7 +760,7 @@ const App = () => {
sendSystemPrompt(systemPrompt);
}, [systemPrompt, setServerSystemPrompt, serverSystemPrompt, loc, sessionId, setSnack]);
}, [systemPrompt, setServerSystemPrompt, serverSystemPrompt, connectionBase, sessionId, setSnack]);
useEffect(() => {
if (sessionId === undefined) {
@ -743,7 +768,7 @@ const App = () => {
}
const sendMessageHistoryLength = async (length: number) => {
try {
const response = await fetch(getConnectionBase(loc) + `/api/tunables/${sessionId}`, {
const response = await fetch(connectionBase + `/api/tunables/${sessionId}`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
@ -766,11 +791,11 @@ const App = () => {
sendMessageHistoryLength(messageHistoryLength);
}, [messageHistoryLength, setMessageHistoryLength, loc, sessionId, setSnack]);
}, [messageHistoryLength, setMessageHistoryLength, connectionBase, sessionId, setSnack]);
const reset = async (types: ("rags" | "tools" | "history" | "system-prompt" | "message-history-length")[], message: string = "Update successful.") => {
try {
const response = await fetch(getConnectionBase(loc) + `/api/reset/${sessionId}`, {
const response = await fetch(connectionBase + `/api/reset/${sessionId}`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
@ -883,7 +908,7 @@ const App = () => {
}
// Make the fetch request with proper headers
const response = await fetch(getConnectionBase(loc) + `/api/chat/${sessionId}`, {
const response = await fetch(connectionBase + `/api/chat/${sessionId}`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
@ -1076,6 +1101,7 @@ const App = () => {
onChange={handleTabChange} aria-label="Backstory navigation">
<Tab label="Backstory" icon={<Avatar sx={{ width: 24, height: 24 }} variant="rounded" alt="Backstory logo" src="/logo192.png" />} iconPosition="start" />
<Tab label="Resume Builder"/>
<Tab label="Visualizer" />
<Tab label="About"/>
</Tabs>
</Box>}
@ -1183,10 +1209,18 @@ const App = () => {
</CustomTabPanel>
<CustomTabPanel tab={tab} index={1}>
<ResumeBuilder {...{ isScrolledToBottom, scrollToBottom, facts, setFacts, resume, setResume, jobDescription, processing, setProcessing, setSnack, connectionBase: getConnectionBase(loc), sessionId }} />
<ResumeBuilder {...{ isScrolledToBottom, scrollToBottom, facts, setFacts, resume, setResume, jobDescription, processing, setProcessing, setSnack, connectionBase: connectionBase, sessionId }} />
</CustomTabPanel>
<CustomTabPanel tab={tab} index={2}>
<Box className="ChatBox">
<Box className="Conversation">
{result !== undefined && <VectorVisualizer {...{ result, connectionBase, sessionId }} />}
</Box>
</Box>
</CustomTabPanel>
<CustomTabPanel tab={tab} index={3}>
<Box className="ChatBox">
<Box className="Conversation">
<Message {...{ message: { role: 'assistant', content: about }, submitQuery }} />


@ -0,0 +1,224 @@
import React, { useEffect, useState } from 'react';
import Box from '@mui/material/Box';
import Plot from 'react-plotly.js';
import TextField from '@mui/material/TextField';
import Tooltip from '@mui/material/Tooltip';
import Button from '@mui/material/Button';
import SendIcon from '@mui/icons-material/Send';
interface Metadata {
type?: string;
[key: string]: any;
}
interface ResultData {
embeddings: number[][] | number[][][];
documents: string[];
metadatas: Metadata[];
}
interface PlotData {
x: number[];
y: number[];
z?: number[];
colors: string[];
text: string[];
sizes: number[];
symbols: string[];
}
interface VectorVisualizerProps {
result: ResultData;
connectionBase: string;
sessionId?: string;
}
interface ChromaResult {
distances: number[];
documents: string[];
ids: string[];
metadatas: Metadata[];
query_embedding: number[];
query?: string;
vector_embedding?: number[];
}
const VectorVisualizer: React.FC<VectorVisualizerProps> = ({ result, connectionBase, sessionId }) => {
const [plotData, setPlotData] = useState<PlotData | null>(null);
const [query, setQuery] = useState<string>('');
const [queryEmbedding, setQueryEmbedding] = useState<ChromaResult | undefined>(undefined);
useEffect(() => {
if (!result || !result.embeddings) return;
if (result.embeddings.length === 0) return;
const vectors: number[][] = [...result.embeddings as number[][]];
const documents = [...result.documents || []];
const metadatas = [...result.metadatas || []];
if (queryEmbedding !== undefined && queryEmbedding.vector_embedding !== undefined) {
metadatas.unshift({ type: 'query' });
documents.unshift(queryEmbedding.query || '');
vectors.unshift(queryEmbedding.vector_embedding);
}
const is2D = vectors.every((v: number[]) => v.length === 2);
const is3D = vectors.every((v: number[]) => v.length === 3);
if (!is2D && !is3D) {
console.error('Vectors are neither 2D nor 3D');
return;
}
console.log('Vectors:', vectors);
// Placeholder color assignment
const colorMap: Record<string, string> = {
'query': '#00ff00',
};
const sizeMap: Record<string, number> = {
'query': 10,
};
const symbolMap: Record<string, string> = {
'query': 'circle',
};
const doc_types = metadatas.map(m => m.type || 'unknown');
const sizes = doc_types.map(type => {
if (!sizeMap[type]) {
sizeMap[type] = 5;
}
return sizeMap[type];
});
const symbols = doc_types.map(type => {
if (!symbolMap[type]) {
symbolMap[type] = 'circle';
}
return symbolMap[type];
});
const colors = doc_types.map(type => {
if (!colorMap[type]) {
colorMap[type] = '#ff0000';
}
return colorMap[type];
});
const x = vectors.map((v: number[]) => v[0]);
const y = vectors.map((v: number[]) => v[1]);
const text = documents.map((doc, i) => `Type: ${doc_types[i]}<br>Text: ${doc.slice(0, 100)}...`);
if (is3D) {
const z = vectors.map((v: number[]) => v[2]);
setPlotData({
x: x,
y: y,
z: z,
colors: colors,
sizes: sizes,
symbols: symbols,
text: text
});
} else {
setPlotData({
x: x,
y: y,
colors: colors,
sizes: sizes,
symbols: symbols,
text: text
});
}
}, [result, queryEmbedding]);
const handleKeyPress = (event: any) => {
if (event.key === 'Enter') {
sendQuery(query);
}
};
const sendQuery = async (query: string) => {
if (!query.trim()) return;
setQuery('');
const response = await fetch(`${connectionBase}/api/similarity/${sessionId}`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
query: query,
})
});
const chroma: ChromaResult = await response.json();
console.log('Chroma:', chroma);
setQueryEmbedding(chroma);
};
if (!plotData || sessionId === undefined) return (
<Box sx={{ display: 'flex', flexGrow: 1, justifyContent: 'center', alignItems: 'center' }}>
<div>Loading visualization...</div>
</Box>
);
return (
<>
<Box sx={{ display: 'flex', flexGrow: 1, justifyContent: 'center', alignItems: 'center' }}>
<Plot
data={[
{
x: plotData.x,
y: plotData.y,
z: plotData.z,
mode: 'markers',
marker: {
size: plotData.sizes,
symbol: plotData.symbols,
color: plotData.colors,
opacity: 0.8,
},
text: plotData.text,
hoverinfo: 'text',
type: plotData.z?.length ? 'scatter3d' : 'scatter',
},
]}
useResizeHandler={true}
config={{ responsive: true }}
style={{ width: '100%', height: '100%' }}
layout={{
autosize: true,
title: 'Vector Store Visualization',
xaxis: { title: 'x' },
yaxis: { title: 'y' },
zaxis: { title: 'z' },
margin: { r: 20, b: 10, l: 10, t: 40 },
}}
/>
</Box>
{ queryEmbedding !== undefined &&
<Box sx={{ display: 'flex', flexDirection: 'column', p: 1 }}>
<Box sx={{ fontSize: '0.8rem', mb: 1 }}>
Query: {queryEmbedding.query}
</Box>
</Box>
}
<Box className="Query" sx={{ display: "flex", flexDirection: "row", p: 1 }}>
<TextField
variant="outlined"
fullWidth
type="text"
value={query}
onChange={(e) => setQuery(e.target.value)}
onKeyDown={handleKeyPress}
placeholder="Enter query to find related documents..."
id="QueryInput"
/>
<Tooltip title="Send">
<Button sx={{ m: 1 }} variant="contained" onClick={() => { sendQuery(query); }}><SendIcon /></Button>
</Tooltip>
</Box>
</>
);
};
export type { VectorVisualizerProps, ResultData, Metadata };
export {
VectorVisualizer
};

frontend/src/declarations.d.ts vendored Normal file

@ -0,0 +1,2 @@
declare module 'tsne-js';
declare module 'react-plotly.js';


@ -1,44 +1,343 @@
python-dotenv
jupyterlab
ipywidgets
requests
numpy
pandas
scipy
scikit-learn
matplotlib
gensim
torch
transformers
tqdm
openai
gradio
langchain
tiktoken
faiss-cpu
langchain-openai
langchain_experimental
langchain_chroma
langchain[docarray]
datasets
sentencepiece
matplotlib
google-generativeai
anthropic
scikit-learn
unstructured
chromadb
plotly
jupyter-dash
beautifulsoup4
pydub
modal
ollama
accelerate
sentencepiece
bitsandbytes
psutil
setuptools
speedtest-cli
sentence_transformers
feedparser
accelerate==1.6.0
aiofiles==24.1.0
aiohappyeyeballs==2.6.1
aiohttp==3.11.16
aiosignal==1.3.2
annotated-types==0.7.0
ansi2html==1.9.2
anthropic==0.49.0
anyio==4.9.0
appdirs==1.4.4
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asgiref==3.8.1
asttokens==3.0.0
async-lru==2.0.5
attrs==25.3.0
babel==2.17.0
backoff==2.2.1
bcrypt==4.3.0
beautifulsoup4==4.13.4
bigdl-core-xe-all==2.7.0b20250416
bitsandbytes @ https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl#sha256=66deda2b99cee0d4e52a183d9bac5c8e8618cd9b4d4933ccf23b908622d6b879
bleach==6.2.0
blinker==1.9.0
bs4==0.0.2
build==1.2.2.post1
cachetools==5.5.2
certifi==2025.1.31
cffi==1.17.1
chardet==5.2.0
charset-normalizer==3.4.1
chroma-hnswlib==0.7.6
chromadb==0.6.3
click==8.1.8
coloredlogs==15.0.1
comm==0.2.2
contourpy==1.3.2
cryptography==44.0.2
cycler==0.12.1
dash==3.0.3
dataclasses-json==0.6.7
datasets==3.5.0
debugpy==1.8.14
decorator==5.2.1
defusedxml==0.7.1
Deprecated==1.2.18
diffusers==0.33.1
dill==0.3.8
distro==1.9.0
dpcpp-cpp-rt==2025.0.4
durationpy==0.9
einops==0.8.1
emoji==2.14.1
eval_type_backport==0.2.2
executing==2.2.0
faiss-cpu==1.10.0
fastapi==0.115.9
fastjsonschema==2.21.1
feedparser==6.0.11
ffmpy==0.5.0
filelock==3.13.1
filetype==1.2.0
Flask==3.0.3
flask-cors==5.0.1
flask-sock==0.7.0
flatbuffers==25.2.10
fonttools==4.57.0
fqdn==1.5.1
frozendict==2.4.6
frozenlist==1.5.0
fsspec==2024.6.1
gensim==4.3.3
geographiclib==2.0
geopy==2.4.1
google-ai-generativelanguage==0.6.15
google-api-core==2.24.2
google-api-python-client==2.167.0
google-auth==2.39.0
google-auth-httplib2==0.2.0
google-generativeai==0.8.4
googleapis-common-protos==1.70.0
gradio==5.25.2
gradio_client==1.8.0
greenlet==3.2.0
groovy==0.1.2
grpcio==1.71.0
grpcio-status==1.71.0
grpclib==0.4.7
h11==0.14.0
h2==4.2.0
hpack==4.1.0
html5lib==1.1
httpcore==1.0.8
httplib2==0.22.0
httptools==0.6.4
httpx==0.28.1
httpx-sse==0.4.0
huggingface-hub==0.30.2
humanfriendly==10.0
hyperframe==6.1.0
idna==3.10
impi-devel==2021.14.1
impi-rt==2021.14.1
importlib_metadata==8.6.1
importlib_resources==6.5.2
intel-cmplr-lib-rt==2025.0.2
intel-cmplr-lib-ur==2025.0.2
intel-cmplr-lic-rt==2025.0.2
intel-opencl-rt==2025.0.4
intel-openmp==2025.0.4
intel-pti==0.10.0
intel-sycl-rt==2025.0.2
intel_extension_for_pytorch==2.6.10+xpu
ipex-llm @ file:///opt/wheels/ipex_llm-2.2.0.dev0-py3-none-any.whl#sha256=5023ff4dc9799838486b4d160d5f3dcd5f6d3bb9ac8a2c6cabaf90034b540ba3
ipykernel==6.29.5
ipython==9.1.0
ipython_pygments_lexers==1.1.1
ipywidgets==8.1.6
isoduration==20.11.0
itsdangerous==2.2.0
jedi==0.19.2
Jinja2==3.1.4
jiter==0.9.0
joblib==1.4.2
json5==0.12.0
jsonpatch==1.33
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2024.10.1
jupyter-dash==0.4.2
jupyter-events==0.12.0
jupyter-lsp==2.2.5
jupyter_client==8.6.3
jupyter_core==5.7.2
jupyter_server==2.15.0
jupyter_server_terminals==0.5.3
jupyterlab==4.4.0
jupyterlab_pygments==0.3.0
jupyterlab_server==2.27.3
jupyterlab_widgets==3.0.14
kiwisolver==1.4.8
kubernetes==32.0.1
langchain==0.3.23
langchain-chroma==0.2.3
langchain-community==0.3.21
langchain-core==0.3.52
langchain-experimental==0.3.4
langchain-ollama==0.3.2
langchain-openai==0.3.13
langchain-text-splitters==0.3.8
langdetect==1.0.9
langsmith==0.3.31
llvmlite==0.44.0
lxml==5.3.2
markdown-it-py==3.0.0
MarkupSafe==2.1.5
marshmallow==3.26.1
matplotlib==3.10.1
matplotlib-inline==0.1.7
mdurl==0.1.2
mistune==3.1.3
mkl==2025.0.1
mkl-dpcpp==2025.0.1
mmh3==5.1.0
modal==0.74.4
monotonic==1.6
mpmath==1.3.0
multidict==6.4.3
multiprocess==0.70.16
multitasking==0.0.11
mypy-extensions==1.0.0
narwhals==1.35.0
nbclient==0.10.2
nbconvert==7.16.6
nbformat==5.10.4
nest-asyncio==1.6.0
networkx==3.3
nltk==3.9.1
notebook_shim==0.2.4
numba==0.61.2
numpy==1.26.4
oauthlib==3.2.2
olefile==0.47
ollama==0.4.8
oneccl==2021.14.1
oneccl-bind-pt==2.6.0+xpu
oneccl-devel==2021.14.1
onemkl-sycl-blas==2025.0.1
onemkl-sycl-datafitting==2025.0.1
onemkl-sycl-dft==2025.0.1
onemkl-sycl-lapack==2025.0.1
onemkl-sycl-rng==2025.0.1
onemkl-sycl-sparse==2025.0.1
onemkl-sycl-stats==2025.0.1
onemkl-sycl-vm==2025.0.1
onnxruntime==1.21.0
openai==1.75.0
opentelemetry-api==1.32.1
opentelemetry-exporter-otlp-proto-common==1.32.1
opentelemetry-exporter-otlp-proto-grpc==1.32.1
opentelemetry-instrumentation==0.53b1
opentelemetry-instrumentation-asgi==0.53b1
opentelemetry-instrumentation-fastapi==0.53b1
opentelemetry-proto==1.32.1
opentelemetry-sdk==1.32.1
opentelemetry-semantic-conventions==0.53b1
opentelemetry-util-http==0.53b1
orjson==3.10.16
overrides==7.7.0
packaging==24.1
pandas==2.2.3
pandocfilters==1.5.1
parso==0.8.4
peewee==3.17.9
peft==0.15.2
pexpect==4.9.0
pillow==11.0.0
platformdirs==4.3.7
plotly==6.0.1
posthog==3.25.0
prometheus_client==0.21.1
prompt_toolkit==3.0.51
propcache==0.3.1
proto-plus==1.26.1
protobuf==5.29.4
psutil==7.0.0
ptyprocess==0.7.0
pure_eval==0.2.3
pyarrow==19.0.1
pyasn1==0.6.1
pyasn1_modules==0.4.2
pycparser==2.22
pydantic==2.11.3
pydantic-settings==2.8.1
pydantic_core==2.33.1
pydub==0.25.1
Pygments==2.19.1
PyHyphen==4.0.4
pynndescent==0.5.13
pyparsing==3.2.3
pypdf==5.4.0
PyPika==0.48.9
pyproject_hooks==1.2.0
python-dateutil==2.9.0.post0
python-dotenv==1.1.0
python-iso639==2025.2.18
python-json-logger==3.3.0
python-magic==0.4.27
python-multipart==0.0.20
python-oxmsg==0.0.2
pytorch-triton-xpu==3.2.0
pytz==2025.2
PyYAML==6.0.2
pyzmq==26.4.0
pyzt==0.0.2
RapidFuzz==3.13.0
referencing==0.36.2
regex==2024.11.6
requests==2.32.3
requests-oauthlib==2.0.0
requests-toolbelt==1.0.0
retrying==1.3.4
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rich==14.0.0
rpds-py==0.24.0
rsa==4.9.1
ruamel.yaml==0.18.10
ruamel.yaml.clib==0.2.12
ruff==0.11.5
safehttpx==0.1.6
safetensors==0.5.3
scikit-learn==1.6.1
scipy==1.13.1
semantic-version==2.10.0
Send2Trash==1.8.3
sentence-transformers==3.4.0
sentencepiece==0.2.0
sgmllib3k==1.0.0
shellingham==1.5.4
sigtools==4.0.1
simple-websocket==1.1.0
six==1.17.0
smart-open==7.1.0
sniffio==1.3.1
soupsieve==2.6
speedtest-cli==2.1.3
SQLAlchemy==2.0.40
stack-data==0.6.3
starlette==0.45.3
sympy==1.13.1
synchronicity==0.9.11
tbb==2022.1.0
tcmlib==1.2.0
tenacity==9.1.2
terminado==0.18.1
threadpoolctl==3.6.0
tiktoken==0.9.0
tinycss2==1.4.0
tokenizers==0.21.1
toml==0.10.2
tomlkit==0.13.2
torch==2.6.0+xpu
torchaudio==2.6.0+xpu
torchvision==0.21.0+xpu
tornado==6.4.2
tqdm==4.67.1
traitlets==5.14.3
transformers==4.51.3
typer==0.15.2
types-certifi==2021.10.8.3
types-python-dateutil==2.9.0.20241206
types-toml==0.10.8.20240310
typing-inspect==0.9.0
typing-inspection==0.4.0
typing_extensions==4.12.2
tzdata==2025.2
umap-learn==0.5.7
umf==0.9.1
unstructured==0.17.2
unstructured-client==0.32.3
uri-template==1.3.0
uritemplate==4.1.1
urllib3==2.4.0
uvicorn==0.34.1
uvloop==0.21.0
watchdog==6.0.0
watchfiles==1.0.5
wcwidth==0.2.13
webcolors==24.11.1
webencodings==0.5.1
websocket-client==1.8.0
websockets==15.0.1
Werkzeug==3.0.6
widgetsnbextension==4.0.14
wrapt==1.17.2
wsproto==1.2.0
xxhash==3.5.0
yarl==1.19.0
yfinance==0.2.55
zipp==3.21.0
zstandard==0.23.0


@ -24,13 +24,21 @@ try_import('ollama')
try_import('requests')
try_import('bs4', 'beautifulsoup4')
try_import('fastapi')
try_import('uvicorn')
try_import('sklearn')
try_import('numpy')
try_import('umap')
import ollama
import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, Request
from fastapi import FastAPI, Request, BackgroundTasks
from fastapi.responses import JSONResponse, StreamingResponse, FileResponse, RedirectResponse
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
import numpy as np
#from sklearn.manifold import TSNE
import umap
from utils import (
rag as Rag,
@ -354,14 +362,15 @@ async def handle_tool_calls(message):
# %%
class WebServer:
def __init__(self, logging, client, collection, model=MODEL_NAME):
def __init__(self, logging, client, model=MODEL_NAME):
self.logging = logging
self.app = FastAPI()
self.contexts = {}
self.client = client
self.model = model
self.processing = False
self.collection = collection
self.file_watcher = None
self.observer = None
self.app.add_middleware(
CORSMiddleware,
@ -371,6 +380,26 @@ class WebServer:
allow_headers=["*"],
)
@self.app.on_event("startup")
async def startup_event():
# Start the file watcher
self.observer, self.file_watcher = Rag.start_file_watcher(
llm=client,
watch_directory=defines.doc_dir,
initialize=True, # Only loads documents if no hash state exists
recreate=False # Don't recreate if exists
)
print(f"API started with {self.file_watcher.collection.count()} documents in the collection")
@self.app.on_event("shutdown")
async def shutdown_event():
if self.observer:
self.observer.stop()
self.observer.join()
print("File watcher stopped")
self.setup_routes()
def setup_routes(self):
@ -381,6 +410,136 @@ class WebServer:
return RedirectResponse(url=f"/{context['id']}", status_code=307)
#return JSONResponse({"redirect": f"/{context['id']}"})
@self.app.get("/api/query")
async def query_documents(query: str, top_k: int = 3):
if not self.file_watcher:
return
"""Query the RAG system with the given prompt."""
results = self.file_watcher.find_similar(query, top_k=top_k)
return {
"query": query,
"results": [
{
"content": doc,
"metadata": meta,
"distance": dist
}
for doc, meta, dist in zip(
results["documents"],
results["metadatas"],
results["distances"]
)
]
}
@self.app.post("/api/refresh/{file_path:path}")
async def refresh_document(file_path: str, background_tasks: BackgroundTasks):
if not self.file_watcher:
return
"""Manually refresh a specific document in the collection."""
full_path = os.path.join(defines.doc_dir, file_path)
if not os.path.exists(full_path):
return {"status": "error", "message": "File not found"}
# Schedule the update in the background
background_tasks.add_task(
self.file_watcher.process_file_update, full_path
)
return {
"status": "success",
"message": f"Document refresh scheduled for {file_path}"
}
# @self.app.post("/api/refresh-all")
# async def refresh_all_documents():
# if not self.file_watcher:
# return
# """Refresh all documents in the collection."""
# # Re-initialize file hashes and process all files
# self.file_watcher._initialize_file_hashes()
# # Schedule updates for all files
# file_paths = self.file_watcher.file_hashes.keys()
# tasks = [self.file_watcher.process_file_update(path) for path in file_paths]
# # Wait for all updates to complete
# await asyncio.gather(*tasks)
# return {
# "status": "success",
# "message": f"Refreshed {len(file_paths)} documents",
# "document_count": file_watcher.collection.count()
# }
@self.app.put('/api/tsne/{context_id}')
async def put_tsne(context_id: str, request: Request):
if not self.file_watcher:
return
if not is_valid_uuid(context_id):
logging.warning(f"Invalid context_id: {context_id}")
return JSONResponse({"error": "Invalid context_id"}, status_code=400)
context = self.upsert_context(context_id)
try:
data = await request.json()
dimensions = data.get('dimensions', 2)
except:
dimensions = 2
try:
result = self.file_watcher.collection.get(include=['embeddings', 'documents', 'metadatas'])
vectors = np.array(result['embeddings'])
umap_model = umap.UMAP(n_components=dimensions, random_state=42)
embedding = umap_model.fit_transform(vectors)
context['umap_model'] = umap_model
result['embeddings'] = embedding.tolist()
return JSONResponse(result)
except Exception as e:
logging.error(e)
return JSONResponse({"error": str(e)}, 500)
@self.app.put('/api/similarity/{context_id}')
async def put_similarity(context_id: str, request: Request):
if not self.file_watcher:
return
if not is_valid_uuid(context_id):
logging.warning(f"Invalid context_id: {context_id}")
return JSONResponse({"error": "Invalid context_id"}, status_code=400)
context = self.upsert_context(context_id)
if not context.get("umap_model"):
return JSONResponse({"error": "No umap_model found in context"}, status_code=404)
try:
data = await request.json()
query = data.get('query', '')
except:
query = ''
if not query:
return JSONResponse({"error": "No query provided"}, status_code=400)
try:
chroma_results = self.file_watcher.find_similar(query=query, top_k=10)
if not chroma_results:
return JSONResponse({"error": "No results found"}, status_code=404)
chroma_embedding = chroma_results["query_embedding"]
normalized = (chroma_embedding - chroma_embedding.min()) / (chroma_embedding.max() - chroma_embedding.min())
vector_embedding = context["umap_model"].transform([normalized])[0].tolist()
return JSONResponse({ **chroma_results, "query": query, "vector_embedding": vector_embedding })
except Exception as e:
logging.error(e)
#return JSONResponse({"error": str(e)}, 500)
@self.app.put('/api/reset/{context_id}')
async def put_reset(context_id: str, request: Request):
if not is_valid_uuid(context_id):
@ -634,8 +793,6 @@ class WebServer:
self.logging.info(f"Serve index.html for {path}")
return FileResponse(os.path.join(defines.static_content, 'index.html'))
import requests
def save_context(self, session_id):
"""
Serialize a Python dictionary to a file in the sessions directory.
@ -656,10 +813,14 @@ class WebServer:
# Create the full file path
file_path = os.path.join(defines.session_dir, session_id)
umap_model = context.get("umap_model")
if umap_model:
del context["umap_model"]
# Serialize the data to JSON and write to file
with open(file_path, 'w') as f:
json.dump(context, f)
if umap_model:
context["umap_model"] = umap_model
return session_id
def load_context(self, session_id):
@ -719,6 +880,9 @@ class WebServer:
return self.load_context(context_id)
async def chat(self, context, content):
if not self.file_watcher:
return
content = content.strip()
if not content:
yield {"status": "error", "message": "Invalid request"}
@ -744,7 +908,7 @@ class WebServer:
for rag in context["rags"]:
if rag["enabled"] and rag["name"] == "JPK": # Only support JPK rag right now...
yield {"status": "processing", "message": f"Checking RAG context {rag['name']}..."}
chroma_results = Rag.find_similar(llm=self.client, collection=self.collection, query=content, top_k=10)
chroma_results = self.file_watcher.find_similar(query=content, top_k=10)
if chroma_results:
rag_docs.extend(chroma_results["documents"])
metadata["rag"] = { "name": rag["name"], **chroma_results }
@ -852,6 +1016,9 @@ class WebServer:
self.processing = False
async def generate_resume(self, context, content):
if not self.file_watcher:
return
content = content.strip()
if not content:
yield {"status": "error", "message": "Invalid request"}
@ -880,21 +1047,21 @@ class WebServer:
"prompt_eval_duration": 0,
}
rag_docs = []
resume_doc = open(defines.resume_doc, 'r').read()
rag_docs.append(resume_doc)
for rag in context["rags"]:
if rag["enabled"] and rag["name"] == "JPK": # Only support JPK rag right now...
yield {"status": "processing", "message": f"Checking RAG context {rag['name']}..."}
chroma_results = Rag.find_similar(llm=self.client, collection=self.collection, query=content, top_k=10)
chroma_results = self.file_watcher.find_similar(query=content, top_k=10)
if chroma_results:
rag_docs.extend(chroma_results["documents"])
metadata["rag"] = { "name": rag["name"], **chroma_results }
preamble = f"The current time is {DateTime()}\n"
if len(rag_docs):
preamble = f"""[WORK HISTORY]:\n"""
for doc in rag_docs:
preamble += doc
resume["rag"] += f"{doc}\n"
preamble += f"\n[/WORK HISTORY]\n"
preamble = f"""[WORK HISTORY]:\n"""
for doc in rag_docs:
preamble += f"{doc}\n"
resume["rag"] += f"{doc}\n"
preamble += f"\n[/WORK HISTORY]\n"
content = f"{preamble}\nUse the above WORK HISTORY to create the resume for this JOB DESCRIPTION. Do not use the JOB DESCRIPTION skills as skills the user posseses unless listed in WORK HISTORY:\n[JOB DESCRIPTION]\n{content}\n[/JOB DESCRIPTION]\n"
@ -987,8 +1154,13 @@ class WebServer:
def run(self, host='0.0.0.0', port=WEB_PORT, **kwargs):
import uvicorn
uvicorn.run(self.app, host=host, port=port)
try:
uvicorn.run(self.app, host=host, port=port)
except KeyboardInterrupt:
if self.observer:
self.observer.stop()
if self.observer:
self.observer.join()
# %%
@ -1004,17 +1176,16 @@ def main():
client = ollama.Client(host=args.ollama_server)
model = args.ollama_model
documents = Rag.load_text_files(defines.doc_dir)
print(f"Documents loaded {len(documents)}")
collection = Rag.get_vector_collection()
chunks = Rag.create_chunks_from_documents(documents)
Rag.add_embeddings_to_collection(client, collection, chunks)
doc_types = set(chunk.metadata['doc_type'] for chunk in chunks)
print(f"Document types: {doc_types}")
print(f"Vectorstore created with {collection.count()} documents")
# documents = Rag.load_text_files(defines.doc_dir)
# print(f"Documents loaded {len(documents)}")
# chunks = Rag.create_chunks_from_documents(documents)
# doc_types = set(chunk.metadata['doc_type'] for chunk in chunks)
# print(f"Document types: {doc_types}")
# print(f"Vectorstore created with {collection.count()} documents")
web_server = WebServer(logging, client, collection, model)
web_server = WebServer(logging, client, model)
logging.info(f"Starting web server at http://{args.web_host}:{args.web_port}")
web_server.run(host=args.web_host, port=args.web_port, use_reloader=False)
main()


@ -1,11 +1,14 @@
import os
ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
#model = "deepseek-r1:7b" # Tool calls don't work
#model="mistral:7b" # Tool calls don't work
#model = "llama3.2"
model="qwen2.5:7b"
model = os.getenv('MODEL_NAME', 'qwen2.5:7b')
encoding_model="mxbai-embed-large"
persist_directory="/root/.cache/chromadb"
persist_directory = os.getenv('PERSIST_DIR', "/opt/backstory/chromadb")
max_context = 2048*8*2
doc_dir = "/opt/backstory/docs/"
session_dir = "/opt/backstory/sessions"
static_content = '/opt/backstory/frontend/deployed'
static_content = '/opt/backstory/frontend/deployed'
resume_doc = '/opt/backstory/docs/resume/generic.txt'


@ -1,118 +1,481 @@
__all__ = [
'load_text_files',
'create_chunks_from_documents',
'get_vector_collection',
'add_embeddings_to_collection',
'find_similar'
]
import os
import glob
import time
import hashlib
import asyncio
import logging
import os
import glob
import time
import hashlib
import asyncio
import json
import pickle
import numpy as np
import chromadb
import ollama
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document # Import the Document class
from langchain.schema import Document
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
# Import your existing modules
if __name__ == "__main__":
# When running directly, use absolute imports
import defines
# When running directly, use absolute imports
import defines
else:
# When imported as a module, use relative imports
from . import defines
# When imported as a module, use relative imports
from . import defines
def load_text_files(directory, encoding="utf-8"):
file_paths = glob.glob(os.path.join(directory, "**/*"), recursive=True)
documents = []
__all__ = [
'ChromaDBFileWatcher',
'start_file_watcher'
]
for file_path in file_paths:
if os.path.isfile(file_path): # Ensure it's a file, not a directory
class ChromaDBFileWatcher(FileSystemEventHandler):
def __init__(self, llm, watch_directory, loop, persist_directory=None, collection_name="documents",
chunk_size=1000, chunk_overlap=200, recreate=False):
self.llm = llm
self.watch_directory = watch_directory
self.persist_directory = persist_directory or defines.persist_directory
self.collection_name = collection_name
self.chunk_size = chunk_size
self.chunk_overlap = chunk_overlap
self.loop = loop
# Initialize ChromaDB collection
self.collection = self._get_vector_collection(recreate=recreate)
# Setup text splitter
self.text_splitter = CharacterTextSplitter(
chunk_size=chunk_size,
chunk_overlap=chunk_overlap
)
# Track file hashes and processing state
self.file_hashes: dict[str, str] = {}
self.update_lock = asyncio.Lock()
self.processing_files = set()
# Initialize file hashes
self.llm = llm
self.watch_directory = watch_directory
self.persist_directory = persist_directory or defines.persist_directory
self.collection_name = collection_name
self.chunk_size = chunk_size
self.chunk_overlap = chunk_overlap
# Path for storing file hash state
self.hash_state_path = os.path.join(self.persist_directory, f"{collection_name}_hash_state.json")
# Initialize ChromaDB collection
self.collection = self._get_vector_collection(recreate=recreate)
# Setup text splitter
self.text_splitter = CharacterTextSplitter(
chunk_size=chunk_size,
chunk_overlap=chunk_overlap
)
# Track file hashes and processing state
self.file_hashes = self._load_hash_state()
self.update_lock = asyncio.Lock()
self.processing_files = set()
# Only scan for new/changed files if we have previous hash state
if not self.file_hashes:
self._initialize_file_hashes()
else:
self._update_file_hashes()
def collection(self):
return self.collection
def _save_hash_state(self):
"""Save the current file hash state to disk."""
try:
# Create directory if it doesn't exist
os.makedirs(os.path.dirname(self.hash_state_path), exist_ok=True)
with open(self.hash_state_path, 'w') as f:
json.dump(self.file_hashes, f)
logging.info(f"Saved hash state with {len(self.file_hashes)} entries")
except Exception as e:
logging.error(f"Error saving hash state: {e}")
def _load_hash_state(self):
"""Load the file hash state from disk."""
if os.path.exists(self.hash_state_path):
try:
with open(file_path, "r", encoding=encoding) as f:
content = f.read()
# Extract top-level directory
rel_path = os.path.relpath(file_path, directory) # Relative to base directory
top_level_dir = rel_path.split(os.sep)[0] # Get the first directory in the path
documents.append(Document(
page_content=content, # Required format for LangChain
metadata={"doc_type": top_level_dir, "path": file_path}
))
with open(self.hash_state_path, 'r') as f:
hash_state = json.load(f)
logging.info(f"Loaded hash state with {len(hash_state)} entries")
return hash_state
except Exception as e:
print(f"Failed to load {file_path}: {e}")
logging.error(f"Error loading hash state: {e}")
return {}
def _update_file_hashes(self):
"""Update file hashes by checking for new or modified files."""
# Check for new or modified files
file_paths = glob.glob(os.path.join(self.watch_directory, "**/*"), recursive=True)
files_checked = 0
files_changed = 0
for file_path in file_paths:
if os.path.isfile(file_path):
files_checked += 1
current_hash = self._get_file_hash(file_path)
if not current_hash:
continue
return documents
# If file is new or changed
if file_path not in self.file_hashes or self.file_hashes[file_path] != current_hash:
self.file_hashes[file_path] = current_hash
files_changed += 1
# Schedule an update for this file
asyncio.run_coroutine_threadsafe(self.process_file_update(file_path), self.loop)
logging.info(f"File changed: {file_path}")
# Check for deleted files
deleted_files = []
for file_path in self.file_hashes:
if not os.path.exists(file_path):
deleted_files.append(file_path)
# Schedule removal
asyncio.run_coroutine_threadsafe(self.remove_file_from_collection(file_path), self.loop)
logging.info(f"File deleted: {file_path}")
# Remove deleted files from hash state
for file_path in deleted_files:
del self.file_hashes[file_path]
logging.info(f"Checked {files_checked} files: {files_changed} new/changed, {len(deleted_files)} deleted")
# Save the updated state
self._save_hash_state()
# ... rest of existing methods ...
async def process_file_update(self, file_path):
"""Process a file update event."""
# Skip if already being processed
if file_path in self.processing_files:
return
try:
self.processing_files.add(file_path)
# Wait a moment to ensure the file write is complete
await asyncio.sleep(0.5)
# Check if content changed via hash
current_hash = self._get_file_hash(file_path)
if not current_hash: # File might have been deleted or is inaccessible
return
if file_path in self.file_hashes and self.file_hashes[file_path] == current_hash:
# File hasn't actually changed in content
return
# Update file hash
self.file_hashes[file_path] = current_hash
# Process and update the file in ChromaDB
async with self.update_lock:
await self._update_document_in_collection(file_path)
# Save the hash state after successful update
self._save_hash_state()
except Exception as e:
logging.error(f"Error processing update for {file_path}: {e}")
finally:
self.processing_files.discard(file_path)
async def remove_file_from_collection(self, file_path):
"""Remove all chunks related to a deleted file."""
async with self.update_lock:
try:
# Find all documents with the specified path
results = self.collection.get(
where={"path": file_path}
)
if results and 'ids' in results and results['ids']:
self.collection.delete(ids=results['ids'])
logging.info(f"Removed {len(results['ids'])} chunks for deleted file: {file_path}")
# Remove from hash dictionary
if file_path in self.file_hashes:
del self.file_hashes[file_path]
# Save the updated hash state
self._save_hash_state()
except Exception as e:
logging.error(f"Error removing file from collection: {e}")
def get_vector_collection(path=defines.persist_directory, name="documents"):
# Initialize ChromaDB client
chroma_client = chromadb.PersistentClient(path=path, settings=chromadb.Settings(anonymized_telemetry=False))
def _get_vector_collection(self, recreate=False):
"""Get or create a ChromaDB collection."""
# Initialize ChromaDB client
chroma_client = chromadb.PersistentClient(
path=self.persist_directory,
settings=chromadb.Settings(anonymized_telemetry=False)
)
# Check if the collection exists and delete it
if os.path.exists(path):
try:
chroma_client.delete_collection(name=name)
except Exception as e:
print(f"Failed to delete existing collection: {e}")
# Check if the collection exists and delete it if recreate is True
if recreate and os.path.exists(self.persist_directory):
try:
chroma_client.delete_collection(name=self.collection_name)
except Exception as e:
logging.error(f"Failed to delete existing collection: {e}")
return chroma_client.get_or_create_collection(name=name)
return chroma_client.get_or_create_collection(
name=self.collection_name,
metadata={
"hnsw:space": "cosine"
})
    def load_text_files(self, directory=None, encoding="utf-8"):
        """Load all text files from a directory into Document objects."""
        directory = directory or self.watch_directory
        file_paths = glob.glob(os.path.join(directory, "**/*"), recursive=True)
        documents = []
        for file_path in file_paths:
            if os.path.isfile(file_path):  # Ensure it's a file, not a directory
                try:
                    with open(file_path, "r", encoding=encoding) as f:
                        content = f.read()
                    # Extract top-level directory
                    rel_path = os.path.relpath(file_path, directory)
                    top_level_dir = rel_path.split(os.sep)[0]
                    documents.append(Document(
                        page_content=content,
                        metadata={"doc_type": top_level_dir, "path": file_path}
                    ))
                except Exception as e:
                    logging.error(f"Failed to load {file_path}: {e}")
        return documents

    # (Pre-refactor module-level versions, replaced by the get_embedding and
    # add_embeddings_to_collection methods below:)
    # # Function to generate embeddings using Ollama
    # def get_embedding(llm, text):
    #     response = llm.embeddings(model=defines.model, prompt=text, options={'num_ctx': defines.max_context})
    #     return response["embedding"]
    #
    # def add_embeddings_to_collection(llm, collection, chunks):
    #     # Store documents in ChromaDB
    #     for i, text_or_doc in enumerate(chunks):
    #         # If input is a Document, extract the text content
    #         if isinstance(text_or_doc, Document):
    #             text = text_or_doc.page_content
    #             metadata = text_or_doc.metadata
    #         else:
    #             text = text_or_doc  # Assume it's already a string
    #             metadata = {"index": i}
    #         embedding = get_embedding(llm, text)
    #         collection.add(
    #             ids=[str(i)],
    #             documents=[text],
    #             embeddings=[embedding],
    #             metadatas=[metadata]
    #         )
    def create_chunks_from_documents(self, docs):
        """Split documents into chunks using the text splitter."""
        return self.text_splitter.split_documents(docs)
    def get_embedding(self, text):
        """Generate embeddings using Ollama."""
        response = self.llm.embeddings(
            model=defines.model,
            prompt=text,
            options={"num_ctx": defines.max_context}
        )
        return self._normalize_embeddings(response["embedding"])
    def add_embeddings_to_collection(self, chunks):
        """Add embeddings for chunks to the collection."""
        for i, chunk in enumerate(chunks):
            text = chunk.page_content
            metadata = chunk.metadata

            # Generate a more unique ID based on content and metadata
            content_hash = hashlib.md5(text.encode()).hexdigest()
            path_hash = ""
            if "path" in metadata:
                path_hash = hashlib.md5(metadata["path"].encode()).hexdigest()[:8]
            chunk_id = f"{path_hash}_{content_hash}_{i}"

            embedding = self.get_embedding(text)
            self.collection.add(
                ids=[chunk_id],
                documents=[text],
                embeddings=[embedding],
                metadatas=[metadata]
            )
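
    # A minimal sketch (not part of the original flow) of batching the add() call:
    # ChromaDB's collection.add() accepts parallel lists, so building the lists up
    # front and issuing a single call can be faster for large chunk sets. The helper
    # name `_add_embeddings_batched` is hypothetical.
    #
    #   def _add_embeddings_batched(self, chunks):
    #       ids, texts, embeddings, metadatas = [], [], [], []
    #       for i, chunk in enumerate(chunks):
    #           text = chunk.page_content
    #           content_hash = hashlib.md5(text.encode()).hexdigest()
    #           path_hash = hashlib.md5(chunk.metadata.get("path", "").encode()).hexdigest()[:8]
    #           ids.append(f"{path_hash}_{content_hash}_{i}")
    #           texts.append(text)
    #           embeddings.append(self.get_embedding(text))
    #           metadatas.append(chunk.metadata)
    #       if ids:
    #           self.collection.add(ids=ids, documents=texts, embeddings=embeddings, metadatas=metadatas)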
    def find_similar(self, query, top_k=3):
        """Find similar documents to the query."""
        query_embedding = self.get_embedding(query)
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=top_k,
            include=["documents", "metadatas", "distances"]
        )
        return {
            "query_embedding": query_embedding,
            "ids": results["ids"][0],
            "documents": results["documents"][0],
            "distances": results["distances"][0],
            "metadatas": results["metadatas"][0],
        }
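
    # A usage sketch (hypothetical caller, assuming a ChromaDBFileWatcher instance named
    # `watcher` and a reachable Ollama server). With the collection created using
    # "hnsw:space": "cosine", smaller distances mean more similar chunks:
    #
    #   results = watcher.find_similar("What GPU drivers has James worked on?", top_k=3)
    #   for doc, meta, dist in zip(results["documents"], results["metadatas"], results["distances"]):
    #       print(f"{dist:.3f}  {meta.get('path')}  {doc[:80]!r}")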
    def _initialize_file_hashes(self):
        """Initialize the hash dictionary for all files in the directory."""
        file_paths = glob.glob(os.path.join(self.watch_directory, "**/*"), recursive=True)
        for file_path in file_paths:
            if os.path.isfile(file_path):
                file_hash = self._get_file_hash(file_path)
                if file_hash:
                    self.file_hashes[file_path] = file_hash
    def _get_file_hash(self, file_path):
        """Calculate the MD5 hash of a file."""
        try:
            with open(file_path, 'rb') as f:
                return hashlib.md5(f.read()).hexdigest()
        except Exception as e:
            logging.error(f"Error hashing file {file_path}: {e}")
            return None
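
    # A minimal sketch (assumption: watched files may be large) of hashing in fixed-size
    # blocks rather than reading an entire file into memory at once. The name
    # `_get_file_hash_streaming` is hypothetical and not used elsewhere in this module.
    #
    #   def _get_file_hash_streaming(self, file_path, block_size=65536):
    #       md5 = hashlib.md5()
    #       with open(file_path, "rb") as f:
    #           for block in iter(lambda: f.read(block_size), b""):
    #               md5.update(block)
    #       return md5.hexdigest()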
def on_modified(self, event):
"""Handle file modification events."""
if event.is_directory:
return
file_path = event.src_path
# Schedule the update using asyncio
asyncio.run_coroutine_threadsafe(self.process_file_update(file_path), self.loop)
logging.info(f"File modified: {file_path}")
def on_created(self, event):
"""Handle file creation events."""
if event.is_directory:
return
file_path = event.src_path
# Schedule the update using asyncio
asyncio.run_coroutine_threadsafe(self.process_file_update(file_path), self.loop)
logging.info(f"File created: {file_path}")
def on_deleted(self, event):
"""Handle file deletion events."""
if event.is_directory:
return
file_path = event.src_path
asyncio.run_coroutine_threadsafe(self.remove_file_from_collection(file_path), self.loop)
logging.info(f"File deleted: {file_path}")
    def _normalize_embeddings(self, embeddings):
        """L2-normalize embeddings; works for a single 1-D embedding or a 2-D batch."""
        embeddings = np.asarray(embeddings)
        norms = np.linalg.norm(embeddings, axis=-1, keepdims=True)
        return embeddings / norms
    async def _update_document_in_collection(self, file_path):
        """Update a document in the ChromaDB collection."""
        try:
            # Remove existing entries for this file
            existing_results = self.collection.get(where={"path": file_path})
            if existing_results and 'ids' in existing_results and existing_results['ids']:
                self.collection.delete(ids=existing_results['ids'])

            # Create document object in LangChain format
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()

            # Extract top-level directory
            rel_path = os.path.relpath(file_path, self.watch_directory)
            top_level_dir = rel_path.split(os.sep)[0]

            document = Document(
                page_content=content,
                metadata={"doc_type": top_level_dir, "path": file_path}
            )

            # Create chunks
            chunks = self.text_splitter.split_documents([document])

            # Add chunks to collection
            self.add_embeddings_to_collection(chunks)
            logging.info(f"Updated {len(chunks)} chunks for file: {file_path}")
        except Exception as e:
            logging.error(f"Error updating document in collection: {e}")
    def initialize_collection(self):
        """Initialize the collection with all documents from the watch directory."""
        documents = self.load_text_files()
        logging.info(f"Documents loaded: {len(documents)}")
        chunks = self.create_chunks_from_documents(documents)
        self.add_embeddings_to_collection(chunks)
        logging.info(f"Vectorstore created with {self.collection.count()} documents")

        # Display document types
        doc_types = set(chunk.metadata['doc_type'] for chunk in chunks)
        logging.info(f"Document types: {doc_types}")
        return len(chunks)
# Function to start the file watcher
def start_file_watcher(llm, watch_directory, persist_directory=None,
                       collection_name="documents", initialize=False, recreate=False):
    """
    Start watching a directory for file changes.

    Args:
        llm: The language model client
        watch_directory: Directory to watch for changes
        persist_directory: Directory to persist ChromaDB and hash state
        collection_name: Name of the ChromaDB collection
        initialize: Whether to initialize the collection with all documents (only needed the first time)
        recreate: Whether to recreate the collection (deletes any existing collection)
    """
    loop = asyncio.get_event_loop()
    file_watcher = ChromaDBFileWatcher(
        llm,
        watch_directory,
        loop=loop,
        persist_directory=persist_directory,
        collection_name=collection_name,
        recreate=recreate
    )

    # Initialize collection if requested and no existing hash state
    if initialize and not file_watcher.file_hashes:
        file_watcher.initialize_collection()

    # Start observer
    observer = Observer()
    observer.schedule(file_watcher, watch_directory, recursive=True)
    observer.start()
    logging.info(f"Started watching directory: {watch_directory}")
    return observer, file_watcher


# (Pre-refactor module-level versions, replaced by the ChromaDBFileWatcher methods above:)
# def find_similar(llm, collection, query, top_k=3):
#     query_embedding = get_embedding(llm, query)
#     results = collection.query(
#         query_embeddings=[query_embedding],
#         n_results=top_k,
#         include=["documents", "metadatas", "distances"]
#     )
#     return {
#         "query_embedding": query_embedding,
#         "ids": results["ids"][0],
#         "documents": results["documents"][0],
#         "distances": results["distances"][0],
#         "metadatas": results["metadatas"][0],
#     }
#
# def create_chunks_from_documents(docs):
#     text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
#     return text_splitter.split_documents(docs)
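
# A usage sketch (hypothetical caller, not part of this module): run the watcher inside
# an asyncio application so the loop that file-change coroutines are scheduled onto is
# actually running. `defines.doc_dir` and `defines.ollama_api_url` follow the defaults
# used in the __main__ block below.
#
#   async def main():
#       llm = ollama.Client(host=defines.ollama_api_url)
#       observer, watcher = start_file_watcher(llm, defines.doc_dir, initialize=True)
#       try:
#           await asyncio.Event().wait()  # park until cancelled (e.g. Ctrl+C)
#       finally:
#           observer.stop()
#           observer.join()
#
#   # asyncio.run(main())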
if __name__ == "__main__":
    # When running directly, use absolute imports
    import defines

    # (Pre-refactor standalone test, replaced by the file-watcher driven version below:)
    # llm = ollama.Client(host=defines.ollama_api_url)
    # documents = load_text_files(defines.doc_dir)
    # print(f"Documents loaded {len(documents)}")
    # collection = get_vector_collection()
    # chunks = create_chunks_from_documents(documents)
    # add_embeddings_to_collection(llm, collection, chunks)
    # doc_types = set(chunk.metadata['doc_type'] for chunk in chunks)
    # print(f"Document types: {doc_types}")
    # print(f"Vectorstore created with {collection.count()} documents")
    # query = "Can you describe James Ketrenos' work history?"
    # top_docs = find_similar(llm, collection, query, top_k=3)
    # print(top_docs)

    # Initialize Ollama client
    llm = ollama.Client(host=defines.ollama_api_url)

    # Start the file watcher (with initialization)
    observer, file_watcher = start_file_watcher(
        llm,
        defines.doc_dir,
        recreate=True,    # Start fresh
        initialize=True   # Load all documents initially
    )

    # Example query
    query = "Can you describe James Ketrenos' work history?"
    top_docs = file_watcher.find_similar(query, top_k=3)
    logging.info(top_docs)

    try:
        # Keep the main thread running; the asyncio loop must be running for the
        # file-update coroutines scheduled via run_coroutine_threadsafe() to execute
        asyncio.get_event_loop().run_forever()
    except KeyboardInterrupt:
        observer.stop()
    observer.join()