Rethemed and renamed

This commit is contained in:
James Ketr 2025-04-07 17:20:00 -07:00
parent 792c9342df
commit 10eac5ba49
62 changed files with 1367 additions and 2352 deletions

.gitignore vendored (3 changed lines)
View File

@ -1,4 +1,5 @@
.env
cache/**
jupyter/**
ollama/**
ollama/**
sessions/**

View File

@ -133,20 +133,20 @@ RUN pip install requests wheel
RUN python setup.py clean --all bdist_wheel --linux
#
# The main airc image:
# The main backstory image:
# * python 3.11
# * pytorch xpu w/ ipex-llm
# * ollama-ipex-llm
# * src/server.py - model server supporting RAG and fine-tuned models
#
# Agents using server:
# * src/web-ui.py - REACT server (airc.ketrenos.com)
# * src/irc.py - IRC backend (irc.libera.chat #airc-test)
# * src/web-ui.py - REACT server (backstory.ketrenos.com)
# * src/irc.py - IRC backend (irc.libera.chat #backstory-test)
# * src/cli.py - Command line chat
#
# Utilities:
# * src/training-fine-tune.py - Perform fine-tuning on curated documents
FROM ubuntu:oracular AS airc
FROM ubuntu:oracular AS backstory
COPY --from=python-build /opt/python /opt/python
@ -184,25 +184,25 @@ RUN apt-get update \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
WORKDIR /opt/airc
WORKDIR /opt/backstory
RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2
# Setup the ollama python virtual environment
RUN python3 -m venv --system-site-packages /opt/airc/venv
RUN python3 -m venv --system-site-packages /opt/backstory/venv
# Setup the docker pip shell
RUN { \
echo '#!/bin/bash' ; \
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
echo 'source /opt/airc/venv/bin/activate' ; \
echo 'source /opt/backstory/venv/bin/activate' ; \
echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \
} > /opt/airc/shell ; \
chmod +x /opt/airc/shell
} > /opt/backstory/shell ; \
chmod +x /opt/backstory/shell
# Activate the pip environment on all shell calls
SHELL [ "/opt/airc/shell" ]
SHELL [ "/opt/backstory/shell" ]
# From https://pytorch-extension.intel.com/installation?platform=gpu&version=v2.6.10%2Bxpu&os=linux%2Fwsl2&package=pip
@ -246,24 +246,15 @@ RUN pip install einops diffusers # Required for IPEX optimize(), which is requir
# Install packages needed for stock.py
RUN pip install yfinance pytz geopy PyHyphen nltk
# While running in development mode via bind mounts, don't copy
# the source, or follow-on containers will always rebuild whenever
# the source changes.
#COPY /src/ /opt/backstory/src/
COPY /src/requirements.txt /opt/backstory/src/requirements.txt
RUN pip install -r /opt/backstory/src/requirements.txt
SHELL [ "/bin/bash", "-c" ]
# Don't install the full oneapi essentials; just the ones that we seem to need
# RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
# | gpg --dearmor -o /usr/share/keyrings/oneapi-archive-keyring.gpg \
# && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
# | tee /etc/apt/sources.list.d/oneAPI.list \
# && apt-get update \
# && DEBIAN_FRONTEND=noninteractive apt-get install -y \
# intel-oneapi-mkl-sycl-2025.0 \
# intel-oneapi-dnnl-2025.0 \
# intel-oneapi-dpcpp-cpp-2025.0 \
# && apt-get clean \
# && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
# dpcpp is needed for LoRA backend when
# libze-dev is needed for LoRA/triton backend in order to build stuff
# Unfortunately, that fails with:
# ImportError: /opt/airc/venv/lib/python3.11/site-packages/intel_extension_for_pytorch/lib/libintel-ext-pt-cpu.so: undefined symbol: _ZNK5torch8autograd4Node4nameEv
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
libncurses6 \
@ -274,24 +265,18 @@ COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
RUN dpkg -i /opt/ze-monitor-*deb && rm /opt/ze-monitor-*deb
RUN usermod -aG ze-monitor root
# While running in development mode via bind mounts, don't copy
# the source or follow on containers will always rebuild whenever
# the source changes.
#COPY /src/ /opt/airc/src/
COPY /src/requirements.txt /opt/airc/src/requirements.txt
SHELL [ "/bin/bash", "-c" ]
RUN { \
echo '#!/bin/bash'; \
echo 'echo "Container: airc"'; \
echo 'echo "Container: backstory"'; \
echo 'set -e'; \
echo 'echo "Setting pip environment to /opt/airc"'; \
echo 'echo "Setting pip environment to /opt/backstory"'; \
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
echo 'source /opt/airc/venv/bin/activate'; \
echo 'source /opt/backstory/venv/bin/activate'; \
echo ''; \
echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/airc/)?shell$ ]]; then'; \
echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/backstory/)?shell$ ]]; then'; \
echo ' echo "Dropping to shell"'; \
echo ' shift' ; \
echo ' echo "Running: ${@}"' ; \
@ -301,8 +286,11 @@ RUN { \
echo ' exec /bin/bash'; \
echo ' fi' ; \
echo 'else'; \
echo ' echo "Launching AIRC chat server..."'; \
echo ' python src/airc.py "${@}"' ; \
echo ' while true; do'; \
echo ' echo "Launching Backstory server..."'; \
echo ' python src/server.py "${@}" || echo "Backstory server died. Restarting in 3 seconds."'; \
echo ' sleep 3'; \
echo ' done' ; \
echo 'fi'; \
} > /entrypoint.sh \
&& chmod +x /entrypoint.sh
@ -422,9 +410,9 @@ ENV PATH=/opt/ollama:${PATH}
ENTRYPOINT [ "/entrypoint.sh" ]
FROM airc AS jupyter
FROM backstory AS jupyter
SHELL [ "/opt/airc/shell" ]
SHELL [ "/opt/backstory/shell" ]
# BEGIN setup Jupyter
RUN pip install \
@ -433,13 +421,13 @@ RUN pip install \
&& jupyter lab build --dev-build=False --minimize=False
# END setup Jupyter
RUN pip install -r /opt/airc/src/requirements.txt
RUN pip install -r /opt/backstory/src/requirements.txt
SHELL [ "/bin/bash", "-c" ]
RUN { \
echo '#!/bin/bash' ; \
echo 'echo "Container: airc jupyter"' ; \
echo 'echo "Container: backstory jupyter"' ; \
echo 'if [[ ! -e "/root/.cache/hub/token" ]]; then' ; \
echo ' if [[ "${HF_ACCESS_TOKEN}" == "" ]]; then' ; \
echo ' echo "Set your HF access token in .env as: HF_ACCESS_TOKEN=<token>" >&2' ; \
@ -451,7 +439,7 @@ RUN { \
echo 'fi' ; \
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
echo 'source /opt/airc/venv/bin/activate' ; \
echo 'source /opt/backstory/venv/bin/activate' ; \
echo 'if [[ "${1}" == "shell" ]]; then echo "Dropping to shell"; /bin/bash; exit $?; fi' ; \
echo 'while true; do' ; \
echo ' echo "Launching jupyter lab"' ; \

View File

@ -1,12 +1,12 @@
# AIRC (pronounced Eric)
# Backstory
AI is Really Cool
Backstory is an AI resume agent that provides context into a diverse career narrative.
This project provides an AI chat client. It runs the neuralchat model, enhanced with a little bit of RAG to fetch news RSS feeds.
This project provides an AI chat client. While it can run a variety of LLM models, it is currently running Qwen2.5:7b. In addition to the standard model, it is enhanced with a RAG expert system that chunks and embeds any text files placed in `./docs`. It also exposes several utility tools the LLM can use to obtain real-time data.
Internally, it is built using PyTorch 2.6, Intel IPEX/LLM, and Python 3.11 (several pip packages were not yet available for Python 3.12 shipped with Ubuntu Oracular 24.10, which these containers are based on.)
Internally, it is built using PyTorch 2.6, and Python 3.11 (several pip packages were not yet available for Python 3.12 shipped with Ubuntu Oracular 24.10, which these containers are based on.)
NOTE: If running on an Intel Arc A series graphics processor, fp64 is not supported and may need to either be emulated or have the model quantized. It has been a while since I've had an A series GPU to test on, so if you run into problems please file an [issue](https://github.com/jketreno/airc/issues)--I have some routines I can put in, but don't have a way to test them.
NOTE: If running on an Intel Arc A series graphics processor, fp64 is not supported and may need to either be emulated or have the model quantized. It has been a while since I've had an A series GPU to test on, so if you run into problems please file an [issue](https://github.com/jketreno/backstory/issues)--I have some routines I can put in, but don't have a way to test them.
# Installation
@ -26,8 +26,8 @@ NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu]
```bash
git clone https://github.com/jketreno/airc
cd airc
git clone https://github.com/jketreno/backstory
cd backstory
docker compose build
```
@ -37,12 +37,12 @@ This project provides the following containers:
| Container | Purpose |
|:----------|:---------------------------------------------------------------|
| airc | Base container with GPU packages installed and configured |
| jupyter | airc + Jupyter notebook for running Jupyter sessions |
| backstory | Base container with GPU packages installed and configured. Main server entry point. Also used for frontend development. |
| jupyter | backstory + Jupyter notebook for running Jupyter sessions |
| miniircd | Tiny deployment of an IRC server for testing IRC agents |
| ollama | Installation of Intel's pre-built Ollama.cpp |
While developing airc, sometimes Hugging Face is used directly with models loaded via PyTorch. At other times, especially during rapid-development, the ollama deployment is used. This combination allows you to easily access GPUs running either locally (via the local ollama or HF code)
While developing Backstory, sometimes Hugging Face is used directly with models loaded via PyTorch. At other times, especially during rapid development, the ollama deployment is used. This combination makes it easy to use locally available GPUs, whether through the local ollama service or through Hugging Face code.
To see which models are easily deployable with Ollama, see the [Ollama Model List](https://ollama.com/search).
@ -83,33 +83,43 @@ directory which will enable model downloads to be persisted.
NOTE: Models downloaded by most examples will be placed in the ./cache directory, which is bind mounted to the container.
### AIRC
### Backstory
To launch the airc shell interactively, with the pytorch 2.6 environment loaded, use the default entrypoint to launch a shell:
If you just want to run the pre-built environment, you can run:
```bash
docker compose run --rm airc shell
docker compose up -d
```
Once in the shell, you can then launch the model-server.py and then the airc.py client:
That will launch all the required containers. Once loaded, the following ports are exposed:
#### Container: backstory
* 8911 - HTTP port for the chat server. If you want HTTPS (recommended), put an nginx reverse proxy in front of this endpoint. See WEB_PORT in src/server.py and the docker-compose `ports` entry under the `backstory` service. This port is safe to expose to the Internet if you want to serve Backstory from your own host.
* 3000 - During interactive development of the frontend, the React dev server is available on this port. By default, static content is served through port 8911. Do not expose this port to the Internet. (See the sketch after this list for how the frontend picks between these two ports.)
#### Container: jupyter
* 8888 - Jupyter Notebook. This port serves a Jupyter notebook running on top of the `backstory` base container.
* 60673 - Lets you connect to Gradio apps from outside the container, provided you launch Gradio on port 60673 with `.launch(server_name="0.0.0.0", server_port=60673)`.
#### Container: ollama
* 11434 - ollama server port. Do not expose this port to the Internet; you can use it locally via curl/wget. The `backstory` and `jupyter` containers are on the same Docker network as `ollama`, so they can reach it even if this port is not published. See the `ports` entry under `ollama` in docker-compose.yml.
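The frontend decides at runtime which of these ports to talk to. Below is a minimal TypeScript sketch of that decision; the repository's actual logic is `getConnectionBase` in `frontend/src/App.tsx` (visible later in this diff), so treat the port check here as illustrative rather than the project's exact rule.

```typescript
// Illustrative only: pick the API base URL depending on how the frontend is served.
// When the static build is served by the chat server itself (port 8911), same-origin
// requests work; when using the React dev server on port 3000, talk to port 8911 directly.
const getApiBase = (loc: Location): string => {
  if (loc.port === "3000") {
    // Development: React dev server; point API calls at the chat server.
    return `${loc.protocol}//${loc.hostname}:8911`;
  }
  // Production: static content and API share the same origin.
  return `${loc.protocol}//${loc.host}`;
};

// Example usage:
// fetch(`${getApiBase(window.location)}/api/context-status/${sessionId}`)
```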
Once the above is running, to launch the backstory shell interactively:
```bash
docker compose run --rm airc shell
src/airc.py --ai-server=http://localhost:5000 &
src/model-server.py
docker compose exec -it backstory shell
```
By default, src/airc.py will connect to irc.libera.chat on the airc-test channel. See `python src/airc.py --help` for options.
By separating the model-server into its own process, you can develop and tweak the chat backend without losing the IRC connection established by airc.
### Jupyter
```bash
docker compose up jupyter -d
```
The default port for inbound connections is 8888 (see docker-compose.yml). $(pwd)/jupyter is bind mounted to /opt/juypter in the container, which is where notebooks will be saved by default.
The default port for inbound connections is 8888 (see docker-compose.yml). $(pwd)/jupyter is bind mounted to `/opt/jupyter` in the container, which is where notebooks will be saved by default.
To access the jupyter notebook, go to `https://localhost:8888/jupyter`.
@ -118,28 +128,17 @@ To access the jupyter notebook, go to `https://localhost:8888/jupyter`.
You can run `ze-monitor` within the launched containers to monitor GPU usage.
```bash
containers=($(docker ps --filter "ancestor=airc" --format "{{.ID}}"))
if [[ ${#containers[*]} -eq 0 ]]; then
echo "Running airc container not found."
else
for container in ${containers[@]}; do
echo "Container ${container} devices:"
docker exec -it ${container} ze-monitor
done
fi
docker compose exec backstory ze-monitor --list
```
If an airc container is running, you should see something like:
```
Container 5317c503e771 devices:
Device 1: 8086:A780 (Intel(R) UHD Graphics 770)
Device 2: 8086:E20B (Intel(R) Graphics [0xe20b])
```
You can then launch ze-monitor in that container specifying the device you wish to monitor:
To monitor a device:
```
containers=($(docker ps --filter "ancestor=airc" --format "{{.ID}}"))
docker exec -it ${containers[0]} ze-monitor --device 2
```bash
docker compose exec backstory ze-monitor --device 2
```

View File

@ -1,10 +1,10 @@
services:
airc:
backstory:
build:
context: .
dockerfile: Dockerfile
target: airc
image: airc
target: backstory
image: backstory
restart: "no"
env_file:
- .env
@ -15,13 +15,14 @@ services:
networks:
- internal
ports:
- 8911:8911
- 8911:8911 # Flask React server
- 3000:3000 # REACT expo while developing frontend
volumes:
- ./cache:/root/.cache
- ./src:/opt/airc/src:rw
- ./doc:/opt/airc/doc:ro
- ./results:/opt/airc/results:rw
- ./ketr-chat:/opt/airc/ketr-chat:rw # Live mount src
- ./cache:/root/.cache # Persist all models and GPU kernel cache
- ./sessions:/opt/backstory/sessions:rw # Persist sessions
- ./docs:/opt/backstory/docs:ro # Live mount of RAG content
- ./src:/opt/backstory/src:rw # Live mount server src
- ./frontend:/opt/backstory/frontend:rw # Live mount frontend src
cap_add: # used for running ze-monitor within container
- CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
- CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
@ -69,15 +70,11 @@ services:
ports:
- 8888:8888 # Jupyter Notebook
- 60673:60673 # Gradio
- 5000:5000 # Flask React server
- 3000:3000 # REACT expo
networks:
- internal
volumes:
- ./jupyter:/opt/jupyter:rw
- ./cache:/root/.cache
- ./src:/opt/airc/src:rw # Live mount src
- ./ketr-chat:/opt/airc/ketr-chat:rw # Live mount src
deploy:
resources:
limits:

frontend/favicon.ico: new executable binary file (151 KiB, not shown)
frontend/favicon.png: new binary file (6.9 KiB, not shown)

View File

@ -1,11 +1,11 @@
{
"name": "ketr-chat",
"name": "airc",
"version": "0.1.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "ketr-chat",
"name": "airc",
"version": "0.1.0",
"dependencies": {
"@emotion/react": "^11.14.0",

View File

@ -1,5 +1,5 @@
{
"name": "ketr-chat",
"name": "airc",
"version": "0.1.0",
"private": true,
"dependencies": {

(Binary image files changed; contents not shown. Sizes: 17 KiB before/after, a new 4.5 KiB file, and 4.9 KiB before/after.)

View File

@ -0,0 +1,7 @@
# About Backstory
This application was developed to achieve a few goals:
1. See if it is realistic to self-host AI LLMs. Turns out, it is -- with constraints.
2. Provide a recent example of my capabilities; many of my projects while working for Intel were internally facing. The source code to this project is available on [GitHub](https://github.com/jketreno/backstory).
3. My career at Intel was diverse. Over the years, I have worked on many projects almost everywhere in the computer ecosystem. That results in a resume that is either too long or too short. This application is intended to provide a quick way for employers to ask the LLM about me.

frontend/public/favicon.ico: new executable binary file (9.4 KiB, not shown)
frontend/public/favicon.png: new executable binary file (40 KiB, not shown)

(Binary image file changed; 22 KiB before and after, not shown.)

View File

@ -24,7 +24,7 @@
work correctly both with client-side routing and a non-root public URL.
Learn how to configure a non-root public URL by running `npm run build`.
-->
<title>ai.ketrenos.com</title>
<title>Backstory</title>
</head>
<body>
<noscript>You need to enable JavaScript to run this app.</noscript>

frontend/public/logo.png: new executable binary file (1.2 MiB, not shown)
frontend/public/logo192.png: new binary file (32 KiB, not shown)
frontend/public/logo512.png: new binary file (149 KiB, not shown)
frontend/public/main-logo.png: new executable binary file (1.2 MiB, not shown)

(Several binary image files changed; contents not shown. Sizes unchanged: 5.0 KiB, 6.2 KiB, 14 KiB, 21 KiB, 4.7 KiB, 17 KiB.)

View File

@ -2,6 +2,20 @@ div {
box-sizing: border-box
}
.TabPanel {
display: flex;
height: 100%;
}
.MuiToolbar-root .MuiBox-root {
border-bottom: none;
}
.MuiTabs-root .MuiTabs-indicator {
background-color: orange;
}
.SystemInfo {
display: flex;
flex-direction: column;
@ -32,7 +46,7 @@ div {
display: flex;
flex-direction: column;
flex-grow: 1;
max-width: 800px;
max-width: 1024px;
margin: 0 auto;
}
@ -67,6 +81,7 @@ div {
padding: 10px;
flex-direction: column;
height: 100%;
max-height: 100%;
}
.user-message.MuiCard-root {
@ -89,6 +104,7 @@ div {
flex-grow: 0;
}
.About.MuiCard-root,
.assistant-message.MuiCard-root {
border: 1px solid #E0E0E0;
background-color: #FFFFFF;
@ -108,18 +124,30 @@ div {
font-size: 0.9rem;
}
.About.MuiCard-root {
display: flex;
flex-grow: 1;
width: 100%;
margin-left: 0;
margin-right: 0;
}
.About .MuiCardContent-root,
.assistant-message .MuiCardContent-root {
padding: 0 16px !important;
font-size: 0.9rem;
}
.About span,
.assistant-message span {
font-size: 0.9rem;
}
.user-message .MuiCardContent-root:last-child,
.assistant-message .MuiCardContent-root:last-child {
padding: 16px;
.assistant-message .MuiCardContent-root:last-child,
.About .MuiCardContent-root:last-child {
padding: 16px;
}
.users > div {
@ -137,6 +165,7 @@ div {
}
/* Reduce general whitespace in markdown content */
.About p.MuiTypography-root,
.assistant-message p.MuiTypography-root {
margin-top: 0.5rem;
margin-bottom: 0.5rem;
@ -149,7 +178,13 @@ div {
.assistant-message h3.MuiTypography-root,
.assistant-message h4.MuiTypography-root,
.assistant-message h5.MuiTypography-root,
.assistant-message h6.MuiTypography-root {
.assistant-message h6.MuiTypography-root,
.About h1.MuiTypography-root,
.About h2.MuiTypography-root,
.About h3.MuiTypography-root,
.About h4.MuiTypography-root,
.About h5.MuiTypography-root,
.About h6.MuiTypography-root {
margin-top: 1rem;
margin-bottom: 0.5rem;
font-size: 1rem;
@ -157,17 +192,21 @@ div {
/* Reduce space in lists */
.assistant-message ul.MuiTypography-root,
.assistant-message ol.MuiTypography-root {
margin-top: 0.5rem;
.assistant-message ol.MuiTypography-root,
.About ul.MuiTypography-root,
.About ol.MuiTypography-root {
margin-top: 0.5rem;
margin-bottom: 0.5rem;
font-size: 0.9rem;
}
.About li.MuiTypography-root,
.assistant-message li.MuiTypography-root {
margin-bottom: 0.25rem;
font-size: 0.9rem;
}
.About .MuiTypography-root li,
.assistant-message .MuiTypography-root li {
margin-top: 0;
margin-bottom: 0;
@ -176,6 +215,7 @@ div {
}
/* Reduce space around code blocks */
.About .MuiTypography-root pre,
.assistant-message .MuiTypography-root pre {
border: 1px solid #F5F5F5;
border-radius: 0.5rem;

View File

@ -2,6 +2,9 @@ import React, { useState, useEffect, useRef, useCallback, ReactElement } from 'r
import FormGroup from '@mui/material/FormGroup';
import FormControlLabel from '@mui/material/FormControlLabel';
import { styled } from '@mui/material/styles';
import Avatar from '@mui/material/Avatar';
import Tabs from '@mui/material/Tabs';
import Tab from '@mui/material/Tab';
import Switch from '@mui/material/Switch';
import Divider from '@mui/material/Divider';
import Tooltip from '@mui/material/Tooltip';
@ -19,7 +22,7 @@ import Drawer from '@mui/material/Drawer';
import Toolbar from '@mui/material/Toolbar';
import SettingsIcon from '@mui/icons-material/Settings';
import CloseIcon from '@mui/icons-material/Close';
import IconButton, { IconButtonProps } from '@mui/material/IconButton';
import IconButton from '@mui/material/IconButton';
import Box from '@mui/material/Box';
import CssBaseline from '@mui/material/CssBaseline';
import ResetIcon from '@mui/icons-material/History';
@ -27,22 +30,16 @@ import SendIcon from '@mui/icons-material/Send';
import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
import Card from '@mui/material/Card';
import CardContent from '@mui/material/CardContent';
import CardActions from '@mui/material/CardActions';
import Collapse from '@mui/material/Collapse';
import Table from '@mui/material/Table';
import TableBody from '@mui/material/TableBody';
import TableCell from '@mui/material/TableCell';
import TableContainer from '@mui/material/TableContainer';
import TableHead from '@mui/material/TableHead';
import TableRow from '@mui/material/TableRow';
import PropagateLoader from "react-spinners/PropagateLoader";
import { MuiMarkdown } from "mui-markdown";
import ReactMarkdown from 'react-markdown';
import rehypeKatex from 'rehype-katex'
import remarkMath from 'remark-math'
import 'katex/dist/katex.min.css' // `rehype-katex` does not import the CSS for you
import { ResumeBuilder } from './ResumeBuilder';
import { Message, MessageList } from './Message';
import { SeverityType } from './Snack';
import { ContextStatus } from './ContextStatus';
import './App.css';
@ -51,13 +48,10 @@ import '@fontsource/roboto/400.css';
import '@fontsource/roboto/500.css';
import '@fontsource/roboto/700.css';
//const use_mui_markdown = true
const use_mui_markdown = true
const welcomeMarkdown = `
# Welcome to AIRC
# Welcome to Backstory
This LLM agent was built by James Ketrenos in order to provide answers to any questions you may have about his work history. In addition to being a RAG enabled expert system, the LLM is configured with real-time access to weather, stocks, the current time, and can answer questions about the contents of a website.
Backstory was written by James Ketrenos in order to provide answers to questions potential employers may have about his work history. In addition to being a RAG enabled expert system, the LLM is configured with real-time access to weather, stocks, the current time, and can answer questions about the contents of a website.
You can ask things like:
* <ChatQuery text="What is James Ketrenos' work history?"/>
@ -68,7 +62,7 @@ You can ask things like:
Or click the text above to submit that query.
As with all LLM interactions, the results may not be 100% accurate. If you have questions about my career, I'd love to hear from you. You can send me an email at **james_airc@ketrenos.com**.
As with all LLM interactions, the results may not be 100% accurate. If you have questions about my career, I'd love to hear from you. You can send me an email at **james_backstory@ketrenos.com**.
`;
const welcomeMessage = {
@ -89,8 +83,6 @@ type Tool = {
enabled: boolean
};
type SeverityType = 'error' | 'info' | 'success' | 'warning' | undefined;
interface ControlsParams {
tools: Tool[],
rags: Tool[],
@ -115,33 +107,13 @@ type SystemInfo = {
"CPU": string
};
type MessageMetadata = {
rag: any,
tools: any[],
eval_count: number,
eval_duration: number,
prompt_eval_count: number,
prompt_eval_duration: number
};
type MessageData = {
role: string,
content: string,
user?: string,
type?: string,
id?: string,
isProcessing?: boolean,
metadata?: MessageMetadata
};
type MessageList = MessageData[];
const getConnectionBase = (loc: any): string => {
if (!loc.host.match(/.*battle-linux.*/)) {
return loc.protocol + "//" + loc.host;
} else {
return loc.protocol + "//battle-linux.ketrenos.com:5000";
return loc.protocol + "//battle-linux.ketrenos.com:8911";
}
}
@ -316,197 +288,30 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, messageHis
</div>);
}
interface ExpandMoreProps extends IconButtonProps {
expand: boolean;
interface TabPanelProps {
children?: React.ReactNode;
index: number;
tab: number;
}
const ExpandMore = styled((props: ExpandMoreProps) => {
const { expand, ...other } = props;
return <IconButton {...other} />;
})(({ theme }) => ({
marginLeft: 'auto',
transition: theme.transitions.create('transform', {
duration: theme.transitions.duration.shortest,
}),
variants: [
{
props: ({ expand }) => !expand,
style: {
transform: 'rotate(0deg)',
},
},
{
props: ({ expand }) => !!expand,
style: {
transform: 'rotate(180deg)',
},
},
],
}));
interface MessageInterface {
message: MessageData,
submitQuery: (text: string) => void
};
interface MessageMetaInterface {
metadata: MessageMetadata
}
const MessageMeta = ({ metadata }: MessageMetaInterface) => {
if (metadata === undefined) {
return <></>
}
return (<>
<Box sx={{ fontSize: "0.8rem", mb: 1 }}>
Below is the LLM performance of this query. Note that if tools are called, the entire context is processed for each separate tool request by the LLM. This can dramatically increase the total time for a response.
</Box>
<TableContainer component={Card} className="PromptStats" sx={{ mb: 1 }}>
<Table aria-label="prompt stats" size="small">
<TableHead>
<TableRow>
<TableCell></TableCell>
<TableCell align="right" >Tokens</TableCell>
<TableCell align="right">Time (s)</TableCell>
<TableCell align="right">TPS</TableCell>
</TableRow>
</TableHead>
<TableBody>
<TableRow key="prompt" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
<TableCell component="th" scope="row">Prompt</TableCell>
<TableCell align="right">{metadata.prompt_eval_count}</TableCell>
<TableCell align="right">{Math.round(metadata.prompt_eval_duration / 10 ** 7) / 100}</TableCell>
<TableCell align="right">{Math.round(metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration)}</TableCell>
</TableRow>
<TableRow key="response" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
<TableCell component="th" scope="row">Response</TableCell>
<TableCell align="right">{metadata.eval_count}</TableCell>
<TableCell align="right">{Math.round(metadata.eval_duration / 10 ** 7) / 100}</TableCell>
<TableCell align="right">{Math.round(metadata.eval_count * 10 ** 9 / metadata.eval_duration)}</TableCell>
</TableRow>
<TableRow key="total" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
<TableCell component="th" scope="row">Total</TableCell>
<TableCell align="right">{metadata.prompt_eval_count + metadata.eval_count}</TableCell>
<TableCell align="right">{Math.round((metadata.prompt_eval_duration + metadata.eval_duration) / 10 ** 7) / 100}</TableCell>
<TableCell align="right">{Math.round((metadata.prompt_eval_count + metadata.eval_count) * 10 ** 9 / (metadata.prompt_eval_duration + metadata.eval_duration))}</TableCell>
</TableRow>
</TableBody>
</Table>
</TableContainer>
{
metadata.tools !== undefined && metadata.tools.length !== 0 &&
<Accordion>
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Box sx={{ fontSize: "0.8rem" }}>
Tools queried
</Box>
</AccordionSummary>
<AccordionDetails>
{metadata.tools.map((tool: any, index: number) => <Box key={index}>
{index !== 0 && <Divider />}
<Box sx={{ fontSize: "0.75rem", display: "flex", flexDirection: "column", mt: 0.5 }}>
<div style={{ display: "flex", paddingRight: "1rem", minWidth: "10rem", whiteSpace: "nowrap" }}>
{tool.tool}
</div>
<div style={{ display: "flex", padding: "3px", whiteSpace: "pre-wrap", flexGrow: 1, border: "1px solid #E0E0E0", maxHeight: "5rem", overflow: "auto" }}>{JSON.stringify(tool.result, null, 2)}</div>
</Box>
</Box>)}
</AccordionDetails>
</Accordion>
}
{
metadata.rag.name !== undefined &&
<Accordion>
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Box sx={{ fontSize: "0.8rem" }}>
Top RAG {metadata.rag.ids.length} matches from '{metadata.rag.name}' collection against embedding vector of {metadata.rag.query_embedding.length} dimensions
</Box>
</AccordionSummary>
<AccordionDetails>
{metadata.rag.ids.map((id: number, index: number) => <Box key={index}>
{index !== 0 && <Divider />}
<Box sx={{ fontSize: "0.75rem", display: "flex", flexDirection: "row", mb: 0.5, mt: 0.5 }}>
<div style={{ display: "flex", flexDirection: "column", paddingRight: "1rem", minWidth: "10rem" }}>
<div style={{ whiteSpace: "nowrap" }}>Doc ID: {metadata.rag.ids[index]}</div>
<div style={{ whiteSpace: "nowrap" }}>Similarity: {Math.round(metadata.rag.distances[index] * 100) / 100}</div>
<div style={{ whiteSpace: "nowrap" }}>Type: {metadata.rag.metadatas[index].doc_type}</div>
<div style={{ whiteSpace: "nowrap" }}>Chunk Len: {metadata.rag.documents[index].length}</div>
</div>
<div style={{ display: "flex", padding: "3px", flexGrow: 1, border: "1px solid #E0E0E0", maxHeight: "5rem", overflow: "auto" }}>{metadata.rag.documents[index]}</div>
</Box>
</Box>
)}
</AccordionDetails>
</Accordion>
}
</>
);
};
interface ChatQueryInterface {
text: string,
submitQuery: (text: string) => void
}
const ChatQuery = ({ text, submitQuery }: ChatQueryInterface) => {
return (<Button size="small" variant="outlined" sx={{ mb: 1 }} onClick={(e: any) => { console.log(text); submitQuery(text); }}>{text}</Button>);
}
const Message = ({ message, submitQuery }: MessageInterface) => {
const [expanded, setExpanded] = React.useState(false);
const handleExpandClick = () => {
setExpanded(!expanded);
};
const formattedContent = message.content.trim();
function CustomTabPanel(props: TabPanelProps) {
const { children, tab, index, ...other } = props;
return (
<Card sx={{ flexGrow: 1, pb: message.metadata ? 0 : "8px" }} className={(message.role === 'user' ? 'user-message' : 'assistant-message')}>
<CardContent>
{message.role === 'assistant' ?
use_mui_markdown ? <MuiMarkdown children={formattedContent} overrides={{
ChatQuery: {
component: ChatQuery,
props: {
submitQuery
}, // Optional: pass default props if needed
},
}} /> : <ReactMarkdown remarkPlugins={[remarkMath]}
rehypePlugins={[rehypeKatex]} children={formattedContent} />
:
<Typography variant="body2" sx={{ color: 'text.secondary' }}>
{message.content}
</Typography>
}
</CardContent>
{message.metadata && <>
<CardActions disableSpacing>
<Typography sx={{ color: "darkgrey", p: 1, textAlign: "end", flexGrow: 1 }}>LLM information for this query</Typography>
<ExpandMore
expand={expanded}
onClick={handleExpandClick}
aria-expanded={expanded}
aria-label="show more"
>
<ExpandMoreIcon />
</ExpandMore>
</CardActions>
<Collapse in={expanded} timeout="auto" unmountOnExit>
<CardContent>
<MessageMeta metadata={message.metadata} />
</CardContent>
</Collapse>
</>}
</Card>
<div
className="TabPanel"
role="tabpanel"
style={{ "display": tab === index ? "flex": "none" }}
id={`tabpanel-${index}`}
aria-labelledby={`tab-${index}`}
{...other}
>
{tab === index && children}
</div>
);
}
type ContextStatus = {
context_used: number,
max_context: number
};
const App = () => {
const [query, setQuery] = useState('');
const [conversation, setConversation] = useState<MessageList>([]);
@ -531,7 +336,8 @@ const App = () => {
const [lastPromptTPS, setLastPromptTPS] = useState<number>(430);
const [countdown, setCountdown] = useState<number>(0);
const [messageHistoryLength, setMessageHistoryLength] = useState<number>(5);
const [tab, setTab] = useState<number>(0);
const [about, setAbout] = useState<string>("");
const timerRef = useRef<any>(null);
const startCountdown = (seconds: number) => {
@ -611,6 +417,33 @@ const App = () => {
});
}, [systemInfo, setSystemInfo, loc, setSnack, sessionId])
// Get the About markdown
useEffect(() => {
if (about !== "") {
return;
}
const fetchAbout = async () => {
try {
const response = await fetch("/docs/about.md", {
method: 'GET',
headers: {
'Content-Type': 'application/json',
},
});
if (!response.ok) {
throw Error("/docs/about.md not found");
}
const data = await response.text();
setAbout(data);
} catch (error: any) {
console.error('Error obtaining About content information:', error);
setAbout("No information provided.");
};
};
fetchAbout();
}, [about, setAbout])
// Update the context status
const updateContextStatus = useCallback(() => {
fetch(getConnectionBase(loc) + `/api/context-status/${sessionId}`, {
@ -963,7 +796,7 @@ const App = () => {
case 'QueryInput':
sendQuery(query);
break;
}
}
}
};
@ -1148,6 +981,7 @@ const App = () => {
}
};
const handleSnackClose = (
event: React.SyntheticEvent | Event,
reason?: SnackbarCloseReason,
@ -1159,6 +993,10 @@ const App = () => {
setSnackOpen(false);
};
const handleTabChange = (event: React.SyntheticEvent, newValue: number) => {
setTab(newValue);
};
const Offset = styled('div')(({ theme }) => theme.mixins.toolbar);
return (
@ -1194,9 +1032,16 @@ const App = () => {
<ResetIcon />
</IconButton>
</Tooltip>
<Typography variant="h6" noWrap component="div">
ai.ketrenos.com
</Typography>
<Box sx={{ borderBottom: 1, borderColor: 'divider' }}>
<Tabs value={tab} indicatorColor="secondary"
textColor="inherit"
variant="fullWidth"
onChange={handleTabChange} aria-label="Backstory navigation">
<Tab label="Backstory" icon={<Avatar sx={{ width: 24, height: 24 }} variant="rounded" alt="Backstory logo" src="/logo192.png" />} iconPosition="start" />
<Tab label="Resume Builder"/>
<Tab label="About"/>
</Tabs>
</Box>
{
mobileOpen === true &&
@ -1244,62 +1089,79 @@ const App = () => {
{drawer}
</Drawer>
</Box>
<Box component="main" sx={{ flexGrow: 1, overflow: 'auto' }} className="ChatBox" ref={conversationRef}>
<Box className="Conversation" sx={{ flexGrow: 2, p: 1 }}>
{conversation.map((message, index) => <Message key={index} submitQuery={submitQuery} message={message} />)}
<Box sx={{
display: "flex",
flexDirection: "column",
alignItems: "center",
justifyContent: "center",
mb: 1
}}>
<PropagateLoader
size="10px"
loading={processing}
aria-label="Loading Spinner"
data-testid="loader"
<CustomTabPanel tab={tab} index={0}>
<Box component="main" sx={{ flexGrow: 1, overflow: 'auto' }} className="ChatBox" ref={conversationRef}>
<Box className="Conversation" sx={{ flexGrow: 2, p: 1 }}>
{conversation.map((message, index) => <Message key={index} submitQuery={submitQuery} message={message} />)}
<Box sx={{
display: "flex",
flexDirection: "column",
alignItems: "center",
justifyContent: "center",
mb: 1
}}>
<PropagateLoader
size="10px"
loading={processing}
aria-label="Loading Spinner"
data-testid="loader"
/>
{processing === true && countdown > 0 && (
<Box
sx={{
pt: 1,
fontSize: "0.7rem",
color: "darkgrey"
}}
>Estimated response time: {countdown}s</Box>
)}
</Box>
<Box sx={{ ml: "0.25rem", fontSize: "0.6rem", color: "darkgrey", display: "flex", flexDirection: "row", gap: 1, mt: "auto" }}>
Context used: {contextUsedPercentage}% {contextStatus.context_used}/{contextStatus.max_context}
{
contextUsedPercentage >= 90 ? <Typography sx={{ fontSize: "0.6rem", color: "red" }}>WARNING: Context almost exhausted. You should start a new chat.</Typography>
: (contextUsedPercentage >= 50 ? <Typography sx={{ fontSize: "0.6rem", color: "orange" }}>NOTE: Context is getting long. Queries will be slower, and the LLM may stop issuing tool calls.</Typography>
: <></>)
}
</Box>
</Box>
<Box className="Query" sx={{ display: "flex", flexDirection: "row", p: 1 }}>
<TextField
variant="outlined"
disabled={processing}
fullWidth
type="text"
value={query}
onChange={(e) => setQuery(e.target.value)}
onKeyDown={handleKeyPress}
placeholder="Enter your question..."
id="QueryInput"
/>
{processing === true && countdown > 0 && (
<Box
sx={{
pt: 1,
fontSize: "0.7rem",
color: "darkgrey"
}}
>Estimated response time: {countdown}s</Box>
)}
</Box>
<Box sx={{ ml: "0.25rem", fontSize: "0.6rem", color: "darkgrey", display: "flex", flexDirection: "row", gap: 1, mt: "auto" }}>
Context used: {contextUsedPercentage}% {contextStatus.context_used}/{contextStatus.max_context}
{
contextUsedPercentage >= 90 ? <Typography sx={{ fontSize: "0.6rem", color: "red" }}>WARNING: Context almost exhausted. You should start a new chat.</Typography>
: (contextUsedPercentage >= 50 ? <Typography sx={{ fontSize: "0.6rem", color: "orange" }}>NOTE: Context is getting long. Queries will be slower, and the LLM may stop issuing tool calls.</Typography>
: <></>)
}
</Box>
</Box>
<Box className="Query" sx={{ display: "flex", flexDirection: "row", p: 1 }}>
<TextField
variant="outlined"
disabled={processing}
fullWidth
type="text"
value={query}
onChange={(e) => setQuery(e.target.value)}
onKeyDown={handleKeyPress}
placeholder="Enter your question..."
id="QueryInput"
/>
<AccordionActions>
<Tooltip title="Send">
<Button sx={{ m: 0 }} variant="contained" onClick={() => { sendQuery(query); }}><SendIcon /></Button>
<Button sx={{ m: 1 }} variant="contained" onClick={() => { sendQuery(query); }}><SendIcon /></Button>
</Tooltip>
</AccordionActions>
</Box>
</Box>
</Box>
</Box>
</CustomTabPanel>
<CustomTabPanel tab={tab} index={1}>
<ResumeBuilder {...{isScrolledToBottom, scrollToBottom, processing, setProcessing, setSnack, connectionBase: getConnectionBase(loc), sessionId }}/>
</CustomTabPanel>
<CustomTabPanel tab={tab} index={2}>
<Box className="ChatBox">
<Box className="Conversation">
<Card sx={{ flexGrow: 1, }} className={'About ChatBox'}>
<CardContent>
<MuiMarkdown>{about}</MuiMarkdown>
</CardContent>
</Card>
</Box>
</Box>
</CustomTabPanel>
</Box>
<Snackbar open={snackOpen} autoHideDuration={(snackSeverity === "success" || snackSeverity === "info") ? 1500 : 6000} onClose={handleSnackClose}>
<Alert

View File

@ -0,0 +1,63 @@
import { createTheme } from '@mui/material/styles';
const backstoryTheme = createTheme({
palette: {
primary: {
main: '#1A2536', // Midnight Blue
contrastText: '#D3CDBF', // Warm Gray
},
secondary: {
main: '#4A7A7D', // Dusty Teal
contrastText: '#FFFFFF', // White
},
text: {
primary: '#2E2E2E', // Charcoal Black
secondary: '#1A2536', // Midnight Blue (previously '#D3CDBF' Warm Gray)
},
background: {
default: '#D3CDBF', // Warm Gray
paper: '#FFFFFF', // White
},
action: {
active: '#D4A017', // Golden Ochre
hover: 'rgba(212, 160, 23, 0.1)', // Golden Ochre with opacity
},
custom: {
highlight: '#D4A017', // Golden Ochre
contrast: '#2E2E2E', // Charcoal Black
},
},
typography: {
fontFamily: "'Roboto', sans-serif",
h1: {
fontSize: '2rem',
fontWeight: 500,
color: '#2E2E2E', // Charcoal Black
},
body1: {
fontSize: '1rem',
color: '#2E2E2E', // Charcoal Black
},
},
components: {
MuiButton: {
styleOverrides: {
root: {
textTransform: 'none',
'&:hover': {
backgroundColor: 'rgba(212, 160, 23, 0.2)', // Golden Ochre hover
},
},
},
},
MuiAppBar: {
styleOverrides: {
root: {
backgroundColor: '#1A2536', // Midnight Blue
},
},
},
},
});
export { backstoryTheme };
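MUI's default `Palette` type does not include the `custom` slot used above and referenced from `ChatBubble.tsx` and `Message.tsx` (`theme.palette.custom.highlight`). With TypeScript this is usually handled by module augmentation along the following lines; the declaration is not shown in this diff, so this is an assumption about how the project compiles rather than its actual code.

```typescript
// Assumed module augmentation so TypeScript accepts theme.palette.custom.*;
// the real project may declare this elsewhere (e.g. a theme.d.ts file).
import '@mui/material/styles';

declare module '@mui/material/styles' {
  interface Palette {
    custom: {
      highlight: string; // Golden Ochre accents
      contrast: string;  // Charcoal Black
    };
  }
  interface PaletteOptions {
    custom?: {
      highlight?: string;
      contrast?: string;
    };
  }
}
```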

View File

@ -0,0 +1,58 @@
import { Box } from '@mui/material';
import { useTheme } from '@mui/material/styles';
import { SxProps, Theme } from '@mui/material';
import React from 'react';
interface ChatBubbleProps {
isUser: boolean;
isFullWidth?: boolean;
children: React.ReactNode;
sx?: SxProps<Theme>;
}
function ChatBubble({ isUser, isFullWidth, children, sx }: ChatBubbleProps) {
const theme = useTheme();
const userStyle = {
backgroundColor: theme.palette.background.default, // Warm Gray (#D3CDBF)
border: `1px solid ${theme.palette.custom.highlight}`, // Golden Ochre (#D4A017)
borderRadius: '16px 16px 0 16px', // Rounded, flat bottom-right for user
padding: theme.spacing(1, 2),
maxWidth: isFullWidth ? '100%' : '70%',
minWidth: '70%',
alignSelf: 'flex-end', // Right-aligned for user
color: theme.palette.primary.main, // Midnight Blue (#1A2536) for text
'& > *': {
color: 'inherit', // Children inherit Midnight Blue unless overridden
},
};
const assistantStyle = {
backgroundColor: theme.palette.primary.main, // Midnight Blue (#1A2536)
border: `1px solid ${theme.palette.secondary.main}`, // Dusty Teal (#4A7A7D)
borderRadius: '16px 16px 16px 0', // Rounded, flat bottom-left for assistant
padding: theme.spacing(1, 2),
maxWidth: isFullWidth ? '100%' : '70%',
minWidth: '70%',
alignSelf: 'flex-start', // Left-aligned for assistant
color: theme.palette.primary.contrastText, // Warm Gray (#D3CDBF) for text
'& > *': {
color: 'inherit', // Children inherit Warm Gray unless overridden
},
};
return (
<Box sx={{ ...(isUser ? userStyle : assistantStyle), ...sx }}>
{children}
</Box>
);
}
export type {
ChatBubbleProps
};
export {
ChatBubble
};

View File

@ -0,0 +1,8 @@
type ContextStatus = {
context_used: number,
max_context: number
};
export type {
ContextStatus
};
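This type backs the context-usage footer in the chat view, which warns at roughly 50% and 90% usage. Below is a hypothetical helper showing how the displayed percentage and warning level could be derived from it; the exact expression used in `App.tsx` is not part of this diff, so the rounding here is an assumption.

```typescript
import { ContextStatus } from './ContextStatus';

// Hypothetical helper: derive the percentage and warning level shown in the chat footer.
const contextUsage = (status: ContextStatus) => {
  const percent = status.max_context > 0
    ? Math.round((100 * status.context_used) / status.max_context)
    : 0;
  // Thresholds mirror the UI text: >= 90% suggests starting a new chat,
  // >= 50% warns that queries slow down and tool calls may stop.
  const level = percent >= 90 ? "critical" : percent >= 50 ? "warning" : "ok";
  return { percent, level };
};
```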

View File

@ -0,0 +1,201 @@
import React, { useState } from 'react';
import {
Typography,
Button,
Tabs,
Tab,
Paper,
IconButton,
Box,
useMediaQuery,
Divider,
Slider,
Stack,
TextField
} from '@mui/material';
import Tooltip from '@mui/material/Tooltip';
import { useTheme } from '@mui/material/styles';
import SendIcon from '@mui/icons-material/Send';
import {
ChevronLeft,
ChevronRight,
SwapHoriz,
} from '@mui/icons-material';
import { SxProps, Theme } from '@mui/material';
import { MuiMarkdown } from "mui-markdown";
import { MessageData } from './MessageMeta';
interface DocumentComponentProps {
title: string;
children?: React.ReactNode;
}
interface DocumentViewerProps {
generateResume: (jobDescription: string) => void,
resume: MessageData | undefined,
sx?: SxProps<Theme>,
};
const DocumentViewer: React.FC<DocumentViewerProps> = ({generateResume, resume, sx} : DocumentViewerProps) => {
const [jobDescription, setJobDescription] = useState<string>("");
const theme = useTheme();
const isMobile = useMediaQuery(theme.breakpoints.down('md'));
// State for controlling which document is active on mobile
const [activeDocMobile, setActiveDocMobile] = useState<number>(0);
// State for controlling split ratio on desktop
const [splitRatio, setSplitRatio] = useState<number>(50);
// Handle tab change for mobile
const handleTabChange = (_event: React.SyntheticEvent, newValue: number): void => {
setActiveDocMobile(newValue);
};
// Adjust split ratio
const handleSliderChange = (_event: Event, newValue: number | number[]): void => {
setSplitRatio(newValue as number);
};
// Reset split ratio
const resetSplit = (): void => {
setSplitRatio(50);
};
const handleKeyPress = (event: any) => {
if (event.key === 'Enter' && event.ctrlKey) {
generateResume(jobDescription);
}
};
// Document component
const Document: React.FC<DocumentComponentProps> = ({ title, children }) => (
<Box
sx={{
display: 'flex',
flexDirection: 'column',
flexGrow: 1,
overflow: 'hidden',
}}
>
{ title !== "" &&
<Box sx={{ display: 'flex', p: 2, bgcolor: 'primary.light', color: 'primary.contrastText' }}>
<Typography variant="h6">{title}</Typography>
</Box> }
<Box sx={{ display: 'flex', p: 2, flexGrow: 1, overflow: 'auto' }}>
{children}
</Box>
</Box>
);
// Mobile view
if (isMobile) {
return (
<Box sx={{ display: 'flex', flexDirection: 'column', flexGrow: 1, ...sx }}>
{/* Tabs */}
<Tabs
value={activeDocMobile}
onChange={handleTabChange}
variant="fullWidth"
sx={{ bgcolor: 'background.paper' }}
>
<Tab label="Job Description" />
<Tab label="Resume" />
</Tabs>
{/* Document display area */}
<Box sx={{ display: 'flex', flexDirection: 'column', flexGrow: 1, overflow: 'hidden', p: 2 }}>
{activeDocMobile === 0 ? (<>
<Document title="">
<TextField
variant="outlined"
fullWidth
multiline
type="text"
sx={{
flex: 1, // Makes the TextField fill the parent height
flexGrow: 1,
maxHeight: '100%', // Prevents it from growing larger than the parent height
overflow: 'auto', // Enables scrollbars if the content overflows
}}
value={jobDescription}
onChange={(e) => setJobDescription(e.target.value)}
onKeyDown={handleKeyPress}
placeholder="Enter job description..."
/>
</Document>
<Button onClick={(e: any) => { generateResume(jobDescription); } }>Generate</Button>
</>) : (
<Document title="">{ resume !== undefined && <MuiMarkdown children={resume.content.trim()}/> }</Document>
)}
</Box>
</Box>
);
}
// Desktop view
return (
<Box sx={{ display: 'flex', flexDirection: 'column', flexGrow: 1, ...sx }}>
{/* Split document view */}
<Box sx={{ display: 'flex', flexGrow: 1, overflow: 'hidden', p: 2 }}>
<Box sx={{ display: 'flex', flexDirection: 'column', width: `${splitRatio}%`, pr: 1, flexGrow: 1, overflow: 'hidden' }}>
<Document title="Job Description">
<TextField
variant="outlined"
fullWidth
type="text"
multiline
sx={{
flex: 1, // Makes the TextField fill the parent height
flexGrow: 1,
maxHeight: '100%', // Prevents it from growing larger than the parent height
overflow: 'auto', // Enables scrollbars if the content overflows
}}
value={jobDescription}
onChange={(e) => setJobDescription(e.target.value)}
onKeyDown={handleKeyPress}
placeholder="Enter job description..."
/>
</Document>
<Tooltip title="Generate">
<Button sx={{ m: 1, gap: 1 }} variant="contained" onClick={() => { generateResume(jobDescription); }}>Generate<SendIcon /></Button>
</Tooltip>
</Box>
<Divider orientation="vertical" flexItem />
<Box sx={{ display: 'flex', width: `${100 - splitRatio}%`, pl: 1, flexGrow: 1 }}>
<Document title="Resume">{ resume !== undefined && <MuiMarkdown children={resume.content.trim()}/> }</Document>
</Box>
</Box>
{/* Split control panel */}
<Paper sx={{ p: 2, display: 'flex', alignItems: 'center', justifyContent: 'center' }}>
<Stack direction="row" spacing={2} alignItems="center" sx={{ width: '60%' }}>
<IconButton onClick={() => setSplitRatio(Math.max(20, splitRatio - 10))}>
<ChevronLeft />
</IconButton>
<Slider
value={splitRatio}
onChange={handleSliderChange}
aria-label="Split ratio"
min={20}
max={80}
/>
<IconButton onClick={() => setSplitRatio(Math.min(80, splitRatio + 10))}>
<ChevronRight />
</IconButton>
<IconButton onClick={resetSplit}>
<SwapHoriz />
</IconButton>
</Stack>
</Paper>
</Box>
);
};
export type {
DocumentViewerProps
};
export { DocumentViewer };

View File

@ -0,0 +1,34 @@
import { styled } from '@mui/material/styles';
import IconButton, { IconButtonProps } from '@mui/material/IconButton';
interface ExpandMoreProps extends IconButtonProps {
expand: boolean;
}
const ExpandMore = styled((props: ExpandMoreProps) => {
const { expand, ...other } = props;
return <IconButton {...other} />;
})(({ theme }) => ({
marginLeft: 'auto',
transition: theme.transitions.create('transform', {
duration: theme.transitions.duration.shortest,
}),
variants: [
{
props: ({ expand }) => !expand,
style: {
transform: 'rotate(0deg)',
},
},
{
props: ({ expand }) => !!expand,
style: {
transform: 'rotate(180deg)',
},
},
],
}));
export {
ExpandMore
};

frontend/src/Message.tsx (new file, 100 lines)
View File

@ -0,0 +1,100 @@
import { useState } from 'react';
import Box from '@mui/material/Box';
import Button from '@mui/material/Button';
import CardContent from '@mui/material/CardContent';
import CardActions from '@mui/material/CardActions';
import Collapse from '@mui/material/Collapse';
import { MuiMarkdown } from "mui-markdown";
import Typography from '@mui/material/Typography';
import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
import { ExpandMore } from './ExpandMore';
import { MessageData, MessageMeta } from './MessageMeta';
import { ChatBubble } from './ChatBubble';
type MessageList = MessageData[];
interface MessageInterface {
message?: MessageData,
isFullWidth?: boolean,
submitQuery?: (text: string) => void
};
interface ChatQueryInterface {
text: string,
submitQuery?: (text: string) => void
}
const ChatQuery = ({ text, submitQuery }: ChatQueryInterface) => {
return (submitQuery
? <Button variant="outlined" sx={{
color: theme => theme.palette.custom.highlight, // Golden Ochre (#D4A017)
borderColor: theme => theme.palette.custom.highlight,
mt: 1,
mb: 1
}}
size="small" onClick={(e: any) => { console.log(text); submitQuery(text); }}>{text}</Button>
: <Box>{text}</Box>);
}
const Message = ({ message, submitQuery, isFullWidth }: MessageInterface) => {
const [expanded, setExpanded] = useState<boolean>(false);
const handleExpandClick = () => {
setExpanded(!expanded);
};
if (message === undefined) {
return (<></>);
}
const formattedContent = message.content.trim();
return (
<ChatBubble isFullWidth={isFullWidth} isUser={message.role === 'user'} sx={{ flexGrow: 1, pb: message.metadata ? 0 : "8px", mb: 1, mt: 1 }}>
<CardContent>
{message.role === 'assistant' ?
<MuiMarkdown children={formattedContent} overrides={{
ChatQuery: {
component: ChatQuery,
props: {
submitQuery
}, // Optional: pass default props if needed
},
}} />
:
<Typography variant="body2" sx={{ color: 'text.secondary' }}>
{message.content}
</Typography>
}
</CardContent>
{message.metadata && <>
<CardActions disableSpacing>
<Typography sx={{ color: "darkgrey", p: 1, textAlign: "end", flexGrow: 1 }}>LLM information for this query</Typography>
<ExpandMore
expand={expanded}
onClick={handleExpandClick}
aria-expanded={expanded}
aria-label="show more"
>
<ExpandMoreIcon />
</ExpandMore>
</CardActions>
<Collapse in={expanded} timeout="auto" unmountOnExit>
<CardContent>
<MessageMeta metadata={message.metadata} />
</CardContent>
</Collapse>
</>}
</ChatBubble>
);
};
export type {
MessageInterface,
MessageList
};
export {
Message
};

View File

@ -0,0 +1,135 @@
//import React, { useState, useEffect, useRef, useCallback, ReactElement } from 'react';
import Divider from '@mui/material/Divider';
import Accordion from '@mui/material/Accordion';
import AccordionSummary from '@mui/material/AccordionSummary';
import AccordionDetails from '@mui/material/AccordionDetails';
import Box from '@mui/material/Box';
import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
import Card from '@mui/material/Card';
import Table from '@mui/material/Table';
import TableBody from '@mui/material/TableBody';
import TableCell from '@mui/material/TableCell';
import TableContainer from '@mui/material/TableContainer';
import TableHead from '@mui/material/TableHead';
import TableRow from '@mui/material/TableRow';
type MessageMetadata = {
rag: any,
tools: any[],
eval_count: number,
eval_duration: number,
prompt_eval_count: number,
prompt_eval_duration: number
};
type MessageData = {
role: string,
content: string,
user?: string,
type?: string,
id?: string,
isProcessing?: boolean,
metadata?: MessageMetadata
};
interface MessageMetaInterface {
metadata: MessageMetadata
}
const MessageMeta = ({ metadata }: MessageMetaInterface) => {
if (metadata === undefined) {
return <></>
}
return (<>
<Box sx={{ fontSize: "0.8rem", mb: 1 }}>
Below is the LLM performance of this query. Note that if tools are called, the entire context is processed for each separate tool request by the LLM. This can dramatically increase the total time for a response.
</Box>
<TableContainer component={Card} className="PromptStats" sx={{ mb: 1 }}>
<Table aria-label="prompt stats" size="small">
<TableHead>
<TableRow>
<TableCell></TableCell>
<TableCell align="right" >Tokens</TableCell>
<TableCell align="right">Time (s)</TableCell>
<TableCell align="right">TPS</TableCell>
</TableRow>
</TableHead>
<TableBody>
<TableRow key="prompt" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
<TableCell component="th" scope="row">Prompt</TableCell>
<TableCell align="right">{metadata.prompt_eval_count}</TableCell>
<TableCell align="right">{Math.round(metadata.prompt_eval_duration / 10 ** 7) / 100}</TableCell>
<TableCell align="right">{Math.round(metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration)}</TableCell>
</TableRow>
<TableRow key="response" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
<TableCell component="th" scope="row">Response</TableCell>
<TableCell align="right">{metadata.eval_count}</TableCell>
<TableCell align="right">{Math.round(metadata.eval_duration / 10 ** 7) / 100}</TableCell>
<TableCell align="right">{Math.round(metadata.eval_count * 10 ** 9 / metadata.eval_duration)}</TableCell>
</TableRow>
<TableRow key="total" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
<TableCell component="th" scope="row">Total</TableCell>
<TableCell align="right">{metadata.prompt_eval_count + metadata.eval_count}</TableCell>
<TableCell align="right">{Math.round((metadata.prompt_eval_duration + metadata.eval_duration) / 10 ** 7) / 100}</TableCell>
<TableCell align="right">{Math.round((metadata.prompt_eval_count + metadata.eval_count) * 10 ** 9 / (metadata.prompt_eval_duration + metadata.eval_duration))}</TableCell>
</TableRow>
</TableBody>
</Table>
</TableContainer>
{
metadata.tools !== undefined && metadata.tools.length !== 0 &&
<Accordion>
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Box sx={{ fontSize: "0.8rem" }}>
Tools queried
</Box>
</AccordionSummary>
<AccordionDetails>
{metadata.tools.map((tool: any, index: number) => <Box key={index}>
{index !== 0 && <Divider />}
<Box sx={{ fontSize: "0.75rem", display: "flex", flexDirection: "column", mt: 0.5 }}>
<div style={{ display: "flex", paddingRight: "1rem", minWidth: "10rem", whiteSpace: "nowrap" }}>
{tool.tool}
</div>
<div style={{ display: "flex", padding: "3px", whiteSpace: "pre-wrap", flexGrow: 1, border: "1px solid #E0E0E0", maxHeight: "5rem", overflow: "auto" }}>{JSON.stringify(tool.result, null, 2)}</div>
</Box>
</Box>)}
</AccordionDetails>
</Accordion>
}
{
metadata.rag.name !== undefined &&
<Accordion>
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Box sx={{ fontSize: "0.8rem" }}>
Top RAG {metadata.rag.ids.length} matches from '{metadata.rag.name}' collection against embedding vector of {metadata.rag.query_embedding.length} dimensions
</Box>
</AccordionSummary>
<AccordionDetails>
{metadata.rag.ids.map((id: number, index: number) => <Box key={index}>
{index !== 0 && <Divider />}
<Box sx={{ fontSize: "0.75rem", display: "flex", flexDirection: "row", mb: 0.5, mt: 0.5 }}>
<div style={{ display: "flex", flexDirection: "column", paddingRight: "1rem", minWidth: "10rem" }}>
<div style={{ whiteSpace: "nowrap" }}>Doc ID: {metadata.rag.ids[index]}</div>
<div style={{ whiteSpace: "nowrap" }}>Similarity: {Math.round(metadata.rag.distances[index] * 100) / 100}</div>
<div style={{ whiteSpace: "nowrap" }}>Type: {metadata.rag.metadatas[index].doc_type}</div>
<div style={{ whiteSpace: "nowrap" }}>Chunk Len: {metadata.rag.documents[index].length}</div>
</div>
<div style={{ display: "flex", padding: "3px", flexGrow: 1, border: "1px solid #E0E0E0", maxHeight: "5rem", overflow: "auto" }}>{metadata.rag.documents[index]}</div>
</Box>
</Box>
)}
</AccordionDetails>
</Accordion>
}
</>
);
};
export type {
MessageMetadata,
MessageMetaInterface,
MessageData
};
export { MessageMeta };
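The table in `MessageMeta` converts the raw counters into seconds and tokens-per-second; the durations are treated as nanoseconds, which matches how Ollama reports `eval_duration` and `prompt_eval_duration`. A compact restatement of that arithmetic, for reference:

```typescript
import { MessageMetadata } from './MessageMeta';

// Restates the arithmetic used in the MessageMeta table above.
// Durations are assumed to be in nanoseconds, as reported by Ollama.
const promptStats = (m: MessageMetadata) => ({
  promptSeconds: Math.round(m.prompt_eval_duration / 1e7) / 100, // ns -> s, two decimals
  promptTPS: Math.round((m.prompt_eval_count * 1e9) / m.prompt_eval_duration),
  responseSeconds: Math.round(m.eval_duration / 1e7) / 100,
  responseTPS: Math.round((m.eval_count * 1e9) / m.eval_duration),
});
```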

View File

@ -0,0 +1,310 @@
import { useState, useCallback, useRef } from 'react';
import Box from '@mui/material/Box';
import TextField from '@mui/material/TextField';
import PropagateLoader from "react-spinners/PropagateLoader";
import Tooltip from '@mui/material/Tooltip';
import Button from '@mui/material/Button';
import SendIcon from '@mui/icons-material/Send';
import { Message } from './Message';
import { SeverityType } from './Snack';
import { ContextStatus } from './ContextStatus';
import { MessageData } from './MessageMeta';
import { DocumentViewer } from './DocumentViewer';
interface ResumeBuilderProps {
scrollToBottom: () => void,
isScrolledToBottom: () => boolean,
setProcessing: (processing: boolean) => void,
processing: boolean,
connectionBase: string,
sessionId: string | undefined,
setSnack: (message: string, severity?: SeverityType) => void,
};
const ResumeBuilder = ({scrollToBottom, isScrolledToBottom, setProcessing, processing, connectionBase, sessionId, setSnack} : ResumeBuilderProps) => {
const [jobDescription, setJobDescription] = useState<string>("");
const [generateStatus, setGenerateStatus] = useState<MessageData | undefined>(undefined);
const [lastEvalTPS, setLastEvalTPS] = useState<number>(35);
const [lastPromptTPS, setLastPromptTPS] = useState<number>(430);
const [contextStatus, setContextStatus] = useState<ContextStatus>({ context_used: 0, max_context: 0 });
const [countdown, setCountdown] = useState<number>(0);
const [resume, setResume] = useState<MessageData | undefined>(undefined);
const timerRef = useRef<any>(null);
const updateContextStatus = useCallback(() => {
fetch(connectionBase + `/api/context-status/${sessionId}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
},
})
.then(response => response.json())
.then(data => {
setContextStatus(data);
})
.catch(error => {
console.error('Error getting context status:', error);
setSnack("Unable to obtain context status.", "error");
});
}, [setContextStatus, connectionBase, setSnack, sessionId]);
const startCountdown = (seconds: number) => {
if (timerRef.current) clearInterval(timerRef.current);
setCountdown(seconds);
timerRef.current = setInterval(() => {
setCountdown((prev) => {
if (prev <= 1) {
clearInterval(timerRef.current);
timerRef.current = null;
if (isScrolledToBottom()) {
setTimeout(() => {
scrollToBottom();
}, 50)
}
return 0;
}
return prev - 1;
});
}, 1000);
};
const stopCountdown = () => {
if (timerRef.current) {
clearInterval(timerRef.current);
timerRef.current = null;
setCountdown(0);
}
};
if (sessionId === undefined) {
return (<></>);
}
const handleKeyPress = (event: any) => {
if (event.key === 'Enter' && !event.ctrlKey) {
generateResume(jobDescription);
}
};
const generateResume = async (jobDescription: string) => {
if (!jobDescription.trim()) return;
// setResume(undefined);
let scrolledToBottom;
scrollToBottom();
try {
scrolledToBottom = isScrolledToBottom();
setProcessing(true);
// Add initial processing message
setGenerateStatus({ role: 'assistant', content: 'Processing request...' });
if (scrolledToBottom) {
setTimeout(() => { scrollToBottom() }, 50);
}
// Make the fetch request with proper headers
const response = await fetch(connectionBase + `/api/generate-resume/${sessionId}`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Accept': 'application/json',
},
body: JSON.stringify({ content: jobDescription.trim() }),
});
// We'll guess that the response will be around 500 tokens...
const token_guess = 500;
const estimate = Math.round(token_guess / lastEvalTPS + contextStatus.context_used / lastPromptTPS);
scrolledToBottom = isScrolledToBottom();
setSnack(`Job description sent. Response estimated in ${estimate}s.`, "info");
startCountdown(Math.round(estimate));
if (scrolledToBottom) {
setTimeout(() => { scrollToBottom() }, 50);
}
if (!response.ok) {
throw new Error(`Server responded with ${response.status}: ${response.statusText}`);
}
if (!response.body) {
throw new Error('Response body is null');
}
// Set up stream processing with explicit chunking
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
while (true) {
const { done, value } = await reader.read();
if (done) {
break;
}
const chunk = decoder.decode(value, { stream: true });
// Process each complete line immediately
buffer += chunk;
let lines = buffer.split('\n');
buffer = lines.pop() || ''; // Keep incomplete line in buffer
for (const line of lines) {
if (!line.trim()) continue;
try {
const update = JSON.parse(line);
// Force an immediate state update based on the message type
if (update.status === 'processing') {
scrolledToBottom = isScrolledToBottom();
// Update processing message with immediate re-render
setGenerateStatus({ role: 'info', content: update.message });
console.log(update.num_ctx);
if (scrolledToBottom) {
setTimeout(() => { scrollToBottom() }, 50);
}
// Add a small delay to ensure React has time to update the UI
await new Promise(resolve => setTimeout(resolve, 0));
} else if (update.status === 'done') {
// Replace processing message with final result
scrolledToBottom = isScrolledToBottom();
setGenerateStatus(undefined);
setResume(update.message);
const metadata = update.message.metadata;
const evalTPS = metadata.eval_count * 10 ** 9 / metadata.eval_duration;
const promptTPS = metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration;
setLastEvalTPS(evalTPS ? evalTPS : 35);
setLastPromptTPS(promptTPS ? promptTPS : 35);
updateContextStatus();
if (scrolledToBottom) {
setTimeout(() => { scrollToBottom() }, 50);
}
} else if (update.status === 'error') {
// Show error
scrolledToBottom = isScrolledToBottom();
setGenerateStatus({role: 'error', content: update.message });
if (scrolledToBottom) {
setTimeout(() => { scrollToBottom() }, 50);
}
}
} catch (e) {
setSnack("Error generating resume", "error")
console.error('Error parsing JSON:', e, line);
}
}
}
// Process any remaining buffer content
if (buffer.trim()) {
try {
const update = JSON.parse(buffer);
if (update.status === 'done') {
scrolledToBottom = isScrolledToBottom();
setGenerateStatus(undefined);
setResume(update.message);
if (scrolledToBottom) {
setTimeout(() => { scrollToBottom() }, 500);
}
}
} catch (e) {
setSnack("Error processing job description", "error")
}
}
scrolledToBottom = isScrolledToBottom();
stopCountdown();
setProcessing(false);
if (scrolledToBottom) {
setTimeout(() => { scrollToBottom() }, 50);
}
} catch (error) {
console.error('Fetch error:', error);
setSnack("Unable to process job description", "error");
scrolledToBottom = isScrolledToBottom();
setGenerateStatus({ role: 'error', content: `Error: ${error}` });
setProcessing(false);
stopCountdown();
if (scrolledToBottom) {
setTimeout(() => { scrollToBottom() }, 50);
}
}
};
return (
<Box className="ChatBox">
<Box className="Conversation">
<DocumentViewer sx={{
display: "flex",
flexGrow: 1,
overflowY: "auto",
flexDirection: "column",
height: "calc(0vh - 0px)", // Hack to make the height work
}} {...{ generateResume, resume }} />
</Box>
</Box>
);
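// NOTE: unreachable code: the component returns the DocumentViewer above; this block is the earlier job-description entry UI, retained for reference.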
return (<Box className="ChatBox">
<Box className="Conversation">
<TextField
variant="outlined"
autoFocus
fullWidth
multiline
rows="10"
type="text"
value={jobDescription}
onChange={(e) => setJobDescription(e.target.value)}
onKeyDown={handleKeyPress}
placeholder="Enter the job description.."
id="JobDescriptionInput"
/>
<Tooltip title="Generate">
<Button sx={{ m: 1, gap: 1 }} variant="contained" onClick={() => { generateResume(jobDescription); }}>Generate<SendIcon /></Button>
</Tooltip>
<Box sx={{
display: "flex",
flexDirection: "column",
alignItems: "center",
justifyContent: "center",
mb: 1
}}>
<PropagateLoader
size="10px"
loading={processing}
aria-label="Loading Spinner"
data-testid="loader"
/>
{processing === true && countdown > 0 && (
<Box
sx={{
pt: 1,
fontSize: "0.7rem",
color: "darkgrey"
}}
>Estimated response time: {countdown}s</Box>
)}
{generateStatus && <Message isFullWidth={true} message={generateStatus} />}
{/* {resume && <Message isFullWidth={true} message={resume} />} */}
</Box>
</Box>
</Box>);
}
export type {
ResumeBuilderProps
};
export {
ResumeBuilder
};

5
frontend/src/Snack.tsx Normal file
View File

@ -0,0 +1,5 @@
type SeverityType = 'error' | 'info' | 'success' | 'warning' | undefined;
export type {
SeverityType
};

View File

@ -1,5 +1,7 @@
import React from 'react';
import ReactDOM from 'react-dom/client';
import { ThemeProvider } from '@mui/material/styles';
import { backstoryTheme } from './BackstoryTheme'; // Adjust path as needed
import './index.css';
import App from './App';
import reportWebVitals from './reportWebVitals';
@ -9,7 +11,9 @@ const root = ReactDOM.createRoot(
);
root.render(
<React.StrictMode>
<App />
<ThemeProvider theme={backstoryTheme}>
<App />
</ThemeProvider>
</React.StrictMode>
);

View File

Binary image file changed (2.6 KiB before and after); preview not shown.

16
frontend/src/types/theme.d.ts vendored Normal file
View File

@ -0,0 +1,16 @@
import { Palette, PaletteOptions } from '@mui/material/styles';
declare module '@mui/material/styles' {
interface Palette {
custom: {
highlight: string;
contrast: string;
};
}
interface PaletteOptions {
custom?: {
highlight: string;
contrast: string;
};
}
}

Six binary image files removed (318 B, 4.6 KiB, 318 B, 4.6 KiB, 2.6 KiB, and 7.5 KiB); previews not shown.

View File

@ -1,133 +0,0 @@
# Ketr Chat
This LLM agent was built by James Ketrenos in order to provide answers to any questions you may have about his work history.
In addition to being a RAG-enabled expert system, the LLM is configured with real-time access to weather, stocks, and the current time, and it can answer questions about the contents of a website.
## Parts of Ketr Chat
* Backend Server
Provides a custom REST API to support the capabilities exposed from the web UI.
* Pytorch used for LLM communication and inference
* ChromaDB as a vector store for embedding similarities
* FastAPI for the http REST API endpoints
* Serves the static site for production deployment
* Performs all communication with the LLM (currently via ollama.cpp, however I may be switching it back to Hugging Face transformers.)
* Implements the tool subsystem for tool callbacks from the LLM
* Manages a chromadb vector store, including the chunking and embedding of the documents used to provide RAG content related to my career.
* Manages all context sessions
* Currently using qwen2.5:7b, however I frequently switch between different models (llama3.2, deepseek-r1:7b, and mistral:7b.) I've generally had the best results from qwen2.5. DeepSeek-R1 was very cool; the thinking phase was informative for developing system prompts, however the integration with ollama does not support tool calls. That is one reason I'm looking to switch back to Hugging Face transformers.
* Languages: Python, bash
* Web Frontend
Provides a responsive UI for interacting with the system
* Written using React and Mui.
* Exposes enough information to know what the LLM is doing on the backend
* Enables adjusting various parameters, including enabling/disabling tools and RAG, the system prompt, etc.
* Configured to be able to run in development and production. In development mode, the Server does not serve the Web Frontend and only acts as a REST API endpoint.
* Languages: JSX, JavaScript, TypeScript, bash
* Ollama container
If you don't already have ollama installed and running, the container provided in this project is built using the Intel pre-built Ollama package.
* Jupyter notebook
To facilitate rapid development and prototyping, a Jupyter notebook is provided which runs on the same Python package set as the main server container.
# Installation
This project uses docker containers to build. As this was originally written to work on an Intel Arc B580 (Battlemage), it requires a kernel that supports that hardware, such as the one documented at [Intel Graphics Preview](https://github.com/canonical/intel-graphics-preview), which runs in Ubuntu Oracular (24.10).
NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/)
## Want to run under WSL2? No can do...
https://www.intel.com/content/www/us/en/support/articles/000093216/graphics/processor-graphics.html
The A- and B-series discrete GPUs do not support SR-IOV, required for the GPU partitioning that Microsoft Windows uses in order to support GPU acceleration in WSL.
## Building
NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/)
```bash
git clone https://github.com/jketreno/ketr-chat
cd ketr-chat
docker compose build
```
## Running
In order to download the models, you need to have a Hugging Face token. See https://huggingface.co/settings/tokens for information on obtaining a token.
Edit .env to add the following:
```.env
HF_ACCESS_TOKEN=<access token from huggingface>
```
NOTE: Models downloaded by most examples will be placed in the ./cache directory, which is bind mounted to the container.
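If you want to confirm what has been downloaded, a quick check from the host side of that bind mount (illustrative only; the layout under `./cache` depends on which examples you have run):
```bash
# Show how much space each cached model/dataset directory is using on the host
du -sh ./cache/* 2>/dev/null || echo "cache/ is empty until a model has been downloaded"
```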
### Ketr Chat
To launch the ketr-chat shell interactively, with the pytorch 2.6 environment loaded, use the default entrypoint to launch a shell:
```bash
docker compose run --rm ketr-chat shell
```
Once in the shell, you can then launch the server.py:
```bash
docker compose run --rm ketr-chat shell
python src/server.py
```
If you launch the server without any parameters, it will run the backend server, which will host the static web frontend built during the `docker compose build`.
That is the behavior if you up the container:
```bash
docker compose up -d
```
### Jupyter
```bash
docker compose up jupyter -d
```
The default port for inbound connections is 8888 (see docker-compose.yml). $(pwd)/jupyter is bind mounted to /opt/jupyter in the container, which is where notebooks will be saved by default.
To access the jupyter notebook, go to `https://localhost:8888/jupyter`.
### Monitoring
You can run `ze-monitor` within the launched containers to monitor GPU usage.
```bash
containers=($(docker ps --filter "ancestor=ketr-chat" --format "{{.ID}}"))
if [[ ${#containers[*]} -eq 0 ]]; then
echo "Running ketr-chat container not found."
else
for container in ${containers[@]}; do
echo "Container ${container} devices:"
docker exec -it ${container} ze-monitor
done
fi
```
If a ketr-chat container is running, you should see something like:
```
Container 5317c503e771 devices:
Device 1: 8086:A780 (Intel(R) UHD Graphics 770)
Device 2: 8086:E20B (Intel(R) Graphics [0xe20b])
```
You can then launch ze-monitor in that container specifying the device you wish to monitor:
```
containers=($(docker ps --filter "ancestor=ketr-chat" --format "{{.ID}}"))
docker exec -it ${containers[0]} ze-monitor --device 2
```

View File

@ -1,279 +0,0 @@
# ze-monitor
A small utility to monitor Level Zero devices via
[Level Zero Sysman](https://oneapi-src.github.io/level-zero-spec/level-zero/latest/sysman/PROG.html#sysman-programming-guide)
from the command line, similar to 'top'.
# Installation
Requires Ubuntu Oracular 24.10.
## Easiest
### Install prerequisites
This will add the [Intel Graphics Preview PPA](https://github.com/canonical/intel-graphics-preview) and install the required dependencies:
```bash
sudo apt-get install -y \
software-properties-common \
&& sudo add-apt-repository -y ppa:kobuk-team/intel-graphics \
&& sudo apt-get update \
&& sudo apt-get install -y \
libze1 libze-intel-gpu1 libncurses6
```
### Install ze-monitor from .deb package
This will download the ze-monitor .deb package from GitHub, install it, and add the current
user to the 'ze-monitor' group to allow running the utility:
```bash
version=0.3.0-1
wget https://github.com/jketreno/ze-monitor/releases/download/v${version}/ze-monitor-${version}_amd64.deb
sudo dpkg -i ze-monitor-${version}_amd64.deb
sudo usermod -a -G ze-monitor $(whoami)
newgrp ze-monitor
```
Congratulations! You can run ze-monitor:
```bash
ze-monitor
```
You should see something like:
```bash
Device 1: 8086:A780 (Intel(R) UHD Graphics 770)
Device 2: 8086:E20B (Intel(R) Graphics [0xe20b])
```
To monitor a device:
```bash
ze-monitor --device 2
```
Check the docs (`man ze-monitor`) for additional details on running the ze-monitor utility.
## Slightly more involved
This project uses docker containers to build. As this was originally written to monitor an Intel Arc B580 (Battlemage), it requires a kernel that supports that hardware, such as the one documented at [Intel Graphics Preview](https://github.com/canonical/intel-graphics-preview), which runs in Ubuntu Oracular (24.10). It will monitor any Level Zero device, even those using the i915 driver.
NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/)
```
git clone https://github.com/jketreno/ze-monitor.git
cd ze-monitor
docker compose build
sudo apt install libze1 libncurses6
version=$(cat src/version.txt)
docker compose run --remove-orphans --rm \
ze-monitor \
cp /opt/ze-monitor-static/build/ze-monitor-${version}_amd64.deb \
/opt/ze-monitor/build
sudo dpkg -i build/ze-monitor-${version}_amd64.deb
```
# Security
In order for ze-monitor to read the performance metric units (PMU) in the Linux kernel, it needs elevated permissions. The easiest way is to install the .deb package and add the user to the ze-monitor group, or to run under sudo (e.g., `sudo ze-monitor ...`).
The specific capabilities required to monitor the GPU are documented in [Perf Security](https://www.kernel.org/doc/html/v5.1/admin-guide/perf-security.html) and [man capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html). These include:
| Capability | Reason |
|:--------------------|:-----------------------------------------------------|
| CAP_DAC_READ_SEARCH | Bypass all filesystem read access checks |
| CAP_PERFMON | Access to perf_events (vs. overloaded CAP_SYS_ADMIN) |
| CAP_SYS_PTRACE | PTRACE_MODE_READ_REALCREDS ptrace access mode check |
To configure ze-monitor to run with those privileges, you can use `setcap` to set the correct capabilities on ze-monitor. You can further secure your system by creating a user group specifically for running the utility and restrict running of that command to users in that group. That is what the .deb package does.
If you install the .deb package from a [Release](https://github.com/jketreno/ze-monitor/releases) or by building it, that package will set the appropriate permissions for ze-monitor on installation and set it executable only to those in the 'ze-monitor' group.
## Anyone can run ze-monitor
If you build from source and want to set the capabilities:
```bash
sudo setcap "cap_perfmon,cap_dac_read_search,cap_sys_ptrace=ep" build/ze-monitor
getcap build/ze-monitor
```
Any user can then run `build/ze-monitor` and monitor the GPU.
# Build outside container
## Prerequisites
If you would like to build outside of docker, you need the following packages installed:
```
sudo apt-get install -y \
build-essential \
libfmt-dev \
libncurses-dev
```
In addition, you need the Intel drivers installed, which are available from the `kobuk-team/intel-graphics` PPA:
```
sudo apt-get install -y \
software-properties-common \
&& sudo add-apt-repository -y ppa:kobuk-team/intel-graphics \
&& sudo apt-get update \
&& sudo apt-get install -y \
libze-intel-gpu1 \
libze1 \
libze-dev
```
## Building
```
cd build
cmake ..
make
```
## Running
```
build/ze-monitor
```
## Build and install .deb
In order to build the .deb package, you need the following packages installed:
```bash
sudo apt-get install -y \
debhelper \
devscripts \
rpm \
rpm2cpio
```
You can then build the .deb:
```bash
if [ -d build ]; then
cd build
fi
version=$(cat ../src/version.txt)
cpack
sudo dpkg -i build/packages/ze-monitor_${version}_amd64.deb
```
You can then run ze-monitor from your path:
```bash
ze-monitor
```
# Developing
To run the built binary without building a full .deb package, you can build and run on the host by compiling in the container:
```
docker compose run --rm ze-monitor build.sh
build/ze-monitor
```
The build.sh script will build the binary in /opt/ze-monitor/build, which is volume mounted to the host's build directory.
NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities.
# Running
NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities.
If running within a docker container, the container environment does not have access to the host's `/proc/fd`, which is necessary to obtain information about processes outside the current container that are using the GPU. As a result, only processes running inside the same container as ze-monitor will be listed as using the GPU.
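If host-wide visibility is needed, one workaround is to share the host PID namespace with the container. This is a sketch only, not part of this project's docker-compose.yml; the image name and device mapping are assumptions, and the capabilities described in [Security](#security) still apply inside the container:
```bash
# Sketch: share the host PID namespace so processes outside the container
# appear in ze-monitor's process list. <ze-monitor-image> is a placeholder.
docker run --rm -it --pid=host --device /dev/dri <ze-monitor-image> ze-monitor --device 2
```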
## List available devices
```
ze-monitor
```
Example output:
```bash
$ ze-monitor
Device 1: 8086:E20B (Intel(R) Graphics [0xe20b])
Device 2: 8086:A780 (Intel(R) UHD Graphics 770)
```
## Show details for a given device
```
sudo ze-monitor --info --device ( PCIID | # | BDF | UUID | /dev/dri/render*)
```
Example output:
```bash
$ sudo ze-monitor --device 2 --info
Device: 8086:A780 (Intel(R) UHD Graphics 770)
UUID: 868080A7-0400-0000-0002-000000000000
BDF: 0000:0000:0002:0000
PCI ID: 8086:A780
Subdevices: 0
Serial Number: unknown
Board Number: unknown
Brand Name: unknown
Model Name: Intel(R) UHD Graphics 770
Vendor Name: Intel(R) Corporation
Driver Version: 0CB7EFCAD5695B7EC5C8CE6
Type: GPU
Is integrated with host: Yes
Is a sub-device: No
Supports error correcting memory: No
Supports on-demand page-faulting: No
Engines: 7
Engine 1: ZES_ENGINE_GROUP_RENDER_SINGLE
Engine 2: ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE
Engine 3: ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE
Engine 4: ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE
Engine 5: ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE
Engine 6: ZES_ENGINE_GROUP_COPY_SINGLE
Engine 7: ZES_ENGINE_GROUP_MEDIA_ENHANCEMENT_SINGLE
Temperature Sensors: 0
```
NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities.
## Monitor a given device
```
sudo ze-monitor --device ( PCIID | # | BDF | UUID | /dev/dri/render* ) \
--interval ms
```
NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities.
Output:
```bash
$ sudo ze-monitor --device 2 --interval 500
Device: 8086:E20B (Intel(R) Graphics [0xe20b])
Total Memory: 12809404416
Free memory: [# 55% ############################ ]
Power usage: 165.0W
------------------------------------------------------------------------------------------
PID COMMAND-LINE
USED MEMORY SHARED MEMORY ENGINE FLAGS
------------------------------------------------------------------------------------------
1 /sbin/init splash
MEM: 106102784 SHR: 100663296 FLAGS: RENDER COMPUTE
1606 /usr/lib/systemd/systemd-logind
MEM: 106102784 SHR: 100663296 FLAGS: RENDER COMPUTE
5164 /usr/bin/gnome-shell
MEM: 530513920 SHR: 503316480 FLAGS: RENDER COMPUTE
5237 /usr/bin/Xwayland :1024 -rootless -nores...isplayfd 6 -initfd 7 -byteswappedclients
MEM: 0 SHR: 0 FLAGS:
40480 python chat.py
MEM: 5544226816 SHR: 0 FLAGS: DMA COMPUTE
```
If you pass `--one-shot`, statistics will be gathered, displayed, and then ze-monitor will exit.
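For example, to capture a single snapshot of device 2 from a script rather than the interactive display:
```bash
# Gather statistics once, print them, and exit (see Security for required permissions)
sudo ze-monitor --device 2 --one-shot
```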

View File

@ -1,195 +0,0 @@
div {
box-sizing: border-box
}
.SystemInfo {
display: flex;
flex-direction: column;
gap: 5px;
padding: 5px;
flex-grow: 1;
}
.SystemInfoItem {
display: flex; /* Grid for individual items */
flex-direction: row;
flex-grow: 1;
}
.SystemInfoItem > div:first-child {
display: flex;
justify-self: end; /* Align the first column content to the right */
width: 10rem;
}
.SystemInfoItem > div:last-child {
display: flex;
flex-grow: 1;
justify-self: end; /* Align the first column content to the right */
}
.ChatBox {
display: flex;
flex-direction: column;
flex-grow: 1;
max-width: 800px;
margin: 0 auto;
}
.Controls {
display: flex;
background-color: #F5F5F5;
border: 1px solid #E0E0E0;
overflow-y: auto;
padding: 10px;
flex-direction: column;
margin-left: 10px;
box-sizing: border-box;
overflow-x: visible;
min-width: 10rem;
width: 100%;
flex-grow: 1;
}
@media (min-width: 768px) {
.Controls {
width: 600px; /* or whatever you prefer for a desktop */
max-width: 80vw; /* Optional: Prevent it from taking up too much space */
}
}
.Conversation {
display: flex;
background-color: #F5F5F5;
border: 1px solid #E0E0E0;
flex-grow: 1;
overflow-y: auto;
padding: 10px;
flex-direction: column;
height: 100%;
}
.user-message.MuiCard-root {
background-color: #DCF8C6;
border: 1px solid #B2E0A7;
color: #333333;
margin-bottom: 0.75rem;
margin-left: 1rem;
border-radius: 0.25rem;
min-width: 80%;
max-width: 80%;
justify-self: right;
display: flex;
white-space: pre-wrap;
overflow-wrap: break-word;
word-break: break-word;
flex-direction: column;
align-items: self-end;
align-self: end;
flex-grow: 0;
}
.assistant-message.MuiCard-root {
border: 1px solid #E0E0E0;
background-color: #FFFFFF;
color: #333333;
margin-bottom: 0.75rem;
margin-right: 1rem;
min-width: 70%;
border-radius: 0.25rem;
justify-self: left;
display: flex;
white-space: pre-wrap;
overflow-wrap: break-word;
word-break: break-word;
flex-direction: column;
flex-grow: 0;
padding: 16px 0;
font-size: 0.9rem;
}
.assistant-message .MuiCardContent-root {
padding: 0 16px !important;
font-size: 0.9rem;
}
.assistant-message span {
font-size: 0.9rem;
}
.user-message .MuiCardContent-root:last-child,
.assistant-message .MuiCardContent-root:last-child {
padding: 16px;
}
.users > div {
padding: 0.25rem;
}
.user-active {
font-weight: bold;
}
.metadata {
border: 1px solid #E0E0E0;
font-size: 0.75rem;
padding: 0.125rem;
}
/* Reduce general whitespace in markdown content */
.assistant-message p.MuiTypography-root {
margin-top: 0.5rem;
margin-bottom: 0.5rem;
font-size: 0.9rem;
}
/* Reduce space between headings and content */
.assistant-message h1.MuiTypography-root,
.assistant-message h2.MuiTypography-root,
.assistant-message h3.MuiTypography-root,
.assistant-message h4.MuiTypography-root,
.assistant-message h5.MuiTypography-root,
.assistant-message h6.MuiTypography-root {
margin-top: 1rem;
margin-bottom: 0.5rem;
font-size: 1rem;
}
/* Reduce space in lists */
.assistant-message ul.MuiTypography-root,
.assistant-message ol.MuiTypography-root {
margin-top: 0.5rem;
margin-bottom: 0.5rem;
font-size: 0.9rem;
}
.assistant-message li.MuiTypography-root {
margin-bottom: 0.25rem;
font-size: 0.9rem;
}
.assistant-message .MuiTypography-root li {
margin-top: 0;
margin-bottom: 0;
padding: 0;
font-size: 0.9rem;
}
/* Reduce space around code blocks */
.assistant-message .MuiTypography-root pre {
border: 1px solid #F5F5F5;
border-radius: 0.5rem;
padding: 0.5rem 0.75rem;
margin-top: 0;
margin-bottom: 0;
font-size: 0.9rem;
}
.PromptStats .MuiTableCell-root {
font-size: 0.8rem;
}
#SystemPromptInput {
font-size: 0.9rem;
line-height: 1.25rem;
}

File diff suppressed because it is too large.

View File

@ -6,14 +6,8 @@ import asyncio
import json
import logging
import os
import queue
import re
import time
from datetime import datetime
import textwrap
import threading
import uuid
import random
import subprocess
import re
import math
@ -26,29 +20,15 @@ def try_import(module_name, pip_name=None):
print(f" pip install {pip_name or module_name}")
# Third-party modules with import checks
try_import('gradio')
try_import('ollama')
try_import('pytz')
try_import('requests')
try_import('yfinance', 'yfinance')
try_import('dotenv', 'python-dotenv')
try_import('geopy', 'geopy')
try_import('hyphen', 'PyHyphen')
try_import('bs4', 'beautifulsoup4')
try_import('nltk')
try_import('fastapi')
import nltk
from dotenv import load_dotenv
from geopy.geocoders import Nominatim
import gradio as gr
import ollama
import pytz
import requests
import yfinance as yf
from hyphen import hyphenator
from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, StreamingResponse, FileResponse, RedirectResponse
from fastapi.middleware.cors import CORSMiddleware
@ -143,7 +123,7 @@ MODEL_NAME = defines.model
LOG_LEVEL="info"
USE_TLS=False
WEB_HOST="0.0.0.0"
WEB_PORT=5000
WEB_PORT=8911
DEFAULT_HISTORY_LENGTH=5
# %%
@ -157,14 +137,37 @@ When answering queries, follow these steps:
1. First analyze the query to determine if real-time information might be helpful
2. Even when [{context_tag}] is provided, consider whether the tools would provide more current or comprehensive information
3. Use the provided tools whenever they would enhance your response, regardless of whether context is also available
4. When presenting information like weather forecasts, include relevant emojis immediately before the corresponding text. For example, for a sunny day, say \"☀️ Sunny\", or if the forecast says there will be \"rain showers\", say \"🌧️ Rain showers\". Use this mapping for weather emojis: Sunny: ☀️, Cloudy: ☁️, Rainy: 🌧️, Snowy: ❄️
4. When both [{context_tag}] and tool outputs are relevant, synthesize information from both sources to provide the most complete answer
5. Always prioritize the most up-to-date and relevant information, whether it comes from [{context_tag}] or tools
6. If [{context_tag}] and tool outputs contain conflicting information, prefer the tool outputs as they likely represent more current data
Always use tools and [{context_tag}] when possible. Be concise, and never make up information. If you do not know the answer, say so.
""".strip()
system_generate_resume = f"""
You are a professional resume writer. Your task is to write a polished, tailored resume for a specific job based only on the individual's [WORK HISTORY].
When answering queries, follow these steps:
1. You must not invent or assume any information not explicitly present in the [WORK HISTORY].
2. Analyze the [JOB DESCRIPTION] to identify skills required for the job.
3. Use the [JOB DESCRIPTION] provided to guide the focus, tone, and relevant skills or experience to highlight.
4. Identify and emphasize the experiences, achievements, and responsibilities from the [WORK HISTORY] that best align with the [JOB DESCRIPTION].
5. Do not list skills from the [JOB DESCRIPTION] as skills the user possesses unless they are listed in the [WORK HISTORY].
Structure the resume professionally with the following sections where applicable:
* Name: Use the full name.
* Professional Summary: A 2-4 sentence overview tailored to the job.
* Skills: A bullet list of key skills derived from the work history and relevant to the job.
* Professional Experience: A detailed list of roles, achievements, and responsibilities from the work history that relate to the job.
* Education: Include only if available in the work history.
Do not include any information unless it is supported by the provided [WORK HISTORY].
Ensure the language is clear, concise, and aligned with industry standards for professional resumes.
"""
tool_log = []
command_log = []
model = None
@ -445,6 +448,9 @@ class WebServer:
@self.app.post('/api/chat/{context_id}')
async def chat_endpoint(context_id: str, request: Request):
if not is_valid_uuid(context_id):
logging.warning(f"Invalid context_id: {context_id}")
return JSONResponse({"error": "Invalid context_id"}, status_code=400)
context = self.upsert_context(context_id)
data = await request.json()
@ -468,7 +474,36 @@ class WebServer:
"X-Accel-Buffering": "no" # Prevents Nginx buffering if you're using it
}
)
@self.app.post('/api/generate-resume/{context_id}')
async def post_generate_resume(context_id: str, request: Request):
if not is_valid_uuid(context_id):
logging.warning(f"Invalid context_id: {context_id}")
return JSONResponse({"error": "Invalid context_id"}, status_code=400)
context = self.upsert_context(context_id)
data = await request.json()
# Create a custom generator that ensures flushing
async def flush_generator():
async for message in self.generate_resume(context=context, content=data['content']):
# Convert to JSON and add newline
yield json.dumps(message) + "\n"
# Save the history as its generated
self.save_context(context_id)
# Explicitly flush after each yield
await asyncio.sleep(0) # Allow the event loop to process the write
# Return StreamingResponse with appropriate headers
return StreamingResponse(
flush_generator(),
media_type="application/json",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no" # Prevents Nginx buffering if you're using it
}
)
@self.app.post('/api/context')
async def create_context():
context = self.create_context()
@ -542,12 +577,12 @@ class WebServer:
@self.app.get('/{path:path}')
async def serve_static(path: str):
full_path = os.path.join('/opt/airc/ketr-chat/build', path)
full_path = os.path.join(defines.static_content, path)
if os.path.exists(full_path) and os.path.isfile(full_path):
self.logging.info(f"Serve static request for {full_path}")
return FileResponse(full_path)
self.logging.info(f"Serve index.html for {path}")
return FileResponse('/opt/airc/ketr-chat/build/index.html')
return FileResponse(os.path.join(defines.static_content, 'index.html'))
import requests
@ -565,11 +600,11 @@ class WebServer:
context = self.upsert_context(session_id)
# Create sessions directory if it doesn't exist
if not os.path.exists("sessions"):
os.makedirs("sessions")
if not os.path.exists(defines.session_dir):
os.makedirs(defines.session_dir)
# Create the full file path
file_path = os.path.join("sessions", session_id)
file_path = os.path.join(defines.session_dir, session_id)
# Serialize the data to JSON and write to file
with open(file_path, 'w') as f:
@ -587,7 +622,7 @@ class WebServer:
Returns:
The deserialized dictionary, or a new context if it doesn't exist on disk.
"""
file_path = os.path.join("sessions", session_id)
file_path = os.path.join(defines.session_dir, session_id)
# Check if the file exists
if not os.path.exists(file_path):
@ -606,9 +641,11 @@ class WebServer:
context = {
"id": context_id,
"system": system_context,
"system_generate_resume": system_generate_resume,
"llm_history": [],
"user_history": [],
"tools": default_tools(tools),
"resume_history": [],
"rags": rags.copy(),
"context_tokens": round(len(str(system_context)) * 3 / 4), # Estimate context usage
"message_history_length": 5 # Number of messages to supply in context
@ -681,10 +718,9 @@ class WebServer:
messages = context["system"] + llm_history
try:
yield {"status": "processing", "message": "Processing request..."}
# Estimate token length of new messages
ctx_size = self.get_optimal_ctx_size(context["context_tokens"], messages=llm_history[-1]["content"])
yield {"status": "processing", "message": "Processing request...", "num_ctx": ctx_size}
# Use the async generator in an async for loop
response = self.client.chat(model=self.model, messages=messages, tools=llm_tools(context["tools"]), options={ 'num_ctx': ctx_size })
@ -734,9 +770,9 @@ class WebServer:
metadata["tools"] = tools_used
yield {"status": "processing", "message": "Generating final response..."}
# Estimate token length of new messages
ctx_size = self.get_optimal_ctx_size(context["context_tokens"], messages=messages[pre_add_index:])
yield {"status": "processing", "message": "Generating final response...", "num_ctx": ctx_size }
# Decrease creativity when processing tool call requests
response = self.client.chat(model=self.model, messages=messages, stream=False, options={ 'num_ctx': ctx_size }) #, "temperature": 0.5 })
metadata["eval_count"] += response['eval_count']
@ -756,7 +792,7 @@ class WebServer:
user_history.append(final_message)
# Return the REST API with metadata
yield {"status": "done", "message": final_message, "metadata": metadata}
yield {"status": "done", "message": final_message }
except Exception as e:
logging.exception({ 'model': self.model, 'messages': messages, 'error': str(e) })
@ -765,7 +801,79 @@ class WebServer:
finally:
self.processing = False
def run(self, host='0.0.0.0', port=5000, **kwargs):
async def generate_resume(self, context, content):
content = content.strip()
if not content:
yield {"status": "error", "message": "Invalid request"}
return
if self.processing:
yield {"status": "error", "message": "Busy"}
return
self.processing = True
resume_history = context["resume_history"]
metadata = {
"rag": {},
"tools": [],
"eval_count": 0,
"eval_duration": 0,
"prompt_eval_count": 0,
"prompt_eval_duration": 0,
}
rag_docs = []
for rag in context["rags"]:
if rag["enabled"] and rag["name"] == "JPK": # Only support JPK rag right now...
yield {"status": "processing", "message": f"Checking RAG context {rag['name']}..."}
chroma_results = Rag.find_similar(llm=self.client, collection=self.collection, query=content, top_k=10)
if chroma_results:
rag_docs.extend(chroma_results["documents"])
metadata["rag"] = { "name": rag["name"], **chroma_results }
preamble = f"The current time is {DateTime()}\n"
if len(rag_docs):
preamble = f"""[WORK HISTORY]:\n"""
for doc in rag_docs:
preamble += doc
preamble += f"\n[/WORK HISTORY]\n"
content = f"{preamble}\nUse the above WORK HISTORY to create the resume for this JOB DESCRIPTION. Do not use the JOB DESCRIPTION skills as skills the user posseses unless listed in WORK HISTORY:\n[JOB DESCRIPTION]\n{content}\n[/JOB DESCRIPTION]\n"
try:
# Estimate token length of new messages
ctx_size = self.get_optimal_ctx_size(context["context_tokens"], messages=[system_generate_resume, content])
yield {"status": "processing", "message": "Processing request...", "num_ctx": ctx_size}
# Use the async generator in an async for loop
response = self.client.generate(model=self.model, system=system_generate_resume, prompt=content, options={ 'num_ctx': ctx_size })
metadata["eval_count"] += response['eval_count']
metadata["eval_duration"] += response['eval_duration']
metadata["prompt_eval_count"] += response['prompt_eval_count']
metadata["prompt_eval_duration"] += response['prompt_eval_duration']
context["context_tokens"] = response['prompt_eval_count'] + response['eval_count']
reply = response['response']
final_message = {"role": "assistant", "content": reply, "metadata": metadata }
resume_history.append({
'job_description': content,
'resume': reply,
'metadata': metadata
})
# Return the REST API with metadata
yield {"status": "done", "message": final_message }
except Exception as e:
logging.exception({ 'model': self.model, 'content': content, 'error': str(e) })
yield {"status": "error", "message": f"An error occurred: {str(e)}"}
finally:
self.processing = False
def run(self, host='0.0.0.0', port=WEB_PORT, **kwargs):
import uvicorn
uvicorn.run(self.app, host=host, port=port)
@ -783,7 +891,7 @@ def main():
client = ollama.Client(host=args.ollama_server)
model = args.ollama_model
documents = Rag.load_text_files("doc")
documents = Rag.load_text_files(defines.doc_dir)
print(f"Documents loaded {len(documents)}")
collection = Rag.get_vector_collection()
chunks = Rag.create_chunks_from_documents(documents)

View File

@ -4,5 +4,8 @@ ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
#model = "llama3.2"
model="qwen2.5:7b"
encoding_model="mxbai-embed-large"
persist_directory="./chromadb"
max_context = 2048*8*2
persist_directory="/root/.cache/chromadb"
max_context = 2048*8*2
doc_dir = "/opt/backstory/docs/"
session_dir = "/opt/backstory/sessions"
static_content = '/opt/backstory/frontend/build'

View File

@ -104,7 +104,7 @@ if __name__ == "__main__":
# When running directly, use absolute imports
import defines
llm = ollama.Client(host=defines.ollama_api_url)
documents = load_text_files("doc")
documents = load_text_files(defines.doc_dir)
print(f"Documents loaded {len(documents)}")
collection = get_vector_collection()
chunks = create_chunks_from_documents(documents)
@ -113,5 +113,6 @@ if __name__ == "__main__":
print(f"Document types: {doc_types}")
print(f"Vectorstore created with {collection.count()} documents")
query = "Can you describe James Ketrenos' work history?"
top_docs = find_similar(llm, query, top_k=3)
top_docs = find_similar(llm, collection, query, top_k=3)
print(top_docs)