Rethemed and renamed

.gitignore (vendored, 3 changes)
@@ -1,4 +1,5 @@
.env
cache/**
jupyter/**
ollama/**
sessions/**
Dockerfile (74 changes)
@@ -133,20 +133,20 @@ RUN pip install requests wheel
RUN python setup.py clean --all bdist_wheel --linux

#
# The main airc image:
# The main backstory image:
# * python 3.11
# * pytorch xpu w/ ipex-llm
# * ollama-ipex-llm
# * src/server.py - model server supporting RAG and fine-tuned models
#
# Agents using server:
# * src/web-ui.py - REACT server (airc.ketrenos.com)
# * src/irc.py - IRC backend (irc.libera.chat #airc-test)
# * src/web-ui.py - REACT server (backstory.ketrenos.com)
# * src/irc.py - IRC backend (irc.libera.chat #backstory-test)
# * src/cli.py - Command line chat
#
# Utilities:
# * src/training-fine-tune.py - Perform fine-tuning on curated documents
FROM ubuntu:oracular AS airc
FROM ubuntu:oracular AS backstory

COPY --from=python-build /opt/python /opt/python

@@ -184,25 +184,25 @@ RUN apt-get update \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

WORKDIR /opt/airc
WORKDIR /opt/backstory

RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2

# Setup the ollama python virtual environment
RUN python3 -m venv --system-site-packages /opt/airc/venv
RUN python3 -m venv --system-site-packages /opt/backstory/venv

# Setup the docker pip shell
RUN { \
echo '#!/bin/bash' ; \
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
echo 'source /opt/airc/venv/bin/activate' ; \
echo 'source /opt/backstory/venv/bin/activate' ; \
echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \
} > /opt/airc/shell ; \
chmod +x /opt/airc/shell
} > /opt/backstory/shell ; \
chmod +x /opt/backstory/shell

# Activate the pip environment on all shell calls
SHELL [ "/opt/airc/shell" ]
SHELL [ "/opt/backstory/shell" ]

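With `SHELL` pointed at the wrapper, every later `RUN` line is executed through it, so the venv (and the oneAPI environment, when present) is active without repeating the setup. Roughly what a `RUN pip install foo` becomes once the wrapper is the shell (a sketch, not part of the Dockerfile):

```bash
# Illustrative expansion only; the wrapper script written above is authoritative.
update-alternatives --set python3 /opt/python/bin/python3.11
if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi
source /opt/backstory/venv/bin/activate
bash -c "pip install foo"
```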
# From https://pytorch-extension.intel.com/installation?platform=gpu&version=v2.6.10%2Bxpu&os=linux%2Fwsl2&package=pip
@@ -246,24 +246,15 @@ RUN pip install einops diffusers # Required for IPEX optimize(), which is requir
# Install packages needed for stock.py
RUN pip install yfinance pyzt geopy PyHyphen nltk

# While running in development mode via bind mounts, don't copy
# the source or follow-on containers will always rebuild whenever
# the source changes.
#COPY /src/ /opt/backstory/src/
COPY /src/requirements.txt /opt/backstory/src/requirements.txt
RUN pip install -r /opt/backstory/src/requirements.txt

SHELL [ "/bin/bash", "-c" ]

# Don't install the full oneapi essentials; just the ones that we seem to need
# RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
# | gpg --dearmor -o /usr/share/keyrings/oneapi-archive-keyring.gpg \
# && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
# | tee /etc/apt/sources.list.d/oneAPI.list \
# && apt-get update \
# && DEBIAN_FRONTEND=noninteractive apt-get install -y \
# intel-oneapi-mkl-sycl-2025.0 \
# intel-oneapi-dnnl-2025.0 \
# intel-oneapi-dpcpp-cpp-2025.0 \
# && apt-get clean \
# && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
# dpcpp is needed for LoRA backend when
# libze-dev is needed for LoRA/triton backend in order to build stuff
# Unfortunately, that fails with:
# ImportError: /opt/airc/venv/lib/python3.11/site-packages/intel_extension_for_pytorch/lib/libintel-ext-pt-cpu.so: undefined symbol: _ZNK5torch8autograd4Node4nameEv
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
libncurses6 \
@@ -274,24 +265,18 @@ COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
RUN dpkg -i /opt/ze-monitor-*deb && rm /opt/ze-monitor-*deb
RUN usermod -aG ze-monitor root

# While running in development mode via bind mounts, don't copy
# the source or follow-on containers will always rebuild whenever
# the source changes.
#COPY /src/ /opt/airc/src/
COPY /src/requirements.txt /opt/airc/src/requirements.txt

SHELL [ "/bin/bash", "-c" ]

RUN { \
echo '#!/bin/bash'; \
echo 'echo "Container: airc"'; \
echo 'echo "Container: backstory"'; \
echo 'set -e'; \
echo 'echo "Setting pip environment to /opt/airc"'; \
echo 'echo "Setting pip environment to /opt/backstory"'; \
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
echo 'source /opt/airc/venv/bin/activate'; \
echo 'source /opt/backstory/venv/bin/activate'; \
echo ''; \
echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/airc/)?shell$ ]]; then'; \
echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/backstory/)?shell$ ]]; then'; \
echo ' echo "Dropping to shell"'; \
echo ' shift' ; \
echo ' echo "Running: ${@}"' ; \
@@ -301,8 +286,11 @@ RUN { \
echo ' exec /bin/bash'; \
echo ' fi' ; \
echo 'else'; \
echo ' echo "Launching AIRC chat server..."'; \
echo ' python src/airc.py "${@}"' ; \
echo ' while true; do'; \
echo ' echo "Launching Backstory server..."'; \
echo ' python src/server.py "${@}" || echo "Backstory server died. Restarting in 3 seconds."'; \
echo ' sleep 3'; \
echo ' done' ; \
echo 'fi'; \
} > /entrypoint.sh \
&& chmod +x /entrypoint.sh
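The generated entrypoint drops to a shell when its first argument is `/bin/bash` or `shell`; otherwise it runs `src/server.py` in a restart loop and forwards any arguments to it. A rough usage sketch (service name taken from docker-compose.yml; the flag is a placeholder for whatever options src/server.py accepts):

```bash
# Interactive shell instead of the server
docker compose run --rm backstory shell

# Start the server, passing arguments straight through to src/server.py
docker compose run --rm backstory --some-flag
```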
@@ -422,9 +410,9 @@ ENV PATH=/opt/ollama:${PATH}

ENTRYPOINT [ "/entrypoint.sh" ]

FROM airc AS jupyter
FROM backstory AS jupyter

SHELL [ "/opt/airc/shell" ]
SHELL [ "/opt/backstory/shell" ]

# BEGIN setup Jupyter
RUN pip install \
@@ -433,13 +421,13 @@ RUN pip install \
&& jupyter lab build --dev-build=False --minimize=False
# END setup Jupyter

RUN pip install -r /opt/airc/src/requirements.txt
RUN pip install -r /opt/backstory/src/requirements.txt

SHELL [ "/bin/bash", "-c" ]

RUN { \
echo '#!/bin/bash' ; \
echo 'echo "Container: airc jupyter"' ; \
echo 'echo "Container: backstory jupyter"' ; \
echo 'if [[ ! -e "/root/.cache/hub/token" ]]; then' ; \
echo ' if [[ "${HF_ACCESS_TOKEN}" == "" ]]; then' ; \
echo ' echo "Set your HF access token in .env as: HF_ACCESS_TOKEN=<token>" >&2' ; \
@@ -451,7 +439,7 @@ RUN { \
echo 'fi' ; \
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
echo 'source /opt/airc/venv/bin/activate' ; \
echo 'source /opt/backstory/venv/bin/activate' ; \
echo 'if [[ "${1}" == "shell" ]]; then echo "Dropping to shell"; /bin/bash; exit $?; fi' ; \
echo 'while true; do' ; \
echo ' echo "Launching jupyter lab"' ; \
README.md (73 changes)
@@ -1,12 +1,12 @@
# AIRC (pronounced Eric)
# Backstory

AI is Really Cool
Backstory is an AI resume agent that provides context into a diverse career narrative.

This project provides an AI chat client. It runs the neuralchat model, enhanced with a little bit of RAG to fetch news RSS feeds.
This project provides an AI chat client. While it can run a variety of LLMs, it currently runs Qwen2.5:7b. The base model is enhanced with a RAG expert system that chunks and embeds any text files placed in `./docs`, and it exposes several utility tools the LLM can use to obtain real-time data.

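Because the RAG corpus is just the contents of `./docs` (bind mounted read-only into the container per docker-compose.yml), adding context is a file drop plus a restart. A sketch of the assumed workflow (file name is a placeholder; exact reload behavior may differ):

```bash
# Add a document to the RAG corpus; any text file under ./docs is chunked and embedded
cp ~/notes/career-highlights.md docs/

# Restart the server so the new document is picked up
docker compose restart backstory
```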
Internally, it is built using PyTorch 2.6, Intel IPEX/LLM, and Python 3.11 (several pip packages were not yet available for Python 3.12 shipped with Ubuntu Oracular 24.10, which these containers are based on.)
Internally, it is built using PyTorch 2.6 and Python 3.11 (several pip packages were not yet available for Python 3.12 shipped with Ubuntu Oracular 24.10, which these containers are based on.)

NOTE: If running on an Intel Arc A series graphics processor, fp64 is not supported and may need to either be emulated or have the model quantized. It has been a while since I've had an A series GPU to test on, so if you run into problems please file an [issue](https://github.com/jketreno/airc/issues)--I have some routines I can put in, but don't have a way to test them.
NOTE: If running on an Intel Arc A series graphics processor, fp64 is not supported and may need to either be emulated or have the model quantized. It has been a while since I've had an A series GPU to test on, so if you run into problems please file an [issue](https://github.com/jketreno/backstory/issues)--I have some routines I can put in, but don't have a way to test them.

# Installation

@@ -26,8 +26,8 @@ NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu]

```bash
git clone https://github.com/jketreno/airc
cd airc
git clone https://github.com/jketreno/backstory
cd backstory
docker compose build
```

@@ -37,12 +37,12 @@ This project provides the following containers:

| Container | Purpose |
|:----------|:---------------------------------------------------------------|
| airc | Base container with GPU packages installed and configured |
| jupyter | airc + Jupyter notebook for running Jupyter sessions |
| backstory | Base container with GPU packages installed and configured. Main server entry point. Also used for frontend development. |
| jupyter | backstory + Jupyter notebook for running Jupyter sessions |
| miniircd | Tiny deployment of an IRC server for testing IRC agents |
| ollama | Installation of Intel's pre-built Ollama.cpp |

While developing airc, sometimes Hugging Face is used directly with models loaded via PyTorch. At other times, especially during rapid-development, the ollama deployment is used. This combination allows you to easily access GPUs running either locally (via the local ollama or HF code)
While developing Backstory, Hugging Face is sometimes used directly, with models loaded via PyTorch; at other times, especially during rapid development, the ollama deployment is used. This combination makes it easy to drive the local GPUs from either path (the local ollama service or the Hugging Face/PyTorch code).

To see which models are easily deployable with Ollama, see the [Ollama Model List](https://ollama.com/search).

@@ -83,33 +83,43 @@ directory which will enable model downloads to be persisted.

NOTE: Models downloaded by most examples will be placed in the ./cache directory, which is bind mounted to the container.

### AIRC
### Backstory

To launch the airc shell interactively, with the pytorch 2.6 environment loaded, use the default entrypoint to launch a shell:
If you just want to run the pre-built environment, you can run:

```bash
docker compose run --rm airc shell
docker compose up -d
```

Once in the shell, you can then launch the model-server.py and then the airc.py client:
That will launch all the required containers. Once loaded, the following ports are exposed:

#### Container: backstory

* 8911 - HTTP port for the chat server. If you want HTTPS (recommended), put an nginx reverse proxy in front of this endpoint. See src/server.py WEB_PORT and the docker-compose `ports` entry under the `backstory` service. This port is safe to expose to the Internet; a quick way to check that it is up is shown below.
* 3000 - During interactive development of the frontend, the React dev server can be found at this port. By default, static content is served through port 8911. Do not expose this port to the Internet.

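A minimal smoke test of the chat server port from the host (output shown is illustrative; static frontend content is served from the same port):

```bash
# Should return the frontend's index.html once the backstory container is up
curl -s http://localhost:8911/ | head -n 5
```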
#### Container: jupyter

* 8888 - Jupyter Notebook. You can access this port for a Jupyter notebook running on top of the `backstory` base container.
* 60673 - This allows you to connect to Gradio apps from outside the container, provided you launch Gradio on port 60673: `.launch(server_name="0.0.0.0", server_port=60673)`

#### Container: ollama

* 11434 - ollama server port. This should not be exposed to the Internet. You can use it via curl/wget locally. The `backstory` and `jupyter` containers are on the same Docker network, so they do not need this port exposed if you don't want it. See docker-compose.yml `ports` under `ollama`.

Once the above is running, to launch the backstory shell interactively:

```bash
docker compose run --rm airc shell
src/airc.py --ai-server=http://localhost:5000 &
src/model-server.py
docker compose exec -it backstory shell
```

By default, src/airc.py will connect to irc.libera.chat on the airc-test channel. See `python src/airc.py --help` for options.

By separating the model-server into its own process, you can develop and tweak the chat backend without losing the IRC connection established by airc.

### Jupyter

```bash
docker compose up jupyter -d
```

The default port for inbound connections is 8888 (see docker-compose.yml). $(pwd)/jupyter is bind mounted to /opt/juypter in the container, which is where notebooks will be saved by default.
The default port for inbound connections is 8888 (see docker-compose.yml). $(pwd)/jupyter is bind mounted to `/opt/jupyter` in the container, which is where notebooks will be saved by default.

To access the jupyter notebook, go to `https://localhost:8888/jupyter`.

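The jupyter entrypoint (see the Dockerfile) expects a Hugging Face token in `.env` when no cached token is present, and docker-compose loads `.env` into the containers. A minimal `.env` sketch (the token value is a placeholder):

```bash
# .env (git-ignored; see .gitignore)
HF_ACCESS_TOKEN=hf_your_token_here
```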
@@ -118,28 +128,17 @@ To access the jupyter notebook, go to `https://localhost:8888/jupyter`.

You can run `ze-monitor` within the launched containers to monitor GPU usage.

```bash
containers=($(docker ps --filter "ancestor=airc" --format "{{.ID}}"))
if [[ ${#containers[*]} -eq 0 ]]; then
echo "Running airc container not found."
else
for container in ${containers[@]}; do
echo "Container ${container} devices:"
docker exec -it ${container} ze-monitor
done
fi
docker compose exec backstory ze-monitor --list
```

If an airc container is running, you should see something like:

```
Container 5317c503e771 devices:
Device 1: 8086:A780 (Intel(R) UHD Graphics 770)
Device 2: 8086:E20B (Intel(R) Graphics [0xe20b])
```

You can then launch ze-monitor in that container specifying the device you wish to monitor:
To monitor a device:

```
containers=($(docker ps --filter "ancestor=airc" --format "{{.ID}}"))
docker exec -it ${containers[0]} ze-monitor --device 2
```bash
docker compose exec backstory ze-monitor --device 2
```
docker-compose.yml
@@ -1,10 +1,10 @@
services:
  airc:
  backstory:
    build:
      context: .
      dockerfile: Dockerfile
      target: airc
    image: airc
      target: backstory
    image: backstory
    restart: "no"
    env_file:
      - .env
@@ -15,13 +15,14 @@ services:
    networks:
      - internal
    ports:
      - 8911:8911
      - 8911:8911 # Flask React server
      - 3000:3000 # REACT expo while developing frontend
    volumes:
      - ./cache:/root/.cache
      - ./src:/opt/airc/src:rw
      - ./doc:/opt/airc/doc:ro
      - ./results:/opt/airc/results:rw
      - ./ketr-chat:/opt/airc/ketr-chat:rw # Live mount src
      - ./cache:/root/.cache # Persist all models and GPU kernel cache
      - ./sessions:/opt/backstory/sessions:rw # Persist sessions
      - ./docs:/opt/backstory/docs:ro # Live mount of RAG content
      - ./src:/opt/backstory/src:rw # Live mount server src
      - ./frontend:/opt/backstory/frontend:rw # Live mount frontend src
    cap_add: # used for running ze-monitor within container
      - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
      - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
@@ -69,15 +70,11 @@ services:
    ports:
      - 8888:8888 # Jupyter Notebook
      - 60673:60673 # Gradio
      - 5000:5000 # Flask React server
      - 3000:3000 # REACT expo
    networks:
      - internal
    volumes:
      - ./jupyter:/opt/jupyter:rw
      - ./cache:/root/.cache
      - ./src:/opt/airc/src:rw # Live mount src
      - ./ketr-chat:/opt/airc/ketr-chat:rw # Live mount src
    deploy:
      resources:
        limits:
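For frontend development, docker-compose live-mounts `./frontend` into the backstory container and exposes port 3000. One plausible workflow, assuming a standard React dev server and that Node.js is available where you run it (the exact commands depend on the frontend's package.json scripts):

```bash
docker compose exec -it backstory shell
cd /opt/backstory/frontend
npm install   # first time only
npm start     # dev server on port 3000; production builds are served via 8911
```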
BIN  frontend/favicon.ico  (new executable file, 151 KiB)
BIN  frontend/favicon.png  (new file, 6.9 KiB)
@@ -1,11 +1,11 @@
{
  "name": "ketr-chat",
  "name": "airc",
  "version": "0.1.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "ketr-chat",
      "name": "airc",
      "version": "0.1.0",
      "dependencies": {
        "@emotion/react": "^11.14.0",
@@ -1,5 +1,5 @@
{
  "name": "ketr-chat",
  "name": "airc",
  "version": "0.1.0",
  "private": true,
  "dependencies": {
BIN  frontend/public/backstory_favicon.ico  (new executable file, 4.5 KiB)
frontend/public/docs/about.md (new file, 7 lines)
@@ -0,0 +1,7 @@
# About Backstory

This application was developed to achieve a few goals:

1. See if it is realistic to self-host AI LLMs. Turns out, it is -- with constraints.
2. Provide a recent example of my capabilities; many of my projects while working for Intel were internally facing. The source code to this project is available on [GitHub](https://github.com/jketreno/backstory).
3. My career at Intel was diverse. Over the years, I have worked on many projects almost everywhere in the computer ecosystem. That results in a resume that is either too long or too short. This application is intended to provide a quick way for employers to ask the LLM about me.
BIN  frontend/public/favicon.ico  (new executable file, 9.4 KiB)
BIN  frontend/public/favicon.png  (new executable file, 40 KiB)
@@ -24,7 +24,7 @@
      work correctly both with client-side routing and a non-root public URL.
      Learn how to configure a non-root public URL by running `npm run build`.
    -->
    <title>ai.ketrenos.com</title>
    <title>Backstory</title>
  </head>
  <body>
    <noscript>You need to enable JavaScript to run this app.</noscript>
BIN  frontend/public/logo.png  (new executable file, 1.2 MiB)
BIN  frontend/public/logo192.png  (new file, 32 KiB)
BIN  frontend/public/logo512.png  (new file, 149 KiB)
BIN  frontend/public/main-logo.png  (new executable file, 1.2 MiB)
@ -2,6 +2,20 @@ div {
|
||||
box-sizing: border-box
|
||||
}
|
||||
|
||||
.TabPanel {
|
||||
display: flex;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
.MuiToolbar-root .MuiBox-root {
|
||||
border-bottom: none;
|
||||
}
|
||||
|
||||
.MuiTabs-root .MuiTabs-indicator {
|
||||
background-color: orange;
|
||||
|
||||
}
|
||||
|
||||
.SystemInfo {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
@ -32,7 +46,7 @@ div {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
flex-grow: 1;
|
||||
max-width: 800px;
|
||||
max-width: 1024px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
@ -67,6 +81,7 @@ div {
|
||||
padding: 10px;
|
||||
flex-direction: column;
|
||||
height: 100%;
|
||||
max-height: 100%;
|
||||
}
|
||||
|
||||
.user-message.MuiCard-root {
|
||||
@ -89,6 +104,7 @@ div {
|
||||
flex-grow: 0;
|
||||
}
|
||||
|
||||
.About.MuiCard-root,
|
||||
.assistant-message.MuiCard-root {
|
||||
border: 1px solid #E0E0E0;
|
||||
background-color: #FFFFFF;
|
||||
@ -108,18 +124,30 @@ div {
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
|
||||
.About.MuiCard-root {
|
||||
display: flex;
|
||||
flex-grow: 1;
|
||||
width: 100%;
|
||||
margin-left: 0;
|
||||
margin-right: 0;
|
||||
}
|
||||
|
||||
.About .MuiCardContent-root,
|
||||
.assistant-message .MuiCardContent-root {
|
||||
padding: 0 16px !important;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.About span,
|
||||
.assistant-message span {
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.user-message .MuiCardContent-root:last-child,
|
||||
.assistant-message .MuiCardContent-root:last-child {
|
||||
padding: 16px;
|
||||
.assistant-message .MuiCardContent-root:last-child,
|
||||
.About .MuiCardContent-root:last-child {
|
||||
padding: 16px;
|
||||
}
|
||||
|
||||
.users > div {
|
||||
@ -137,6 +165,7 @@ div {
|
||||
}
|
||||
|
||||
/* Reduce general whitespace in markdown content */
|
||||
.About p.MuiTypography-root,
|
||||
.assistant-message p.MuiTypography-root {
|
||||
margin-top: 0.5rem;
|
||||
margin-bottom: 0.5rem;
|
||||
@ -149,7 +178,13 @@ div {
|
||||
.assistant-message h3.MuiTypography-root,
|
||||
.assistant-message h4.MuiTypography-root,
|
||||
.assistant-message h5.MuiTypography-root,
|
||||
.assistant-message h6.MuiTypography-root {
|
||||
.assistant-message h6.MuiTypography-root,
|
||||
.About h1.MuiTypography-root,
|
||||
.About h2.MuiTypography-root,
|
||||
.About h3.MuiTypography-root,
|
||||
.About h4.MuiTypography-root,
|
||||
.About h5.MuiTypography-root,
|
||||
.About h6.MuiTypography-root {
|
||||
margin-top: 1rem;
|
||||
margin-bottom: 0.5rem;
|
||||
font-size: 1rem;
|
||||
@ -157,17 +192,21 @@ div {
|
||||
|
||||
/* Reduce space in lists */
|
||||
.assistant-message ul.MuiTypography-root,
|
||||
.assistant-message ol.MuiTypography-root {
|
||||
margin-top: 0.5rem;
|
||||
.assistant-message ol.MuiTypography-root,
|
||||
.About ul.MuiTypography-root,
|
||||
.About ol.MuiTypography-root {
|
||||
margin-top: 0.5rem;
|
||||
margin-bottom: 0.5rem;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.About li.MuiTypography-root,
|
||||
.assistant-message li.MuiTypography-root {
|
||||
margin-bottom: 0.25rem;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.About .MuiTypography-root li,
|
||||
.assistant-message .MuiTypography-root li {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
@ -176,6 +215,7 @@ div {
|
||||
}
|
||||
|
||||
/* Reduce space around code blocks */
|
||||
.About .MuiTypography-root pre,
|
||||
.assistant-message .MuiTypography-root pre {
|
||||
border: 1px solid #F5F5F5;
|
||||
border-radius: 0.5rem;
|
@ -2,6 +2,9 @@ import React, { useState, useEffect, useRef, useCallback, ReactElement } from 'r
|
||||
import FormGroup from '@mui/material/FormGroup';
|
||||
import FormControlLabel from '@mui/material/FormControlLabel';
|
||||
import { styled } from '@mui/material/styles';
|
||||
import Avatar from '@mui/material/Avatar';
|
||||
import Tabs from '@mui/material/Tabs';
|
||||
import Tab from '@mui/material/Tab';
|
||||
import Switch from '@mui/material/Switch';
|
||||
import Divider from '@mui/material/Divider';
|
||||
import Tooltip from '@mui/material/Tooltip';
|
||||
@ -19,7 +22,7 @@ import Drawer from '@mui/material/Drawer';
|
||||
import Toolbar from '@mui/material/Toolbar';
|
||||
import SettingsIcon from '@mui/icons-material/Settings';
|
||||
import CloseIcon from '@mui/icons-material/Close';
|
||||
import IconButton, { IconButtonProps } from '@mui/material/IconButton';
|
||||
import IconButton from '@mui/material/IconButton';
|
||||
import Box from '@mui/material/Box';
|
||||
import CssBaseline from '@mui/material/CssBaseline';
|
||||
import ResetIcon from '@mui/icons-material/History';
|
||||
@ -27,22 +30,16 @@ import SendIcon from '@mui/icons-material/Send';
|
||||
import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
|
||||
import Card from '@mui/material/Card';
|
||||
import CardContent from '@mui/material/CardContent';
|
||||
import CardActions from '@mui/material/CardActions';
|
||||
import Collapse from '@mui/material/Collapse';
|
||||
import Table from '@mui/material/Table';
|
||||
import TableBody from '@mui/material/TableBody';
|
||||
import TableCell from '@mui/material/TableCell';
|
||||
import TableContainer from '@mui/material/TableContainer';
|
||||
import TableHead from '@mui/material/TableHead';
|
||||
import TableRow from '@mui/material/TableRow';
|
||||
|
||||
import PropagateLoader from "react-spinners/PropagateLoader";
|
||||
|
||||
import { MuiMarkdown } from "mui-markdown";
|
||||
import ReactMarkdown from 'react-markdown';
|
||||
import rehypeKatex from 'rehype-katex'
|
||||
import remarkMath from 'remark-math'
|
||||
import 'katex/dist/katex.min.css' // `rehype-katex` does not import the CSS for you
|
||||
|
||||
import { ResumeBuilder } from './ResumeBuilder';
|
||||
import { Message, MessageList } from './Message';
|
||||
import { SeverityType } from './Snack';
|
||||
import { ContextStatus } from './ContextStatus';
|
||||
|
||||
|
||||
import './App.css';
|
||||
|
||||
@ -51,13 +48,10 @@ import '@fontsource/roboto/400.css';
|
||||
import '@fontsource/roboto/500.css';
|
||||
import '@fontsource/roboto/700.css';
|
||||
|
||||
//const use_mui_markdown = true
|
||||
const use_mui_markdown = true
|
||||
|
||||
const welcomeMarkdown = `
|
||||
# Welcome to AIRC
|
||||
# Welcome to Backstory
|
||||
|
||||
This LLM agent was built by James Ketrenos in order to provide answers to any questions you may have about his work history. In addition to being a RAG enabled expert system, the LLM is configured with real-time access to weather, stocks, the current time, and can answer questions about the contents of a website.
|
||||
Backstory was written by James Ketrenos in order to provide answers to questions potential employers may have about his work history. In addition to being a RAG enabled expert system, the LLM is configured with real-time access to weather, stocks, the current time, and can answer questions about the contents of a website.
|
||||
|
||||
You can ask things like:
|
||||
* <ChatQuery text="What is James Ketrenos' work history?"/>
|
||||
@ -68,7 +62,7 @@ You can ask things like:
|
||||
|
||||
Or click the text above to submit that query.
|
||||
|
||||
As with all LLM interactions, the results may not be 100% accurate. If you have questions about my career, I'd love to hear from you. You can send me an email at **james_airc@ketrenos.com**.
|
||||
As with all LLM interactions, the results may not be 100% accurate. If you have questions about my career, I'd love to hear from you. You can send me an email at **james_backstory@ketrenos.com**.
|
||||
`;
|
||||
|
||||
const welcomeMessage = {
|
||||
@ -89,8 +83,6 @@ type Tool = {
|
||||
enabled: boolean
|
||||
};
|
||||
|
||||
type SeverityType = 'error' | 'info' | 'success' | 'warning' | undefined;
|
||||
|
||||
interface ControlsParams {
|
||||
tools: Tool[],
|
||||
rags: Tool[],
|
||||
@ -115,33 +107,13 @@ type SystemInfo = {
|
||||
"CPU": string
|
||||
};
|
||||
|
||||
type MessageMetadata = {
|
||||
rag: any,
|
||||
tools: any[],
|
||||
eval_count: number,
|
||||
eval_duration: number,
|
||||
prompt_eval_count: number,
|
||||
prompt_eval_duration: number
|
||||
};
|
||||
|
||||
type MessageData = {
|
||||
role: string,
|
||||
content: string,
|
||||
user?: string,
|
||||
type?: string,
|
||||
id?: string,
|
||||
isProcessing?: boolean,
|
||||
metadata?: MessageMetadata
|
||||
};
|
||||
|
||||
type MessageList = MessageData[];
|
||||
|
||||
|
||||
const getConnectionBase = (loc: any): string => {
|
||||
if (!loc.host.match(/.*battle-linux.*/)) {
|
||||
return loc.protocol + "//" + loc.host;
|
||||
} else {
|
||||
return loc.protocol + "//battle-linux.ketrenos.com:5000";
|
||||
return loc.protocol + "//battle-linux.ketrenos.com:8911";
|
||||
}
|
||||
}
|
||||
|
||||
@ -316,197 +288,30 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, messageHis
|
||||
</div>);
|
||||
}
|
||||
|
||||
interface ExpandMoreProps extends IconButtonProps {
|
||||
expand: boolean;
|
||||
|
||||
interface TabPanelProps {
|
||||
children?: React.ReactNode;
|
||||
index: number;
|
||||
tab: number;
|
||||
}
|
||||
|
||||
const ExpandMore = styled((props: ExpandMoreProps) => {
|
||||
const { expand, ...other } = props;
|
||||
return <IconButton {...other} />;
|
||||
})(({ theme }) => ({
|
||||
marginLeft: 'auto',
|
||||
transition: theme.transitions.create('transform', {
|
||||
duration: theme.transitions.duration.shortest,
|
||||
}),
|
||||
variants: [
|
||||
{
|
||||
props: ({ expand }) => !expand,
|
||||
style: {
|
||||
transform: 'rotate(0deg)',
|
||||
},
|
||||
},
|
||||
{
|
||||
props: ({ expand }) => !!expand,
|
||||
style: {
|
||||
transform: 'rotate(180deg)',
|
||||
},
|
||||
},
|
||||
],
|
||||
}));
|
||||
|
||||
interface MessageInterface {
|
||||
message: MessageData,
|
||||
submitQuery: (text: string) => void
|
||||
};
|
||||
|
||||
interface MessageMetaInterface {
|
||||
metadata: MessageMetadata
|
||||
}
|
||||
const MessageMeta = ({ metadata }: MessageMetaInterface) => {
|
||||
if (metadata === undefined) {
|
||||
return <></>
|
||||
}
|
||||
|
||||
return (<>
|
||||
<Box sx={{ fontSize: "0.8rem", mb: 1 }}>
|
||||
Below is the LLM performance of this query. Note that if tools are called, the entire context is processed for each separate tool request by the LLM. This can dramatically increase the total time for a response.
|
||||
</Box>
|
||||
<TableContainer component={Card} className="PromptStats" sx={{ mb: 1 }}>
|
||||
<Table aria-label="prompt stats" size="small">
|
||||
<TableHead>
|
||||
<TableRow>
|
||||
<TableCell></TableCell>
|
||||
<TableCell align="right" >Tokens</TableCell>
|
||||
<TableCell align="right">Time (s)</TableCell>
|
||||
<TableCell align="right">TPS</TableCell>
|
||||
</TableRow>
|
||||
</TableHead>
|
||||
<TableBody>
|
||||
<TableRow key="prompt" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
|
||||
<TableCell component="th" scope="row">Prompt</TableCell>
|
||||
<TableCell align="right">{metadata.prompt_eval_count}</TableCell>
|
||||
<TableCell align="right">{Math.round(metadata.prompt_eval_duration / 10 ** 7) / 100}</TableCell>
|
||||
<TableCell align="right">{Math.round(metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration)}</TableCell>
|
||||
</TableRow>
|
||||
<TableRow key="response" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
|
||||
<TableCell component="th" scope="row">Response</TableCell>
|
||||
<TableCell align="right">{metadata.eval_count}</TableCell>
|
||||
<TableCell align="right">{Math.round(metadata.eval_duration / 10 ** 7) / 100}</TableCell>
|
||||
<TableCell align="right">{Math.round(metadata.eval_count * 10 ** 9 / metadata.eval_duration)}</TableCell>
|
||||
</TableRow>
|
||||
<TableRow key="total" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
|
||||
<TableCell component="th" scope="row">Total</TableCell>
|
||||
<TableCell align="right">{metadata.prompt_eval_count + metadata.eval_count}</TableCell>
|
||||
<TableCell align="right">{Math.round((metadata.prompt_eval_duration + metadata.eval_duration) / 10 ** 7) / 100}</TableCell>
|
||||
<TableCell align="right">{Math.round((metadata.prompt_eval_count + metadata.eval_count) * 10 ** 9 / (metadata.prompt_eval_duration + metadata.eval_duration))}</TableCell>
|
||||
</TableRow>
|
||||
</TableBody>
|
||||
</Table>
|
||||
</TableContainer>
|
||||
{
|
||||
metadata.tools !== undefined && metadata.tools.length !== 0 &&
|
||||
<Accordion>
|
||||
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||
<Box sx={{ fontSize: "0.8rem" }}>
|
||||
Tools queried
|
||||
</Box>
|
||||
</AccordionSummary>
|
||||
<AccordionDetails>
|
||||
{metadata.tools.map((tool: any, index: number) => <Box key={index}>
|
||||
{index !== 0 && <Divider />}
|
||||
<Box sx={{ fontSize: "0.75rem", display: "flex", flexDirection: "column", mt: 0.5 }}>
|
||||
<div style={{ display: "flex", paddingRight: "1rem", minWidth: "10rem", whiteSpace: "nowrap" }}>
|
||||
{tool.tool}
|
||||
</div>
|
||||
<div style={{ display: "flex", padding: "3px", whiteSpace: "pre-wrap", flexGrow: 1, border: "1px solid #E0E0E0", maxHeight: "5rem", overflow: "auto" }}>{JSON.stringify(tool.result, null, 2)}</div>
|
||||
</Box>
|
||||
</Box>)}
|
||||
</AccordionDetails>
|
||||
</Accordion>
|
||||
}
|
||||
{
|
||||
metadata.rag.name !== undefined &&
|
||||
<Accordion>
|
||||
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||
<Box sx={{ fontSize: "0.8rem" }}>
|
||||
Top RAG {metadata.rag.ids.length} matches from '{metadata.rag.name}' collection against embedding vector of {metadata.rag.query_embedding.length} dimensions
|
||||
</Box>
|
||||
</AccordionSummary>
|
||||
<AccordionDetails>
|
||||
{metadata.rag.ids.map((id: number, index: number) => <Box key={index}>
|
||||
{index !== 0 && <Divider />}
|
||||
<Box sx={{ fontSize: "0.75rem", display: "flex", flexDirection: "row", mb: 0.5, mt: 0.5 }}>
|
||||
<div style={{ display: "flex", flexDirection: "column", paddingRight: "1rem", minWidth: "10rem" }}>
|
||||
<div style={{ whiteSpace: "nowrap" }}>Doc ID: {metadata.rag.ids[index]}</div>
|
||||
<div style={{ whiteSpace: "nowrap" }}>Similarity: {Math.round(metadata.rag.distances[index] * 100) / 100}</div>
|
||||
<div style={{ whiteSpace: "nowrap" }}>Type: {metadata.rag.metadatas[index].doc_type}</div>
|
||||
<div style={{ whiteSpace: "nowrap" }}>Chunk Len: {metadata.rag.documents[index].length}</div>
|
||||
</div>
|
||||
<div style={{ display: "flex", padding: "3px", flexGrow: 1, border: "1px solid #E0E0E0", maxHeight: "5rem", overflow: "auto" }}>{metadata.rag.documents[index]}</div>
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
</AccordionDetails>
|
||||
</Accordion>
|
||||
}
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
||||
interface ChatQueryInterface {
|
||||
text: string,
|
||||
submitQuery: (text: string) => void
|
||||
}
|
||||
|
||||
const ChatQuery = ({ text, submitQuery }: ChatQueryInterface) => {
|
||||
return (<Button size="small" variant="outlined" sx={{ mb: 1 }} onClick={(e: any) => { console.log(text); submitQuery(text); }}>{text}</Button>);
|
||||
}
|
||||
|
||||
const Message = ({ message, submitQuery }: MessageInterface) => {
|
||||
const [expanded, setExpanded] = React.useState(false);
|
||||
|
||||
const handleExpandClick = () => {
|
||||
setExpanded(!expanded);
|
||||
};
|
||||
|
||||
const formattedContent = message.content.trim();
|
||||
function CustomTabPanel(props: TabPanelProps) {
|
||||
const { children, tab, index, ...other } = props;
|
||||
|
||||
return (
|
||||
<Card sx={{ flexGrow: 1, pb: message.metadata ? 0 : "8px" }} className={(message.role === 'user' ? 'user-message' : 'assistant-message')}>
|
||||
<CardContent>
|
||||
{message.role === 'assistant' ?
|
||||
use_mui_markdown ? <MuiMarkdown children={formattedContent} overrides={{
|
||||
ChatQuery: {
|
||||
component: ChatQuery,
|
||||
props: {
|
||||
submitQuery
|
||||
}, // Optional: pass default props if needed
|
||||
},
|
||||
}} /> : <ReactMarkdown remarkPlugins={[remarkMath]}
|
||||
rehypePlugins={[rehypeKatex]} children={formattedContent} />
|
||||
:
|
||||
<Typography variant="body2" sx={{ color: 'text.secondary' }}>
|
||||
{message.content}
|
||||
</Typography>
|
||||
}
|
||||
</CardContent>
|
||||
{message.metadata && <>
|
||||
<CardActions disableSpacing>
|
||||
<Typography sx={{ color: "darkgrey", p: 1, textAlign: "end", flexGrow: 1 }}>LLM information for this query</Typography>
|
||||
<ExpandMore
|
||||
expand={expanded}
|
||||
onClick={handleExpandClick}
|
||||
aria-expanded={expanded}
|
||||
aria-label="show more"
|
||||
>
|
||||
<ExpandMoreIcon />
|
||||
</ExpandMore>
|
||||
</CardActions>
|
||||
<Collapse in={expanded} timeout="auto" unmountOnExit>
|
||||
<CardContent>
|
||||
<MessageMeta metadata={message.metadata} />
|
||||
</CardContent>
|
||||
</Collapse>
|
||||
</>}
|
||||
</Card>
|
||||
<div
|
||||
className="TabPanel"
|
||||
role="tabpanel"
|
||||
style={{ "display": tab === index ? "flex": "none" }}
|
||||
id={`tabpanel-${index}`}
|
||||
aria-labelledby={`tab-${index}`}
|
||||
{...other}
|
||||
>
|
||||
{tab === index && children}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
type ContextStatus = {
|
||||
context_used: number,
|
||||
max_context: number
|
||||
};
|
||||
|
||||
const App = () => {
|
||||
const [query, setQuery] = useState('');
|
||||
const [conversation, setConversation] = useState<MessageList>([]);
|
||||
@ -531,7 +336,8 @@ const App = () => {
|
||||
const [lastPromptTPS, setLastPromptTPS] = useState<number>(430);
|
||||
const [countdown, setCountdown] = useState<number>(0);
|
||||
const [messageHistoryLength, setMessageHistoryLength] = useState<number>(5);
|
||||
|
||||
const [tab, setTab] = useState<number>(0);
|
||||
const [about, setAbout] = useState<string>("");
|
||||
const timerRef = useRef<any>(null);
|
||||
|
||||
const startCountdown = (seconds: number) => {
|
||||
@ -611,6 +417,33 @@ const App = () => {
|
||||
});
|
||||
}, [systemInfo, setSystemInfo, loc, setSnack, sessionId])
|
||||
|
||||
// Get the About markdown
|
||||
useEffect(() => {
|
||||
if (about !== "") {
|
||||
return;
|
||||
}
|
||||
const fetchAbout = async () => {
|
||||
try {
|
||||
const response = await fetch("/docs/about.md", {
|
||||
method: 'GET',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
});
|
||||
if (!response.ok) {
|
||||
throw Error("/docs/about.md not found");
|
||||
}
|
||||
const data = await response.text();
|
||||
setAbout(data);
|
||||
} catch (error: any) {
|
||||
console.error('Error obtaining About content information:', error);
|
||||
setAbout("No information provided.");
|
||||
};
|
||||
};
|
||||
|
||||
fetchAbout();
|
||||
}, [about, setAbout])
|
||||
|
||||
// Update the context status
|
||||
const updateContextStatus = useCallback(() => {
|
||||
fetch(getConnectionBase(loc) + `/api/context-status/${sessionId}`, {
|
||||
@ -963,7 +796,7 @@ const App = () => {
|
||||
case 'QueryInput':
|
||||
sendQuery(query);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -1148,6 +981,7 @@ const App = () => {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
const handleSnackClose = (
|
||||
event: React.SyntheticEvent | Event,
|
||||
reason?: SnackbarCloseReason,
|
||||
@ -1159,6 +993,10 @@ const App = () => {
|
||||
setSnackOpen(false);
|
||||
};
|
||||
|
||||
const handleTabChange = (event: React.SyntheticEvent, newValue: number) => {
|
||||
setTab(newValue);
|
||||
};
|
||||
|
||||
const Offset = styled('div')(({ theme }) => theme.mixins.toolbar);
|
||||
|
||||
return (
|
||||
@ -1194,9 +1032,16 @@ const App = () => {
|
||||
<ResetIcon />
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
<Typography variant="h6" noWrap component="div">
|
||||
ai.ketrenos.com
|
||||
</Typography>
|
||||
<Box sx={{ borderBottom: 1, borderColor: 'divider' }}>
|
||||
<Tabs value={tab} indicatorColor="secondary"
|
||||
textColor="inherit"
|
||||
variant="fullWidth"
|
||||
onChange={handleTabChange} aria-label="Backstory navigation">
|
||||
<Tab label="Backstory" icon={<Avatar sx={{ width: 24, height: 24 }} variant="rounded" alt="Backstory logo" src="/logo192.png" />} iconPosition="start" />
|
||||
<Tab label="Resume Builder"/>
|
||||
<Tab label="About"/>
|
||||
</Tabs>
|
||||
</Box>
|
||||
|
||||
{
|
||||
mobileOpen === true &&
|
||||
@ -1244,62 +1089,79 @@ const App = () => {
|
||||
{drawer}
|
||||
</Drawer>
|
||||
</Box>
|
||||
<Box component="main" sx={{ flexGrow: 1, overflow: 'auto' }} className="ChatBox" ref={conversationRef}>
|
||||
<Box className="Conversation" sx={{ flexGrow: 2, p: 1 }}>
|
||||
{conversation.map((message, index) => <Message key={index} submitQuery={submitQuery} message={message} />)}
|
||||
<Box sx={{
|
||||
display: "flex",
|
||||
flexDirection: "column",
|
||||
alignItems: "center",
|
||||
justifyContent: "center",
|
||||
mb: 1
|
||||
}}>
|
||||
<PropagateLoader
|
||||
size="10px"
|
||||
loading={processing}
|
||||
aria-label="Loading Spinner"
|
||||
data-testid="loader"
|
||||
|
||||
<CustomTabPanel tab={tab} index={0}>
|
||||
<Box component="main" sx={{ flexGrow: 1, overflow: 'auto' }} className="ChatBox" ref={conversationRef}>
|
||||
<Box className="Conversation" sx={{ flexGrow: 2, p: 1 }}>
|
||||
{conversation.map((message, index) => <Message key={index} submitQuery={submitQuery} message={message} />)}
|
||||
<Box sx={{
|
||||
display: "flex",
|
||||
flexDirection: "column",
|
||||
alignItems: "center",
|
||||
justifyContent: "center",
|
||||
mb: 1
|
||||
}}>
|
||||
<PropagateLoader
|
||||
size="10px"
|
||||
loading={processing}
|
||||
aria-label="Loading Spinner"
|
||||
data-testid="loader"
|
||||
/>
|
||||
{processing === true && countdown > 0 && (
|
||||
<Box
|
||||
sx={{
|
||||
pt: 1,
|
||||
fontSize: "0.7rem",
|
||||
color: "darkgrey"
|
||||
}}
|
||||
>Estimated response time: {countdown}s</Box>
|
||||
)}
|
||||
</Box>
|
||||
<Box sx={{ ml: "0.25rem", fontSize: "0.6rem", color: "darkgrey", display: "flex", flexDirection: "row", gap: 1, mt: "auto" }}>
|
||||
Context used: {contextUsedPercentage}% {contextStatus.context_used}/{contextStatus.max_context}
|
||||
{
|
||||
contextUsedPercentage >= 90 ? <Typography sx={{ fontSize: "0.6rem", color: "red" }}>WARNING: Context almost exhausted. You should start a new chat.</Typography>
|
||||
: (contextUsedPercentage >= 50 ? <Typography sx={{ fontSize: "0.6rem", color: "orange" }}>NOTE: Context is getting long. Queries will be slower, and the LLM may stop issuing tool calls.</Typography>
|
||||
: <></>)
|
||||
}
|
||||
</Box>
|
||||
</Box>
|
||||
<Box className="Query" sx={{ display: "flex", flexDirection: "row", p: 1 }}>
|
||||
<TextField
|
||||
variant="outlined"
|
||||
disabled={processing}
|
||||
fullWidth
|
||||
type="text"
|
||||
value={query}
|
||||
onChange={(e) => setQuery(e.target.value)}
|
||||
onKeyDown={handleKeyPress}
|
||||
placeholder="Enter your question..."
|
||||
id="QueryInput"
|
||||
/>
|
||||
{processing === true && countdown > 0 && (
|
||||
<Box
|
||||
sx={{
|
||||
pt: 1,
|
||||
fontSize: "0.7rem",
|
||||
color: "darkgrey"
|
||||
}}
|
||||
>Estimated response time: {countdown}s</Box>
|
||||
)}
|
||||
</Box>
|
||||
<Box sx={{ ml: "0.25rem", fontSize: "0.6rem", color: "darkgrey", display: "flex", flexDirection: "row", gap: 1, mt: "auto" }}>
|
||||
Context used: {contextUsedPercentage}% {contextStatus.context_used}/{contextStatus.max_context}
|
||||
{
|
||||
contextUsedPercentage >= 90 ? <Typography sx={{ fontSize: "0.6rem", color: "red" }}>WARNING: Context almost exhausted. You should start a new chat.</Typography>
|
||||
: (contextUsedPercentage >= 50 ? <Typography sx={{ fontSize: "0.6rem", color: "orange" }}>NOTE: Context is getting long. Queries will be slower, and the LLM may stop issuing tool calls.</Typography>
|
||||
: <></>)
|
||||
}
|
||||
</Box>
|
||||
</Box>
|
||||
<Box className="Query" sx={{ display: "flex", flexDirection: "row", p: 1 }}>
|
||||
<TextField
|
||||
variant="outlined"
|
||||
disabled={processing}
|
||||
fullWidth
|
||||
type="text"
|
||||
value={query}
|
||||
onChange={(e) => setQuery(e.target.value)}
|
||||
onKeyDown={handleKeyPress}
|
||||
placeholder="Enter your question..."
|
||||
id="QueryInput"
|
||||
/>
|
||||
<AccordionActions>
|
||||
<Tooltip title="Send">
|
||||
<Button sx={{ m: 0 }} variant="contained" onClick={() => { sendQuery(query); }}><SendIcon /></Button>
|
||||
<Button sx={{ m: 1 }} variant="contained" onClick={() => { sendQuery(query); }}><SendIcon /></Button>
|
||||
</Tooltip>
|
||||
</AccordionActions>
|
||||
</Box>
|
||||
</Box>
|
||||
</Box>
|
||||
</Box>
|
||||
</CustomTabPanel>
|
||||
|
||||
<CustomTabPanel tab={tab} index={1}>
|
||||
<ResumeBuilder {...{isScrolledToBottom, scrollToBottom, processing, setProcessing, setSnack, connectionBase: getConnectionBase(loc), sessionId }}/>
|
||||
</CustomTabPanel>
|
||||
|
||||
<CustomTabPanel tab={tab} index={2}>
|
||||
<Box className="ChatBox">
|
||||
<Box className="Conversation">
|
||||
<Card sx={{ flexGrow: 1, }} className={'About ChatBox'}>
|
||||
<CardContent>
|
||||
<MuiMarkdown>{about}</MuiMarkdown>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</Box>
|
||||
</Box>
|
||||
</CustomTabPanel>
|
||||
|
||||
</Box>
|
||||
|
||||
<Snackbar open={snackOpen} autoHideDuration={(snackSeverity === "success" || snackSeverity === "info") ? 1500 : 6000} onClose={handleSnackClose}>
|
||||
<Alert
|
63
frontend/src/BackstoryTheme.tsx
Normal file
@ -0,0 +1,63 @@
|
||||
import { createTheme } from '@mui/material/styles';
|
||||
|
||||
const backstoryTheme = createTheme({
|
||||
palette: {
|
||||
primary: {
|
||||
main: '#1A2536', // Midnight Blue
|
||||
contrastText: '#D3CDBF', // Warm Gray
|
||||
},
|
||||
secondary: {
|
||||
main: '#4A7A7D', // Dusty Teal
|
||||
contrastText: '#FFFFFF', // White
|
||||
},
|
||||
text: {
|
||||
primary: '#2E2E2E', // Charcoal Black
|
||||
secondary: '#1A2536',//D3CDBF', // Warm Gray
|
||||
},
|
||||
background: {
|
||||
default: '#D3CDBF', // Warm Gray
|
||||
paper: '#FFFFFF', // White
|
||||
},
|
||||
action: {
|
||||
active: '#D4A017', // Golden Ochre
|
||||
hover: 'rgba(212, 160, 23, 0.1)', // Golden Ochre with opacity
|
||||
},
|
||||
custom: {
|
||||
highlight: '#D4A017', // Golden Ochre
|
||||
contrast: '#2E2E2E', // Charcoal Black
|
||||
},
|
||||
},
|
||||
typography: {
|
||||
fontFamily: "'Roboto', sans-serif",
|
||||
h1: {
|
||||
fontSize: '2rem',
|
||||
fontWeight: 500,
|
||||
color: '#2E2E2E', // Charcoal Black
|
||||
},
|
||||
body1: {
|
||||
fontSize: '1rem',
|
||||
color: '#2E2E2E', // Charcoal Black
|
||||
},
|
||||
},
|
||||
components: {
|
||||
MuiButton: {
|
||||
styleOverrides: {
|
||||
root: {
|
||||
textTransform: 'none',
|
||||
'&:hover': {
|
||||
backgroundColor: 'rgba(212, 160, 23, 0.2)', // Golden Ochre hover
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
MuiAppBar: {
|
||||
styleOverrides: {
|
||||
root: {
|
||||
backgroundColor: '#1A2536', // Midnight Blue
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
export { backstoryTheme };
|
58
frontend/src/ChatBubble.tsx
Normal file
@ -0,0 +1,58 @@
|
||||
import { Box } from '@mui/material';
|
||||
import { useTheme } from '@mui/material/styles';
|
||||
import { SxProps, Theme } from '@mui/material';
|
||||
import React from 'react';
|
||||
|
||||
interface ChatBubbleProps {
|
||||
isUser: boolean;
|
||||
isFullWidth?: boolean;
|
||||
children: React.ReactNode;
|
||||
sx?: SxProps<Theme>;
|
||||
}
|
||||
|
||||
function ChatBubble({ isUser, isFullWidth, children, sx }: ChatBubbleProps) {
|
||||
const theme = useTheme();
|
||||
|
||||
const userStyle = {
|
||||
backgroundColor: theme.palette.background.default, // Warm Gray (#D3CDBF)
|
||||
border: `1px solid ${theme.palette.custom.highlight}`, // Golden Ochre (#D4A017)
|
||||
borderRadius: '16px 16px 0 16px', // Rounded, flat bottom-right for user
|
||||
padding: theme.spacing(1, 2),
|
||||
maxWidth: isFullWidth ? '100%' : '70%',
|
||||
minWidth: '70%',
|
||||
alignSelf: 'flex-end', // Right-aligned for user
|
||||
color: theme.palette.primary.main, // Midnight Blue (#1A2536) for text
|
||||
'& > *': {
|
||||
color: 'inherit', // Children inherit Midnight Blue unless overridden
|
||||
},
|
||||
};
|
||||
|
||||
const assistantStyle = {
|
||||
backgroundColor: theme.palette.primary.main, // Midnight Blue (#1A2536)
|
||||
border: `1px solid ${theme.palette.secondary.main}`, // Dusty Teal (#4A7A7D)
|
||||
borderRadius: '16px 16px 16px 0', // Rounded, flat bottom-left for assistant
|
||||
padding: theme.spacing(1, 2),
|
||||
maxWidth: isFullWidth ? '100%' : '70%',
|
||||
minWidth: '70%',
|
||||
alignSelf: 'flex-start', // Left-aligned for assistant
|
||||
color: theme.palette.primary.contrastText, // Warm Gray (#D3CDBF) for text
|
||||
'& > *': {
|
||||
color: 'inherit', // Children inherit Warm Gray unless overridden
|
||||
},
|
||||
};
|
||||
|
||||
return (
|
||||
<Box sx={{ ...(isUser ? userStyle : assistantStyle), ...sx }}>
|
||||
{children}
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
|
||||
export type {
|
||||
ChatBubbleProps
|
||||
};
|
||||
|
||||
export {
|
||||
ChatBubble
|
||||
};
|
||||
|
frontend/src/ContextStatus.tsx (new file, 8 lines)
@@ -0,0 +1,8 @@
type ContextStatus = {
  context_used: number,
  max_context: number
};

export type {
  ContextStatus
};
201
frontend/src/DocumentViewer.tsx
Normal file
@ -0,0 +1,201 @@
|
||||
import React, { useState } from 'react';
|
||||
import {
|
||||
Typography,
|
||||
Button,
|
||||
Tabs,
|
||||
Tab,
|
||||
Paper,
|
||||
IconButton,
|
||||
Box,
|
||||
useMediaQuery,
|
||||
Divider,
|
||||
Slider,
|
||||
Stack,
|
||||
TextField
|
||||
} from '@mui/material';
|
||||
import Tooltip from '@mui/material/Tooltip';
|
||||
import { useTheme } from '@mui/material/styles';
|
||||
import SendIcon from '@mui/icons-material/Send';
|
||||
import {
|
||||
ChevronLeft,
|
||||
ChevronRight,
|
||||
SwapHoriz,
|
||||
} from '@mui/icons-material';
|
||||
import { SxProps, Theme } from '@mui/material';
|
||||
import { MuiMarkdown } from "mui-markdown";
|
||||
|
||||
import { MessageData } from './MessageMeta';
|
||||
|
||||
interface DocumentComponentProps {
|
||||
title: string;
|
||||
children?: React.ReactNode;
|
||||
}
|
||||
|
||||
interface DocumentViewerProps {
|
||||
generateResume: (jobDescription: string) => void,
|
||||
resume: MessageData | undefined,
|
||||
sx?: SxProps<Theme>,
|
||||
};
|
||||
|
||||
const DocumentViewer: React.FC<DocumentViewerProps> = ({generateResume, resume, sx} : DocumentViewerProps) => {
|
||||
const [jobDescription, setJobDescription] = useState<string>("");
|
||||
const theme = useTheme();
|
||||
const isMobile = useMediaQuery(theme.breakpoints.down('md'));
|
||||
|
||||
// State for controlling which document is active on mobile
|
||||
const [activeDocMobile, setActiveDocMobile] = useState<number>(0);
|
||||
// State for controlling split ratio on desktop
|
||||
const [splitRatio, setSplitRatio] = useState<number>(50);
|
||||
|
||||
// Handle tab change for mobile
|
||||
const handleTabChange = (_event: React.SyntheticEvent, newValue: number): void => {
|
||||
setActiveDocMobile(newValue);
|
||||
};
|
||||
|
||||
// Adjust split ratio
|
||||
const handleSliderChange = (_event: Event, newValue: number | number[]): void => {
|
||||
setSplitRatio(newValue as number);
|
||||
};
|
||||
|
||||
// Reset split ratio
|
||||
const resetSplit = (): void => {
|
||||
setSplitRatio(50);
|
||||
};
|
||||
|
||||
const handleKeyPress = (event: any) => {
|
||||
if (event.key === 'Enter' && event.ctrlKey) {
|
||||
generateResume(jobDescription);
|
||||
}
|
||||
};
|
||||
|
||||
// Document component
|
||||
const Document: React.FC<DocumentComponentProps> = ({ title, children }) => (
|
||||
<Box
|
||||
sx={{
|
||||
display: 'flex',
|
||||
flexDirection: 'column',
|
||||
flexGrow: 1,
|
||||
overflow: 'hidden',
|
||||
}}
|
||||
>
|
||||
{ title !== "" &&
|
||||
<Box sx={{ display: 'flex', p: 2, bgcolor: 'primary.light', color: 'primary.contrastText' }}>
|
||||
<Typography variant="h6">{title}</Typography>
|
||||
</Box> }
|
||||
<Box sx={{ display: 'flex', p: 2, flexGrow: 1, overflow: 'auto' }}>
|
||||
{children}
|
||||
</Box>
|
||||
</Box>
|
||||
);
|
||||
|
||||
// Mobile view
|
||||
if (isMobile) {
|
||||
return (
|
||||
<Box sx={{ display: 'flex', flexDirection: 'column', flexGrow: 1, ...sx }}>
|
||||
{/* Tabs */}
|
||||
<Tabs
|
||||
value={activeDocMobile}
|
||||
onChange={handleTabChange}
|
||||
variant="fullWidth"
|
||||
sx={{ bgcolor: 'background.paper' }}
|
||||
>
|
||||
<Tab label="Job Description" />
|
||||
<Tab label="Resume" />
|
||||
</Tabs>
|
||||
|
||||
{/* Document display area */}
|
||||
<Box sx={{ display: 'flex', flexDirection: 'column', flexGrow: 1, overflow: 'hidden', p: 2 }}>
|
||||
{activeDocMobile === 0 ? (<>
|
||||
<Document title="">
|
||||
<TextField
|
||||
variant="outlined"
|
||||
fullWidth
|
||||
multiline
|
||||
type="text"
|
||||
sx={{
|
||||
flex: 1, // Makes the TextField fill the parent height
|
||||
flexGrow: 1,
|
||||
maxHeight: '100%', // Prevents it from growing larger than the parent height
|
||||
overflow: 'auto', // Enables scrollbars if the content overflows
|
||||
}}
|
||||
value={jobDescription}
|
||||
onChange={(e) => setJobDescription(e.target.value)}
|
||||
onKeyDown={handleKeyPress}
|
||||
placeholder="Enter job description..."
|
||||
/>
|
||||
</Document>
|
||||
<Button onClick={(e: any) => { generateResume(jobDescription); } }>Generate</Button>
|
||||
</>) : (
|
||||
<Document title="">{ resume !== undefined && <MuiMarkdown children={resume.content.trim()}/> }</Document>
|
||||
)}
|
||||
</Box>
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
|
||||
// Desktop view
|
||||
return (
|
||||
<Box sx={{ display: 'flex', flexDirection: 'column', flexGrow: 1, ...sx }}>
|
||||
{/* Split document view */}
|
||||
<Box sx={{ display: 'flex', flexGrow: 1, overflow: 'hidden', p: 2 }}>
|
||||
<Box sx={{ display: 'flex', flexDirection: 'column', width: `${splitRatio}%`, pr: 1, flexGrow: 1, overflow: 'hidden' }}>
|
||||
<Document title="Job Description">
|
||||
<TextField
|
||||
variant="outlined"
|
||||
fullWidth
|
||||
type="text"
|
||||
multiline
|
||||
sx={{
|
||||
flex: 1, // Makes the TextField fill the parent height
|
||||
flexGrow: 1,
|
||||
maxHeight: '100%', // Prevents it from growing larger than the parent height
|
||||
overflow: 'auto', // Enables scrollbars if the content overflows
|
||||
}}
|
||||
value={jobDescription}
|
||||
onChange={(e) => setJobDescription(e.target.value)}
|
||||
onKeyDown={handleKeyPress}
|
||||
placeholder="Enter job description..."
|
||||
/>
|
||||
</Document>
|
||||
<Tooltip title="Generate">
|
||||
<Button sx={{ m: 1, gap: 1 }} variant="contained" onClick={() => { generateResume(jobDescription); }}>Generate<SendIcon /></Button>
|
||||
</Tooltip>
|
||||
</Box>
|
||||
<Divider orientation="vertical" flexItem />
|
||||
<Box sx={{ display: 'flex', width: `${100 - splitRatio}%`, pl: 1, flexGrow: 1 }}>
|
||||
<Document title="Resume">{ resume !== undefined && <MuiMarkdown children={resume.content.trim()}/> }</Document>
|
||||
</Box>
|
||||
</Box>
|
||||
{/* Split control panel */}
|
||||
<Paper sx={{ p: 2, display: 'flex', alignItems: 'center', justifyContent: 'center' }}>
|
||||
<Stack direction="row" spacing={2} alignItems="center" sx={{ width: '60%' }}>
|
||||
<IconButton onClick={() => setSplitRatio(Math.max(20, splitRatio - 10))}>
|
||||
<ChevronLeft />
|
||||
</IconButton>
|
||||
|
||||
<Slider
|
||||
value={splitRatio}
|
||||
onChange={handleSliderChange}
|
||||
aria-label="Split ratio"
|
||||
min={20}
|
||||
max={80}
|
||||
/>
|
||||
|
||||
<IconButton onClick={() => setSplitRatio(Math.min(80, splitRatio + 10))}>
|
||||
<ChevronRight />
|
||||
</IconButton>
|
||||
|
||||
<IconButton onClick={resetSplit}>
|
||||
<SwapHoriz />
|
||||
</IconButton>
|
||||
</Stack>
|
||||
</Paper>
|
||||
</Box>
|
||||
);
|
||||
};
|
||||
|
||||
export type {
|
||||
DocumentViewerProps
|
||||
};
|
||||
|
||||
export { DocumentViewer };
|
34
frontend/src/ExpandMore.tsx
Normal file
@ -0,0 +1,34 @@
|
||||
import { styled } from '@mui/material/styles';
|
||||
import IconButton, { IconButtonProps } from '@mui/material/IconButton';
|
||||
|
||||
interface ExpandMoreProps extends IconButtonProps {
|
||||
expand: boolean;
|
||||
}
|
||||
|
||||
const ExpandMore = styled((props: ExpandMoreProps) => {
|
||||
const { expand, ...other } = props;
|
||||
return <IconButton {...other} />;
|
||||
})(({ theme }) => ({
|
||||
marginLeft: 'auto',
|
||||
transition: theme.transitions.create('transform', {
|
||||
duration: theme.transitions.duration.shortest,
|
||||
}),
|
||||
variants: [
|
||||
{
|
||||
props: ({ expand }) => !expand,
|
||||
style: {
|
||||
transform: 'rotate(0deg)',
|
||||
},
|
||||
},
|
||||
{
|
||||
props: ({ expand }) => !!expand,
|
||||
style: {
|
||||
transform: 'rotate(180deg)',
|
||||
},
|
||||
},
|
||||
],
|
||||
}));
|
||||
|
||||
export {
|
||||
ExpandMore
|
||||
};
|
100
frontend/src/Message.tsx
Normal file
@ -0,0 +1,100 @@
|
||||
import { useState } from 'react';
|
||||
import Box from '@mui/material/Box';
|
||||
import Button from '@mui/material/Button';
|
||||
import CardContent from '@mui/material/CardContent';
|
||||
import CardActions from '@mui/material/CardActions';
|
||||
import Collapse from '@mui/material/Collapse';
|
||||
import { MuiMarkdown } from "mui-markdown";
|
||||
import Typography from '@mui/material/Typography';
|
||||
import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
|
||||
import { ExpandMore } from './ExpandMore';
|
||||
|
||||
import { MessageData, MessageMeta } from './MessageMeta';
|
||||
import { ChatBubble } from './ChatBubble';
|
||||
|
||||
type MessageList = MessageData[];
|
||||
|
||||
interface MessageInterface {
|
||||
message?: MessageData,
|
||||
isFullWidth?: boolean,
|
||||
submitQuery?: (text: string) => void
|
||||
};
|
||||
|
||||
interface ChatQueryInterface {
|
||||
text: string,
|
||||
submitQuery?: (text: string) => void
|
||||
}
|
||||
|
||||
const ChatQuery = ({ text, submitQuery }: ChatQueryInterface) => {
|
||||
return (submitQuery
|
||||
? <Button variant="outlined" sx={{
|
||||
color: theme => theme.palette.custom.highlight, // Golden Ochre (#D4A017)
|
||||
borderColor: theme => theme.palette.custom.highlight,
|
||||
mt: 1,
|
||||
mb: 1
|
||||
}}
|
||||
size="small" onClick={(e: any) => { console.log(text); submitQuery(text); }}>{text}</Button>
|
||||
: <Box>{text}</Box>);
|
||||
}
|
||||
|
||||
const Message = ({ message, submitQuery, isFullWidth }: MessageInterface) => {
|
||||
const [expanded, setExpanded] = useState<boolean>(false);
|
||||
|
||||
const handleExpandClick = () => {
|
||||
setExpanded(!expanded);
|
||||
};
|
||||
|
||||
if (message === undefined) {
|
||||
return (<></>);
|
||||
}
|
||||
|
||||
const formattedContent = message.content.trim();
|
||||
|
||||
return (
|
||||
<ChatBubble isFullWidth={isFullWidth} isUser={message.role === 'user'} sx={{ flexGrow: 1, pb: message.metadata ? 0 : "8px", mb: 1, mt: 1 }}>
|
||||
<CardContent>
|
||||
{message.role === 'assistant' ?
|
||||
<MuiMarkdown children={formattedContent} overrides={{
|
||||
ChatQuery: {
|
||||
component: ChatQuery,
|
||||
props: {
|
||||
submitQuery
|
||||
}, // Optional: pass default props if needed
|
||||
},
|
||||
}} />
|
||||
:
|
||||
<Typography variant="body2" sx={{ color: 'text.secondary' }}>
|
||||
{message.content}
|
||||
</Typography>
|
||||
}
|
||||
</CardContent>
|
||||
{message.metadata && <>
|
||||
<CardActions disableSpacing>
|
||||
<Typography sx={{ color: "darkgrey", p: 1, textAlign: "end", flexGrow: 1 }}>LLM information for this query</Typography>
|
||||
<ExpandMore
|
||||
expand={expanded}
|
||||
onClick={handleExpandClick}
|
||||
aria-expanded={expanded}
|
||||
aria-label="show more"
|
||||
>
|
||||
<ExpandMoreIcon />
|
||||
</ExpandMore>
|
||||
</CardActions>
|
||||
<Collapse in={expanded} timeout="auto" unmountOnExit>
|
||||
<CardContent>
|
||||
<MessageMeta metadata={message.metadata} />
|
||||
</CardContent>
|
||||
</Collapse>
|
||||
</>}
|
||||
</ChatBubble>
|
||||
);
|
||||
};
|
||||
|
||||
export type {
|
||||
MessageInterface,
|
||||
MessageList
|
||||
};
|
||||
export {
|
||||
Message
|
||||
};
|
||||
|
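The `ChatQuery` override above lets the assistant embed follow-up prompts directly in its markdown; MuiMarkdown maps each `<ChatQuery>` tag to the ChatQuery button component, which calls `submitQuery` when clicked. A minimal illustration of what such a reply might look like (the reply text itself is hypothetical):

```typescript
// Hypothetical assistant reply; the <ChatQuery> tag is mapped by the
// MuiMarkdown `overrides` above to the ChatQuery button component.
const exampleReply = `James has led several embedded graphics projects.

<ChatQuery text="What projects used Python?" />`;
```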
135
frontend/src/MessageMeta.tsx
Normal file
@ -0,0 +1,135 @@
|
||||
//import React, { useState, useEffect, useRef, useCallback, ReactElement } from 'react';
|
||||
import Divider from '@mui/material/Divider';
|
||||
import Accordion from '@mui/material/Accordion';
|
||||
import AccordionSummary from '@mui/material/AccordionSummary';
|
||||
import AccordionDetails from '@mui/material/AccordionDetails';
|
||||
import Box from '@mui/material/Box';
|
||||
import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
|
||||
import Card from '@mui/material/Card';
|
||||
import Table from '@mui/material/Table';
|
||||
import TableBody from '@mui/material/TableBody';
|
||||
import TableCell from '@mui/material/TableCell';
|
||||
import TableContainer from '@mui/material/TableContainer';
|
||||
import TableHead from '@mui/material/TableHead';
|
||||
import TableRow from '@mui/material/TableRow';
|
||||
|
||||
type MessageMetadata = {
|
||||
rag: any,
|
||||
tools: any[],
|
||||
eval_count: number,
|
||||
eval_duration: number,
|
||||
prompt_eval_count: number,
|
||||
prompt_eval_duration: number
|
||||
};
|
||||
|
||||
type MessageData = {
|
||||
role: string,
|
||||
content: string,
|
||||
user?: string,
|
||||
type?: string,
|
||||
id?: string,
|
||||
isProcessing?: boolean,
|
||||
metadata?: MessageMetadata
|
||||
};
|
||||
|
||||
interface MessageMetaInterface {
|
||||
metadata: MessageMetadata
|
||||
}
|
||||
const MessageMeta = ({ metadata }: MessageMetaInterface) => {
|
||||
if (metadata === undefined) {
|
||||
return <></>
|
||||
}
|
||||
|
||||
return (<>
|
||||
<Box sx={{ fontSize: "0.8rem", mb: 1 }}>
|
||||
Below is the LLM performance of this query. Note that if tools are called, the entire context is processed for each separate tool request by the LLM. This can dramatically increase the total time for a response.
|
||||
</Box>
|
||||
<TableContainer component={Card} className="PromptStats" sx={{ mb: 1 }}>
|
||||
<Table aria-label="prompt stats" size="small">
|
||||
<TableHead>
|
||||
<TableRow>
|
||||
<TableCell></TableCell>
|
||||
<TableCell align="right" >Tokens</TableCell>
|
||||
<TableCell align="right">Time (s)</TableCell>
|
||||
<TableCell align="right">TPS</TableCell>
|
||||
</TableRow>
|
||||
</TableHead>
|
||||
<TableBody>
|
||||
<TableRow key="prompt" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
|
||||
<TableCell component="th" scope="row">Prompt</TableCell>
|
||||
<TableCell align="right">{metadata.prompt_eval_count}</TableCell>
|
||||
<TableCell align="right">{Math.round(metadata.prompt_eval_duration / 10 ** 7) / 100}</TableCell>
|
||||
<TableCell align="right">{Math.round(metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration)}</TableCell>
|
||||
</TableRow>
|
||||
<TableRow key="response" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
|
||||
<TableCell component="th" scope="row">Response</TableCell>
|
||||
<TableCell align="right">{metadata.eval_count}</TableCell>
|
||||
<TableCell align="right">{Math.round(metadata.eval_duration / 10 ** 7) / 100}</TableCell>
|
||||
<TableCell align="right">{Math.round(metadata.eval_count * 10 ** 9 / metadata.eval_duration)}</TableCell>
|
||||
</TableRow>
|
||||
<TableRow key="total" sx={{ '&:last-child td, &:last-child th': { border: 0 } }}>
|
||||
<TableCell component="th" scope="row">Total</TableCell>
|
||||
<TableCell align="right">{metadata.prompt_eval_count + metadata.eval_count}</TableCell>
|
||||
<TableCell align="right">{Math.round((metadata.prompt_eval_duration + metadata.eval_duration) / 10 ** 7) / 100}</TableCell>
|
||||
<TableCell align="right">{Math.round((metadata.prompt_eval_count + metadata.eval_count) * 10 ** 9 / (metadata.prompt_eval_duration + metadata.eval_duration))}</TableCell>
|
||||
</TableRow>
|
||||
</TableBody>
|
||||
</Table>
|
||||
</TableContainer>
|
||||
{
|
||||
metadata.tools !== undefined && metadata.tools.length !== 0 &&
|
||||
<Accordion>
|
||||
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||
<Box sx={{ fontSize: "0.8rem" }}>
|
||||
Tools queried
|
||||
</Box>
|
||||
</AccordionSummary>
|
||||
<AccordionDetails>
|
||||
{metadata.tools.map((tool: any, index: number) => <Box key={index}>
|
||||
{index !== 0 && <Divider />}
|
||||
<Box sx={{ fontSize: "0.75rem", display: "flex", flexDirection: "column", mt: 0.5 }}>
|
||||
<div style={{ display: "flex", paddingRight: "1rem", minWidth: "10rem", whiteSpace: "nowrap" }}>
|
||||
{tool.tool}
|
||||
</div>
|
||||
<div style={{ display: "flex", padding: "3px", whiteSpace: "pre-wrap", flexGrow: 1, border: "1px solid #E0E0E0", maxHeight: "5rem", overflow: "auto" }}>{JSON.stringify(tool.result, null, 2)}</div>
|
||||
</Box>
|
||||
</Box>)}
|
||||
</AccordionDetails>
|
||||
</Accordion>
|
||||
}
|
||||
{
|
||||
metadata.rag.name !== undefined &&
|
||||
<Accordion>
|
||||
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||
<Box sx={{ fontSize: "0.8rem" }}>
|
||||
Top RAG {metadata.rag.ids.length} matches from '{metadata.rag.name}' collection against embedding vector of {metadata.rag.query_embedding.length} dimensions
|
||||
</Box>
|
||||
</AccordionSummary>
|
||||
<AccordionDetails>
|
||||
{metadata.rag.ids.map((id: number, index: number) => <Box key={index}>
|
||||
{index !== 0 && <Divider />}
|
||||
<Box sx={{ fontSize: "0.75rem", display: "flex", flexDirection: "row", mb: 0.5, mt: 0.5 }}>
|
||||
<div style={{ display: "flex", flexDirection: "column", paddingRight: "1rem", minWidth: "10rem" }}>
|
||||
<div style={{ whiteSpace: "nowrap" }}>Doc ID: {metadata.rag.ids[index]}</div>
|
||||
<div style={{ whiteSpace: "nowrap" }}>Similarity: {Math.round(metadata.rag.distances[index] * 100) / 100}</div>
|
||||
<div style={{ whiteSpace: "nowrap" }}>Type: {metadata.rag.metadatas[index].doc_type}</div>
|
||||
<div style={{ whiteSpace: "nowrap" }}>Chunk Len: {metadata.rag.documents[index].length}</div>
|
||||
</div>
|
||||
<div style={{ display: "flex", padding: "3px", flexGrow: 1, border: "1px solid #E0E0E0", maxHeight: "5rem", overflow: "auto" }}>{metadata.rag.documents[index]}</div>
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
</AccordionDetails>
|
||||
</Accordion>
|
||||
}
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
||||
export type {
|
||||
MessageMetadata,
|
||||
MessageMetaInterface,
|
||||
MessageData
|
||||
};
|
||||
|
||||
export { MessageMeta };
|
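For reference, the table above treats `eval_duration` and `prompt_eval_duration` as nanoseconds (as Ollama reports them) and derives seconds and tokens-per-second from them. A minimal sketch of those conversions as standalone helpers (the function names are illustrative):

```typescript
// Durations are assumed to be in nanoseconds, matching Ollama's metadata fields.
const nsToSeconds = (ns: number): number => Math.round(ns / 10 ** 7) / 100; // seconds, two decimals
const tokensPerSecond = (tokens: number, ns: number): number => Math.round(tokens * 10 ** 9 / ns);

// Example: 523 prompt tokens evaluated in 1.2e9 ns -> 1.2 s at ~436 TPS
console.log(nsToSeconds(1.2e9), tokensPerSecond(523, 1.2e9));
```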
310
frontend/src/ResumeBuilder.tsx
Normal file
@ -0,0 +1,310 @@
|
||||
import { useState, useCallback, useRef } from 'react';
|
||||
import Box from '@mui/material/Box';
|
||||
import TextField from '@mui/material/TextField';
|
||||
import PropagateLoader from "react-spinners/PropagateLoader";
|
||||
import Tooltip from '@mui/material/Tooltip';
|
||||
import Button from '@mui/material/Button';
|
||||
import SendIcon from '@mui/icons-material/Send';
|
||||
|
||||
import { Message } from './Message';
|
||||
import { SeverityType } from './Snack';
|
||||
import { ContextStatus } from './ContextStatus';
|
||||
import { MessageData } from './MessageMeta';
|
||||
import { DocumentViewer } from './DocumentViewer';
|
||||
|
||||
interface ResumeBuilderProps {
|
||||
scrollToBottom: () => void,
|
||||
isScrolledToBottom: () => boolean,
|
||||
setProcessing: (processing: boolean) => void,
|
||||
processing: boolean,
|
||||
connectionBase: string,
|
||||
sessionId: string | undefined,
|
||||
setSnack: (message: string, severity?: SeverityType) => void,
|
||||
};
|
||||
|
||||
const ResumeBuilder = ({scrollToBottom, isScrolledToBottom, setProcessing, processing, connectionBase, sessionId, setSnack} : ResumeBuilderProps) => {
|
||||
const [jobDescription, setJobDescription] = useState<string>("");
|
||||
const [generateStatus, setGenerateStatus] = useState<MessageData | undefined>(undefined);
|
||||
const [lastEvalTPS, setLastEvalTPS] = useState<number>(35);
|
||||
const [lastPromptTPS, setLastPromptTPS] = useState<number>(430);
|
||||
const [contextStatus, setContextStatus] = useState<ContextStatus>({ context_used: 0, max_context: 0 });
|
||||
const [countdown, setCountdown] = useState<number>(0);
|
||||
const [resume, setResume] = useState<MessageData | undefined>(undefined);
|
||||
const timerRef = useRef<any>(null);
|
||||
|
||||
const updateContextStatus = useCallback(() => {
|
||||
fetch(connectionBase + `/api/context-status/${sessionId}`, {
|
||||
method: 'GET',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
})
|
||||
.then(response => response.json())
|
||||
.then(data => {
|
||||
setContextStatus(data);
|
||||
})
|
||||
.catch(error => {
|
||||
console.error('Error getting context status:', error);
|
||||
setSnack("Unable to obtain context status.", "error");
|
||||
});
|
||||
}, [setContextStatus, connectionBase, setSnack, sessionId]);
|
||||
|
||||
const startCountdown = (seconds: number) => {
|
||||
if (timerRef.current) clearInterval(timerRef.current);
|
||||
setCountdown(seconds);
|
||||
timerRef.current = setInterval(() => {
|
||||
setCountdown((prev) => {
|
||||
if (prev <= 1) {
|
||||
clearInterval(timerRef.current);
|
||||
timerRef.current = null;
|
||||
if (isScrolledToBottom()) {
|
||||
setTimeout(() => {
|
||||
scrollToBottom();
|
||||
}, 50)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
return prev - 1;
|
||||
});
|
||||
}, 1000);
|
||||
};
|
||||
|
||||
const stopCountdown = () => {
|
||||
if (timerRef.current) {
|
||||
clearInterval(timerRef.current);
|
||||
timerRef.current = null;
|
||||
setCountdown(0);
|
||||
}
|
||||
};
|
||||
|
||||
if (sessionId === undefined) {
|
||||
return (<></>);
|
||||
}
|
||||
|
||||
|
||||
|
||||
const handleKeyPress = (event: any) => {
|
||||
if (event.key === 'Enter' && !event.ctrlKey) {
|
||||
generateResume(jobDescription);
|
||||
}
|
||||
};
|
||||
|
||||
const generateResume = async (jobDescription: string) => {
|
||||
if (!jobDescription.trim()) return;
|
||||
// setResume(undefined);
|
||||
|
||||
let scrolledToBottom;
|
||||
|
||||
scrollToBottom();
|
||||
|
||||
try {
|
||||
scrolledToBottom = isScrolledToBottom();
|
||||
setProcessing(true);
|
||||
|
||||
// Add initial processing message
|
||||
setGenerateStatus({ role: 'assistant', content: 'Processing request...' });
|
||||
if (scrolledToBottom) {
|
||||
setTimeout(() => { scrollToBottom() }, 50);
|
||||
}
|
||||
|
||||
// Make the fetch request with proper headers
|
||||
const response = await fetch(connectionBase + `/api/generate-resume/${sessionId}`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Accept': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({ content: jobDescription.trim() }),
|
||||
});
|
||||
|
||||
// We'll guess that the response will be around 500 tokens...
|
||||
const token_guess = 500;
|
||||
const estimate = Math.round(token_guess / lastEvalTPS + contextStatus.context_used / lastPromptTPS);
|
||||
|
||||
scrolledToBottom = isScrolledToBottom();
|
||||
setSnack(`Job description sent. Response estimated in ${estimate}s.`, "info");
|
||||
startCountdown(Math.round(estimate));
|
||||
if (scrolledToBottom) {
|
||||
setTimeout(() => { scrollToBottom() }, 50);
|
||||
}
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Server responded with ${response.status}: ${response.statusText}`);
|
||||
}
|
||||
|
||||
if (!response.body) {
|
||||
throw new Error('Response body is null');
|
||||
}
|
||||
|
||||
// Set up stream processing with explicit chunking
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) {
|
||||
break;
|
||||
}
|
||||
|
||||
const chunk = decoder.decode(value, { stream: true });
|
||||
|
||||
// Process each complete line immediately
|
||||
buffer += chunk;
|
||||
let lines = buffer.split('\n');
|
||||
buffer = lines.pop() || ''; // Keep incomplete line in buffer
|
||||
for (const line of lines) {
|
||||
if (!line.trim()) continue;
|
||||
|
||||
try {
|
||||
const update = JSON.parse(line);
|
||||
|
||||
// Force an immediate state update based on the message type
|
||||
if (update.status === 'processing') {
|
||||
scrolledToBottom = isScrolledToBottom();
|
||||
// Update processing message with immediate re-render
|
||||
setGenerateStatus({ role: 'info', content: update.message });
|
||||
console.log(update.num_ctx);
|
||||
if (scrolledToBottom) {
|
||||
setTimeout(() => { scrollToBottom() }, 50);
|
||||
}
|
||||
|
||||
// Add a small delay to ensure React has time to update the UI
|
||||
await new Promise(resolve => setTimeout(resolve, 0));
|
||||
|
||||
} else if (update.status === 'done') {
|
||||
// Replace processing message with final result
|
||||
scrolledToBottom = isScrolledToBottom();
|
||||
setGenerateStatus(undefined);
|
||||
setResume(update.message);
|
||||
const metadata = update.message.metadata;
|
||||
const evalTPS = metadata.eval_count * 10 ** 9 / metadata.eval_duration;
|
||||
const promptTPS = metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration;
|
||||
setLastEvalTPS(evalTPS ? evalTPS : 35);
|
||||
setLastPromptTPS(promptTPS ? promptTPS : 35);
|
||||
updateContextStatus();
|
||||
if (scrolledToBottom) {
|
||||
setTimeout(() => { scrollToBottom() }, 50);
|
||||
}
|
||||
} else if (update.status === 'error') {
|
||||
// Show error
|
||||
scrolledToBottom = isScrolledToBottom();
|
||||
setGenerateStatus({role: 'error', content: update.message });
|
||||
if (scrolledToBottom) {
|
||||
setTimeout(() => { scrollToBottom() }, 50);
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
setSnack("Error generating resume", "error")
|
||||
console.error('Error parsing JSON:', e, line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Process any remaining buffer content
|
||||
if (buffer.trim()) {
|
||||
try {
|
||||
const update = JSON.parse(buffer);
|
||||
|
||||
if (update.status === 'done') {
|
||||
scrolledToBottom = isScrolledToBottom();
|
||||
setGenerateStatus(undefined);
|
||||
setResume(update.message);
|
||||
if (scrolledToBottom) {
|
||||
setTimeout(() => { scrollToBottom() }, 500);
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
setSnack("Error processing job description", "error")
|
||||
}
|
||||
}
|
||||
|
||||
scrolledToBottom = isScrolledToBottom();
|
||||
stopCountdown();
|
||||
setProcessing(false);
|
||||
if (scrolledToBottom) {
|
||||
setTimeout(() => { scrollToBottom() }, 50);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Fetch error:', error);
|
||||
setSnack("Unable to process job description", "error");
|
||||
scrolledToBottom = isScrolledToBottom();
|
||||
setGenerateStatus({ role: 'error', content: `Error: ${error}` });
|
||||
setProcessing(false);
|
||||
stopCountdown();
|
||||
if (scrolledToBottom) {
|
||||
setTimeout(() => { scrollToBottom() }, 50);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<Box className="ChatBox">
|
||||
<Box className="Conversation">
|
||||
<DocumentViewer sx={{
|
||||
display: "flex",
|
||||
flexGrow: 1,
|
||||
overflowY: "auto",
|
||||
flexDirection: "column",
|
||||
height: "calc(0vh - 0px)", // Hack to make the height work
|
||||
}} {...{ generateResume, resume }} />
|
||||
</Box>
|
||||
</Box>
|
||||
);
|
||||
return (<Box className="ChatBox">
|
||||
<Box className="Conversation">
|
||||
<TextField
|
||||
variant="outlined"
|
||||
autoFocus
|
||||
fullWidth
|
||||
multiline
|
||||
rows="10"
|
||||
type="text"
|
||||
value={jobDescription}
|
||||
onChange={(e) => setJobDescription(e.target.value)}
|
||||
onKeyDown={handleKeyPress}
|
||||
placeholder="Enter the job description.."
|
||||
id="JobDescriptionInput"
|
||||
/>
|
||||
<Tooltip title="Generate">
|
||||
<Button sx={{ m: 1, gap: 1 }} variant="contained" onClick={() => { generateResume(jobDescription); }}>Generate<SendIcon /></Button>
|
||||
</Tooltip>
|
||||
<Box sx={{
|
||||
display: "flex",
|
||||
flexDirection: "column",
|
||||
alignItems: "center",
|
||||
justifyContent: "center",
|
||||
mb: 1
|
||||
}}>
|
||||
<PropagateLoader
|
||||
size="10px"
|
||||
loading={processing}
|
||||
aria-label="Loading Spinner"
|
||||
data-testid="loader"
|
||||
/>
|
||||
{processing === true && countdown > 0 && (
|
||||
<Box
|
||||
sx={{
|
||||
pt: 1,
|
||||
fontSize: "0.7rem",
|
||||
color: "darkgrey"
|
||||
}}
|
||||
>Estimated response time: {countdown}s</Box>
|
||||
)}
|
||||
|
||||
{generateStatus && <Message isFullWidth={true} message={generateStatus} />}
|
||||
{/* {resume && <Message isFullWidth={true} message={resume} />} */}
|
||||
</Box>
|
||||
</Box>
|
||||
</Box>);
|
||||
}
|
||||
|
||||
|
||||
export type {
|
||||
ResumeBuilderProps
|
||||
};
|
||||
|
||||
export {
|
||||
ResumeBuilder
|
||||
};
|
||||
|
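The `generateResume` handler above consumes a newline-delimited JSON stream from `/api/generate-resume/{sessionId}`. A sketch of the update shapes it expects, inferred from the `processing`/`done`/`error` branches (type names are illustrative):

```typescript
import { MessageData } from './MessageMeta';

// Shapes inferred from the streaming handler above; field names follow the code.
type ProcessingUpdate = { status: 'processing', message: string, num_ctx?: number };
type DoneUpdate = { status: 'done', message: MessageData };
type ErrorUpdate = { status: 'error', message: string };
type ResumeStreamUpdate = ProcessingUpdate | DoneUpdate | ErrorUpdate;

export type { ResumeStreamUpdate };
```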
5
frontend/src/Snack.tsx
Normal file
@ -0,0 +1,5 @@
|
||||
type SeverityType = 'error' | 'info' | 'success' | 'warning' | undefined;
|
||||
|
||||
export type {
|
||||
SeverityType
|
||||
};
|
@ -1,5 +1,7 @@
|
||||
import React from 'react';
|
||||
import ReactDOM from 'react-dom/client';
|
||||
import { ThemeProvider } from '@mui/material/styles';
|
||||
import { backstoryTheme } from './BackstoryTheme'; // Adjust path as needed
|
||||
import './index.css';
|
||||
import App from './App';
|
||||
import reportWebVitals from './reportWebVitals';
|
||||
@ -9,7 +11,9 @@ const root = ReactDOM.createRoot(
|
||||
);
|
||||
root.render(
|
||||
<React.StrictMode>
|
||||
<App />
|
||||
<ThemeProvider theme={backstoryTheme}>
|
||||
<App />
|
||||
</ThemeProvider>
|
||||
</React.StrictMode>
|
||||
);
|
||||
|
16
frontend/src/types/theme.d.ts
vendored
Normal file
@ -0,0 +1,16 @@
|
||||
import { Palette, PaletteOptions } from '@mui/material/styles';
|
||||
|
||||
declare module '@mui/material/styles' {
|
||||
interface Palette {
|
||||
custom: {
|
||||
highlight: string;
|
||||
contrast: string;
|
||||
};
|
||||
}
|
||||
interface PaletteOptions {
|
||||
custom?: {
|
||||
highlight: string;
|
||||
contrast: string;
|
||||
};
|
||||
}
|
||||
}
|
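This module augmentation is what allows components above (e.g., `ChatQuery` in Message.tsx) to reference `theme.palette.custom.highlight` from an `sx` callback. A minimal sketch of a theme definition that satisfies it, assuming `backstoryTheme` supplies the custom palette (the contrast value here is an illustrative placeholder):

```typescript
import { createTheme } from '@mui/material/styles';

// Hypothetical definition consistent with the augmentation above.
const backstoryTheme = createTheme({
  palette: {
    custom: {
      highlight: '#D4A017', // Golden Ochre, as noted in Message.tsx
      contrast: '#FFFFFF',  // illustrative placeholder
    },
  },
});

export { backstoryTheme };
```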
@ -1,133 +0,0 @@
|
||||
# Ketr Chat
|
||||
|
||||
This LLM agent was built by James Ketrenos in order to provide answers to any questions you may have about his work history.
|
||||
|
||||
In addition to being a RAG enabled expert system, the LLM is configured with real-time access to weather, stocks, the current time, and can answer questions about the contents of a website.
|
||||
|
||||
## Parts of Ketr Chat
|
||||
|
||||
* Backend Server
|
||||
Provides a custom REST API to support the capabilities exposed from the web UI.
|
||||
* Pytorch used for LLM communication and inference
|
||||
* ChromaDB as a vector store for embedding similarities
|
||||
* FastAPI for the http REST API endpoints
|
||||
* Serves the static site for production deployment
|
||||
* Performs all communication with the LLM (currently via ollama.cpp, however I may be switching it back to Hugging Face transformers.)
|
||||
* Implements the tool subsystem for tool callbacks from the LLM
|
||||
* Manages a chromadb vector store, including the chunking and embedding of the documents used to provide RAG content related to my career.
|
||||
* Manages all context sessions
|
||||
* Currently using qwen2.5:7b, however I frequently switch between different models (llama3.2, deepseek-r1:7b, and mistral:7b.) I've generally had the best results from qwen2.5. DeepSeek-R1 was very cool; the thinking phase was informative for developing system prompts, however the integration with ollama does not support tool calls. That is one reason I'm looking to switch back to Hugging Face transformers.
|
||||
* Languages: Python, bash
|
||||
|
||||
* Web Frontend
|
||||
Provides a responsive UI for interacting with the system
|
||||
* Written using React and Mui.
|
||||
* Exposes enough information to know what the LLM is doing on the backend
|
||||
* Enables adjusting various parameters, including enabling/disabling tools and the RAG, system prompt, etc.
|
||||
* Configured to be able to run in development and production. In development mode, the Server does not serve the Web Frontend and only acts as a REST API endpoint.
|
||||
* Languages: JSX, JavaScript, TypeScript, bash
|
||||
|
||||
* Ollama container
|
||||
If you don't already have ollama installed and running, the container provided in this project is built using the Intel pre-built Ollama package.
|
||||
|
||||
* Jupyter notebook
|
||||
To facilitate rapid development and prototyping, a Jupyter notebook is provided which runs on the same Python package set as the main server container.
|
||||
|
||||
# Installation
|
||||
|
||||
This project uses docker containers to build. As this was originally written to work on an Intel Arc B580 (Battlemage), it requires a kernel that supports that hardware, such as the one documented at [Intel Graphics Preview](https://github.com/canonical/intel-graphics-preview), which runs in Ubuntu Oracular (24.10).
|
||||
|
||||
NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/)
|
||||
|
||||
## Want to run under WSL2? No can do...
|
||||
|
||||
https://www.intel.com/content/www/us/en/support/articles/000093216/graphics/processor-graphics.html
|
||||
|
||||
The A- and B-series discrete GPUs do not support SR-IOV, required for the GPU partitioning that Microsoft Windows uses in order to support GPU acceleration in WSL.
|
||||
|
||||
## Building
|
||||
|
||||
NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/)
|
||||
|
||||
|
||||
```bash
|
||||
git clone https://github.com/jketreno/ketr-chat
|
||||
cd ketr-chat
|
||||
docker compose build
|
||||
```
|
||||
|
||||
## Running
|
||||
|
||||
In order to download the models, you need to have a Hugging Face token. See https://huggingface.co/settings/tokens for information on obtaining a token.
|
||||
|
||||
Edit .env to add the following:
|
||||
|
||||
```.env
|
||||
HF_ACCESS_TOKEN=<access token from huggingface>
|
||||
```
|
||||
|
||||
NOTE: Models downloaded by most examples will be placed in the ./cache directory, which is bind mounted to the container.
|
||||
|
||||
### Ketr Chat
|
||||
|
||||
To launch the ketr-chat shell interactively, with the pytorch 2.6 environment loaded, use the default entrypoint to launch a shell:
|
||||
|
||||
```bash
|
||||
docker compose run --rm ketr-chat shell
|
||||
```
|
||||
|
||||
Once in the shell, you can then launch the server.py:
|
||||
|
||||
```bash
|
||||
docker compose run --rm ketr-chat shell
|
||||
python src/server.py
|
||||
```
|
||||
|
||||
If you launch the server without any parameters, it will run the backend server, which will host the static web frontend built during the `docker compose build`.
|
||||
|
||||
That is the behavior if you up the container:
|
||||
|
||||
```bash
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### Jupyter
|
||||
|
||||
```bash
|
||||
docker compose up jupyter -d
|
||||
```
|
||||
|
||||
The default port for inbound connections is 8888 (see docker-compose.yml). $(pwd)/jupyter is bind mounted to /opt/jupyter in the container, which is where notebooks will be saved by default.
|
||||
|
||||
To access the jupyter notebook, go to `https://localhost:8888/jupyter`.
|
||||
|
||||
### Monitoring
|
||||
|
||||
You can run `ze-monitor` within the launched containers to monitor GPU usage.
|
||||
|
||||
```bash
|
||||
containers=($(docker ps --filter "ancestor=ketr-chat" --format "{{.ID}}"))
|
||||
if [[ ${#containers[*]} -eq 0 ]]; then
|
||||
echo "Running ketr-chat container not found."
|
||||
else
|
||||
for container in ${containers[@]}; do
|
||||
echo "Container ${container} devices:"
|
||||
docker exec -it ${container} ze-monitor
|
||||
done
|
||||
fi
|
||||
```
|
||||
|
||||
If a ketr-chat container is running, you should see something like:
|
||||
|
||||
```
|
||||
Container 5317c503e771 devices:
|
||||
Device 1: 8086:A780 (Intel(R) UHD Graphics 770)
|
||||
Device 2: 8086:E20B (Intel(R) Graphics [0xe20b])
|
||||
```
|
||||
|
||||
You can then launch ze-monitor in that container specifying the device you wish to monitor:
|
||||
|
||||
```
|
||||
containers=($(docker ps --filter "ancestor=ketr-chat" --format "{{.ID}}"))
|
||||
docker exec -it ${containers[0]} ze-monitor --device 2
|
||||
```
|
@ -1,279 +0,0 @@
|
||||
# ze-monitor
|
||||
|
||||
A small utility to monitor Level Zero devices via
|
||||
[Level Zero Sysman](https://oneapi-src.github.io/level-zero-spec/level-zero/latest/sysman/PROG.html#sysman-programming-guide)
|
||||
from the command line, similar to 'top'.
|
||||
|
||||
# Installation
|
||||
|
||||
Requires Ubuntu Oracular 24.10.
|
||||
|
||||
## Easiest
|
||||
|
||||
### Install prerequisites
|
||||
|
||||
This will add the [Intel Graphics Preview PPA](https://github.com/canonical/intel-graphics-preview) and install the required dependencies:
|
||||
|
||||
```bash
|
||||
sudo apt-get install -y \
|
||||
software-properties-common \
|
||||
&& sudo add-apt-repository -y ppa:kobuk-team/intel-graphics \
|
||||
&& sudo apt-get update \
|
||||
&& sudo apt-get install -y \
|
||||
libze1 libze-intel-gpu1 libncurses6
|
||||
```
|
||||
|
||||
### Install ze-monitor from .deb package
|
||||
|
||||
This will download the ze-monitor .deb package from GitHub, install it, and add the current
|
||||
user to the 'ze-monitor' group to allow running the utility:
|
||||
|
||||
```bash
|
||||
version=0.3.0-1
|
||||
wget https://github.com/jketreno/ze-monitor/releases/download/v${version}/ze-monitor-${version}_amd64.deb
|
||||
sudo dpkg -i ze-monitor-${version}_amd64.deb
|
||||
sudo usermod -a -G ze-monitor $(whoami)
|
||||
newgrp ze-monitor
|
||||
```
|
||||
|
||||
Congratulations! You can run ze-monitor:
|
||||
|
||||
```bash
|
||||
ze-monitor
|
||||
```
|
||||
|
||||
You should see something like:
|
||||
|
||||
```bash
|
||||
Device 1: 8086:A780 (Intel(R) UHD Graphics 770)
|
||||
Device 2: 8086:E20B (Intel(R) Graphics [0xe20b])
|
||||
```
|
||||
|
||||
To monitor a device:
|
||||
|
||||
```bash
|
||||
ze-monitor --device 2
|
||||
```
|
||||
|
||||
Check the docs (`man ze-monitor`) for additional details on running the ze-monitor utility.
|
||||
|
||||
## Slightly more involved
|
||||
|
||||
This project uses docker containers to build. As this was originally written to monitor an Intel Arc B580 (Battlemage), it requires a kernel that supports that hardware, such as the one documented at [Intel Graphics Preview](https://github.com/canonical/intel-graphics-preview), which runs in Ubuntu Oracular (24.10). It will monitor any Level Zero device, even those using the i915 driver.
|
||||
|
||||
NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/)
|
||||
|
||||
```
|
||||
git clone https://github.com/jketreno/ze-monitor.git
|
||||
cd ze-monitor
|
||||
docker compose build
|
||||
sudo apt install libze1 libncurses6
|
||||
version=$(cat src/version.txt)
|
||||
docker compose run --remove-orphans --rm \
|
||||
ze-monitor \
|
||||
cp /opt/ze-monitor-static/build/ze-monitor-${version}_amd64.deb \
|
||||
/opt/ze-monitor/build
|
||||
sudo dpkg -i build/ze-monitor-${version}_amd64.deb
|
||||
```
|
||||
|
||||
# Security
|
||||
|
||||
In order for ze-monitor to read the performance monitoring unit (PMU) counters in the Linux kernel, it needs elevated permissions. The easiest way is to install the .deb package and add the user to the ze-monitor group. Or, run under sudo (e.g., `sudo ze-monitor ...`).
|
||||
|
||||
The specific capabilities required to monitor the GPU are documented in [Perf Security](https://www.kernel.org/doc/html/v5.1/admin-guide/perf-security.html) and [man capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html). These include:
|
||||
|
||||
| Capability | Reason |
|
||||
|:--------------------|:-----------------------------------------------------|
|
||||
| CAP_DAC_READ_SEARCH | Bypass all filesystem read access checks |
|
||||
| CAP_PERFMON | Access to perf_events (vs. overloaded CAP_SYS_ADMIN) |
|
||||
| CAP_SYS_PTRACE | PTRACE_MODE_READ_REALCREDS ptrace access mode check |
|
||||
|
||||
To configure ze-monitor to run with those privileges, you can use `setcap` to set the correct capabilities on ze-monitor. You can further secure your system by creating a user group specifically for running the utility and restrict running of that command to users in that group. That is what the .deb package does.
|
||||
|
||||
If you install the .deb package from a [Release](https://github.com/jketreno/ze-monitor/releases) or by building it, that package will set the appropriate permissions for ze-monitor on installation and set it executable only to those in the 'ze-monitor' group.
|
||||
|
||||
## Anyone can run ze-monitor
|
||||
|
||||
If you build from source and want to set the capabilities:
|
||||
|
||||
```bash
|
||||
sudo setcap "cap_perfmon,cap_dac_read_search,cap_sys_ptrace=ep" build/ze-monitor
|
||||
getcap build/ze-monitor
|
||||
```
|
||||
|
||||
Any user can then run `build/ze-monitor` and monitor the GPU.
|
||||
|
||||
# Build outside container
|
||||
|
||||
## Prerequisites
|
||||
|
||||
If you would like to build outside of docker, you need the following packages installed:
|
||||
|
||||
```
|
||||
sudo apt-get install -y \
|
||||
build-essential \
|
||||
libfmt-dev \
|
||||
libncurses-dev
|
||||
```
|
||||
|
||||
In addition, you need the Intel drivers installed, which are available from the `kobuk-team/intel-graphics` PPA:
|
||||
|
||||
```
|
||||
sudo apt-get install -y \
|
||||
software-properties-common \
|
||||
&& sudo add-apt-repository -y ppa:kobuk-team/intel-graphics \
|
||||
&& sudo apt-get update \
|
||||
&& sudo apt-get install -y \
|
||||
libze-intel-gpu1 \
|
||||
libze1 \
|
||||
libze-dev
|
||||
```
|
||||
## Building
|
||||
|
||||
```
|
||||
cd build
|
||||
cmake ..
|
||||
make
|
||||
```
|
||||
|
||||
## Running
|
||||
|
||||
```
|
||||
build/ze-monitor
|
||||
```
|
||||
|
||||
## Build and install .deb
|
||||
|
||||
In order to build the .deb package, you need the following packages installed:
|
||||
|
||||
```bash
|
||||
sudo apt-get install -y \
|
||||
debhelper \
|
||||
devscripts \
|
||||
rpm \
|
||||
rpm2cpio
|
||||
```
|
||||
|
||||
You can then build the .deb:
|
||||
|
||||
```bash
|
||||
if [ -d build ]; then
|
||||
cd build
|
||||
fi
|
||||
version=$(cat ../src/version.txt)
|
||||
cpack
|
||||
sudo dpkg -i build/packages/ze-monitor_${version}_amd64.deb
|
||||
```
|
||||
|
||||
You can then run ze-monitor from your path:
|
||||
|
||||
```bash
|
||||
ze-monitor
|
||||
```
|
||||
|
||||
# Developing
|
||||
|
||||
To run the built binary without building a full .deb package, you can build and run on the host by compiling in the container:
|
||||
|
||||
```
|
||||
docker compose run --rm ze-monitor build.sh
|
||||
build/ze-monitor
|
||||
```
|
||||
|
||||
The build.sh script will build the binary in /opt/ze-monitor/build, which is volume mounted to the host's build directory.
|
||||
|
||||
NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities.
|
||||
|
||||
# Running
|
||||
|
||||
NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities.
|
||||
|
||||
If running within a docker container, the container does not have access to the host's `/proc/fd`, which is necessary to obtain information about processes outside the container that are using the GPU. As such, only processes running inside the same container as ze-monitor will be listed as using the GPU.
|
||||
|
||||
## List available devices
|
||||
|
||||
```
|
||||
ze-monitor
|
||||
```
|
||||
|
||||
Example output:
|
||||
|
||||
```bash
|
||||
$ ze-monitor
|
||||
Device 1: 8086:E20B (Intel(R) Graphics [0xe20b])
|
||||
Device 2: 8086:A780 (Intel(R) UHD Graphics 770)
|
||||
```
|
||||
|
||||
## Show details for a given device
|
||||
|
||||
```
|
||||
sudo ze-monitor --info --device ( PCIID | # | BDF | UUID | /dev/dri/render*)
|
||||
```
|
||||
|
||||
Example output:
|
||||
|
||||
```bash
|
||||
$ sudo ze-monitor --device 2 --info
|
||||
Device: 8086:A780 (Intel(R) UHD Graphics 770)
|
||||
UUID: 868080A7-0400-0000-0002-000000000000
|
||||
BDF: 0000:0000:0002:0000
|
||||
PCI ID: 8086:A780
|
||||
Subdevices: 0
|
||||
Serial Number: unknown
|
||||
Board Number: unknown
|
||||
Brand Name: unknown
|
||||
Model Name: Intel(R) UHD Graphics 770
|
||||
Vendor Name: Intel(R) Corporation
|
||||
Driver Version: 0CB7EFCAD5695B7EC5C8CE6
|
||||
Type: GPU
|
||||
Is integrated with host: Yes
|
||||
Is a sub-device: No
|
||||
Supports error correcting memory: No
|
||||
Supports on-demand page-faulting: No
|
||||
Engines: 7
|
||||
Engine 1: ZES_ENGINE_GROUP_RENDER_SINGLE
|
||||
Engine 2: ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE
|
||||
Engine 3: ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE
|
||||
Engine 4: ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE
|
||||
Engine 5: ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE
|
||||
Engine 6: ZES_ENGINE_GROUP_COPY_SINGLE
|
||||
Engine 7: ZES_ENGINE_GROUP_MEDIA_ENHANCEMENT_SINGLE
|
||||
Temperature Sensors: 0
|
||||
```
|
||||
|
||||
NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities.
|
||||
|
||||
## Monitor a given device
|
||||
|
||||
```
|
||||
sudo ze-monitor --device ( PCIID | # | BDF | UUID | /dev/dri/render* ) \
|
||||
--interval ms
|
||||
```
|
||||
|
||||
NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities.
|
||||
|
||||
Output:
|
||||
|
||||
```bash
|
||||
$ sudo ze-monitor --device 2 --interval 500
|
||||
Device: 8086:E20B (Intel(R) Graphics [0xe20b])
|
||||
Total Memory: 12809404416
|
||||
Free memory: [# 55% ############################ ]
|
||||
Power usage: 165.0W
|
||||
------------------------------------------------------------------------------------------
|
||||
PID COMMAND-LINE
|
||||
USED MEMORY SHARED MEMORY ENGINE FLAGS
|
||||
------------------------------------------------------------------------------------------
|
||||
1 /sbin/init splash
|
||||
MEM: 106102784 SHR: 100663296 FLAGS: RENDER COMPUTE
|
||||
1606 /usr/lib/systemd/systemd-logind
|
||||
MEM: 106102784 SHR: 100663296 FLAGS: RENDER COMPUTE
|
||||
5164 /usr/bin/gnome-shell
|
||||
MEM: 530513920 SHR: 503316480 FLAGS: RENDER COMPUTE
|
||||
5237 /usr/bin/Xwayland :1024 -rootless -nores...isplayfd 6 -initfd 7 -byteswappedclients
|
||||
MEM: 0 SHR: 0 FLAGS:
|
||||
40480 python chat.py
|
||||
MEM: 5544226816 SHR: 0 FLAGS: DMA COMPUTE
|
||||
```
|
||||
|
||||
If you pass `--one-shot` (for example, `sudo ze-monitor --device 2 --one-shot`), statistics will be gathered once, displayed, and then ze-monitor will exit.
|
@ -1,195 +0,0 @@
|
||||
div {
|
||||
box-sizing: border-box
|
||||
}
|
||||
|
||||
.SystemInfo {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 5px;
|
||||
padding: 5px;
|
||||
flex-grow: 1;
|
||||
}
|
||||
|
||||
.SystemInfoItem {
|
||||
display: flex; /* Grid for individual items */
|
||||
flex-direction: row;
|
||||
flex-grow: 1;
|
||||
}
|
||||
|
||||
.SystemInfoItem > div:first-child {
|
||||
display: flex;
|
||||
justify-self: end; /* Align the first column content to the right */
|
||||
width: 10rem;
|
||||
}
|
||||
|
||||
.SystemInfoItem > div:last-child {
|
||||
display: flex;
|
||||
flex-grow: 1;
|
||||
justify-self: end; /* Align the first column content to the right */
|
||||
}
|
||||
|
||||
.ChatBox {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
flex-grow: 1;
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.Controls {
|
||||
display: flex;
|
||||
background-color: #F5F5F5;
|
||||
border: 1px solid #E0E0E0;
|
||||
overflow-y: auto;
|
||||
padding: 10px;
|
||||
flex-direction: column;
|
||||
margin-left: 10px;
|
||||
box-sizing: border-box;
|
||||
overflow-x: visible;
|
||||
min-width: 10rem;
|
||||
width: 100%;
|
||||
flex-grow: 1;
|
||||
}
|
||||
|
||||
@media (min-width: 768px) {
|
||||
.Controls {
|
||||
width: 600px; /* or whatever you prefer for a desktop */
|
||||
max-width: 80vw; /* Optional: Prevent it from taking up too much space */
|
||||
}
|
||||
}
|
||||
|
||||
.Conversation {
|
||||
display: flex;
|
||||
background-color: #F5F5F5;
|
||||
border: 1px solid #E0E0E0;
|
||||
flex-grow: 1;
|
||||
overflow-y: auto;
|
||||
padding: 10px;
|
||||
flex-direction: column;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
.user-message.MuiCard-root {
|
||||
background-color: #DCF8C6;
|
||||
border: 1px solid #B2E0A7;
|
||||
color: #333333;
|
||||
margin-bottom: 0.75rem;
|
||||
margin-left: 1rem;
|
||||
border-radius: 0.25rem;
|
||||
min-width: 80%;
|
||||
max-width: 80%;
|
||||
justify-self: right;
|
||||
display: flex;
|
||||
white-space: pre-wrap;
|
||||
overflow-wrap: break-word;
|
||||
word-break: break-word;
|
||||
flex-direction: column;
|
||||
align-items: self-end;
|
||||
align-self: end;
|
||||
flex-grow: 0;
|
||||
}
|
||||
|
||||
.assistant-message.MuiCard-root {
|
||||
border: 1px solid #E0E0E0;
|
||||
background-color: #FFFFFF;
|
||||
color: #333333;
|
||||
margin-bottom: 0.75rem;
|
||||
margin-right: 1rem;
|
||||
min-width: 70%;
|
||||
border-radius: 0.25rem;
|
||||
justify-self: left;
|
||||
display: flex;
|
||||
white-space: pre-wrap;
|
||||
overflow-wrap: break-word;
|
||||
word-break: break-word;
|
||||
flex-direction: column;
|
||||
flex-grow: 0;
|
||||
padding: 16px 0;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.assistant-message .MuiCardContent-root {
|
||||
padding: 0 16px !important;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.assistant-message span {
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.user-message .MuiCardContent-root:last-child,
|
||||
.assistant-message .MuiCardContent-root:last-child {
|
||||
padding: 16px;
|
||||
}
|
||||
|
||||
.users > div {
|
||||
padding: 0.25rem;
|
||||
}
|
||||
|
||||
.user-active {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.metadata {
|
||||
border: 1px solid #E0E0E0;
|
||||
font-size: 0.75rem;
|
||||
padding: 0.125rem;
|
||||
}
|
||||
|
||||
/* Reduce general whitespace in markdown content */
|
||||
.assistant-message p.MuiTypography-root {
|
||||
margin-top: 0.5rem;
|
||||
margin-bottom: 0.5rem;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
/* Reduce space between headings and content */
|
||||
.assistant-message h1.MuiTypography-root,
|
||||
.assistant-message h2.MuiTypography-root,
|
||||
.assistant-message h3.MuiTypography-root,
|
||||
.assistant-message h4.MuiTypography-root,
|
||||
.assistant-message h5.MuiTypography-root,
|
||||
.assistant-message h6.MuiTypography-root {
|
||||
margin-top: 1rem;
|
||||
margin-bottom: 0.5rem;
|
||||
font-size: 1rem;
|
||||
}
|
||||
|
||||
/* Reduce space in lists */
|
||||
.assistant-message ul.MuiTypography-root,
|
||||
.assistant-message ol.MuiTypography-root {
|
||||
margin-top: 0.5rem;
|
||||
margin-bottom: 0.5rem;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.assistant-message li.MuiTypography-root {
|
||||
margin-bottom: 0.25rem;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.assistant-message .MuiTypography-root li {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
padding: 0;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
/* Reduce space around code blocks */
|
||||
.assistant-message .MuiTypography-root pre {
|
||||
border: 1px solid #F5F5F5;
|
||||
border-radius: 0.5rem;
|
||||
padding: 0.5rem 0.75rem;
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.PromptStats .MuiTableCell-root {
|
||||
font-size: 0.8rem;
|
||||
}
|
||||
|
||||
#SystemPromptInput {
|
||||
font-size: 0.9rem;
|
||||
line-height: 1.25rem;
|
||||
}
|
180
src/server.py
@ -6,14 +6,8 @@ import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import queue
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime
|
||||
import textwrap
|
||||
import threading
|
||||
import uuid
|
||||
import random
|
||||
import subprocess
|
||||
import re
|
||||
import math
|
||||
@ -26,29 +20,15 @@ def try_import(module_name, pip_name=None):
|
||||
print(f" pip install {pip_name or module_name}")
|
||||
|
||||
# Third-party modules with import checks
|
||||
try_import('gradio')
|
||||
try_import('ollama')
|
||||
try_import('pytz')
|
||||
try_import('requests')
|
||||
try_import('yfinance', 'yfinance')
|
||||
try_import('dotenv', 'python-dotenv')
|
||||
try_import('geopy', 'geopy')
|
||||
try_import('hyphen', 'PyHyphen')
|
||||
try_import('bs4', 'beautifulsoup4')
|
||||
try_import('nltk')
|
||||
try_import('fastapi')
|
||||
|
||||
import nltk
|
||||
from dotenv import load_dotenv
|
||||
from geopy.geocoders import Nominatim
|
||||
import gradio as gr
|
||||
import ollama
|
||||
import pytz
|
||||
import requests
|
||||
import yfinance as yf
|
||||
from hyphen import hyphenator
|
||||
from bs4 import BeautifulSoup
|
||||
from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.responses import JSONResponse, StreamingResponse, FileResponse, RedirectResponse
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
@ -143,7 +123,7 @@ MODEL_NAME = defines.model
|
||||
LOG_LEVEL="info"
|
||||
USE_TLS=False
|
||||
WEB_HOST="0.0.0.0"
|
||||
WEB_PORT=5000
|
||||
WEB_PORT=8911
|
||||
DEFAULT_HISTORY_LENGTH=5
|
||||
|
||||
# %%
|
||||
@ -157,14 +137,37 @@ When answering queries, follow these steps:
|
||||
1. First analyze the query to determine if real-time information might be helpful
|
||||
2. Even when [{context_tag}] is provided, consider whether the tools would provide more current or comprehensive information
|
||||
3. Use the provided tools whenever they would enhance your response, regardless of whether context is also available
|
||||
4. When presenting information like weather forecasts, include relevant emojis immediately before the corresponding text. For example, for a sunny day, say \"☀️ Sunny\" or if the forecast says there will be \"rain showers\", say \"🌧️ Rain showers\". Use this mapping for weather emojis: Sunny: ☀️, Cloudy: ☁️, Rainy: 🌧️, Snowy: ❄️
|
||||
4. When both [{context_tag}] and tool outputs are relevant, synthesize information from both sources to provide the most complete answer
|
||||
5. Always prioritize the most up-to-date and relevant information, whether it comes from [{context_tag}] or tools
|
||||
6. If [{context_tag}] and tool outputs contain conflicting information, prefer the tool outputs as they likely represent more current data
|
||||
|
||||
Always use tools and [{context_tag}] when possible. Be concise, and never make up information. If you do not know the answer, say so.
|
||||
|
||||
""".strip()
|
||||
|
||||
system_generate_resume = f"""
|
||||
You are a professional resume writer. Your task is to write a polished, tailored resume for a specific job based only on the individual's [WORK HISTORY].
|
||||
|
||||
When answering queries, follow these steps:
|
||||
|
||||
1. You must not invent or assume any information not explicitly present in the [WORK HISTORY].
|
||||
2. Analyze the [JOB DESCRIPTION] to identify skills required for the job.
|
||||
3. Use the [JOB DESCRIPTION] provided to guide the focus, tone, and relevant skills or experience to highlight.
|
||||
4. Identify and emphasize the experiences, achievements, and responsibilities from the [WORK HISTORY] that best align with the [JOB DESCRIPTION].
|
||||
5. Do not use the [JOB DESCRIPTION] skills as skills the user possesses unless listed in [WORK HISTORY].
|
||||
|
||||
Structure the resume professionally with the following sections where applicable:
|
||||
|
||||
* "Name: Use full name."
|
||||
* "Professional Summary: A 2-4 sentence overview tailored to the job."
|
||||
* "Skills: A bullet list of key skills derived from the work history and relevant to the job."
|
||||
* "Professional Experience: A detailed list of roles, achievements, and responsibilities from the work history that relate to the job."
|
||||
* "Education: Include only if available in the work history."
|
||||
|
||||
Do not include any information unless it is supported by the provided [WORK HISTORY].
|
||||
Ensure the language is clear, concise, and aligned with industry standards for professional resumes.
|
||||
"""
|
||||
|
||||
tool_log = []
|
||||
command_log = []
|
||||
model = None
|
||||
@ -445,6 +448,9 @@ class WebServer:
|
||||
|
||||
@self.app.post('/api/chat/{context_id}')
|
||||
async def chat_endpoint(context_id: str, request: Request):
|
||||
if not is_valid_uuid(context_id):
|
||||
logging.warning(f"Invalid context_id: {context_id}")
|
||||
return JSONResponse({"error": "Invalid context_id"}, status_code=400)
|
||||
context = self.upsert_context(context_id)
|
||||
data = await request.json()
|
||||
|
||||
@ -468,7 +474,36 @@ class WebServer:
|
||||
"X-Accel-Buffering": "no" # Prevents Nginx buffering if you're using it
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@self.app.post('/api/generate-resume/{context_id}')
|
||||
async def post_generate_resume(context_id: str, request: Request):
|
||||
if not is_valid_uuid(context_id):
|
||||
logging.warning(f"Invalid context_id: {context_id}")
|
||||
return JSONResponse({"error": "Invalid context_id"}, status_code=400)
|
||||
context = self.upsert_context(context_id)
|
||||
data = await request.json()
|
||||
|
||||
# Create a custom generator that ensures flushing
|
||||
async def flush_generator():
|
||||
async for message in self.generate_resume(context=context, content=data['content']):
|
||||
# Convert to JSON and add newline
|
||||
yield json.dumps(message) + "\n"
|
||||
# Save the history as its generated
|
||||
self.save_context(context_id)
|
||||
# Explicitly flush after each yield
|
||||
await asyncio.sleep(0) # Allow the event loop to process the write
|
||||
|
||||
# Return StreamingResponse with appropriate headers
|
||||
return StreamingResponse(
|
||||
flush_generator(),
|
||||
media_type="application/json",
|
||||
headers={
|
||||
"Cache-Control": "no-cache",
|
||||
"Connection": "keep-alive",
|
||||
"X-Accel-Buffering": "no" # Prevents Nginx buffering if you're using it
|
||||
}
|
||||
)
|
||||
|
||||
@self.app.post('/api/context')
|
||||
async def create_context():
|
||||
context = self.create_context()
|
||||
@ -542,12 +577,12 @@ class WebServer:
|
||||
|
||||
@self.app.get('/{path:path}')
|
||||
async def serve_static(path: str):
|
||||
full_path = os.path.join('/opt/airc/ketr-chat/build', path)
|
||||
full_path = os.path.join(defines.static_content, path)
|
||||
if os.path.exists(full_path) and os.path.isfile(full_path):
|
||||
self.logging.info(f"Serve static request for {full_path}")
|
||||
return FileResponse(full_path)
|
||||
self.logging.info(f"Serve index.html for {path}")
|
||||
return FileResponse('/opt/airc/ketr-chat/build/index.html')
|
||||
return FileResponse(os.path.join(defines.static_content, 'index.html'))
|
||||
|
||||
import requests
|
||||
|
||||
@ -565,11 +600,11 @@ class WebServer:
|
||||
context = self.upsert_context(session_id)
|
||||
|
||||
# Create sessions directory if it doesn't exist
|
||||
if not os.path.exists("sessions"):
|
||||
os.makedirs("sessions")
|
||||
if not os.path.exists(defines.session_dir):
|
||||
os.makedirs(defines.session_dir)
|
||||
|
||||
# Create the full file path
|
||||
file_path = os.path.join("sessions", session_id)
|
||||
file_path = os.path.join(defines.session_dir, session_id)
|
||||
|
||||
# Serialize the data to JSON and write to file
|
||||
with open(file_path, 'w') as f:
|
||||
@ -587,7 +622,7 @@ class WebServer:
|
||||
Returns:
|
||||
The deserialized dictionary, or a new context if it doesn't exist on disk.
|
||||
"""
|
||||
file_path = os.path.join("sessions", session_id)
|
||||
file_path = os.path.join(defines.session_dir, session_id)
|
||||
|
||||
# Check if the file exists
|
||||
if not os.path.exists(file_path):
|
||||
@ -606,9 +641,11 @@ class WebServer:
|
||||
context = {
|
||||
"id": context_id,
|
||||
"system": system_context,
|
||||
"system_generate_resume": system_generate_resume,
|
||||
"llm_history": [],
|
||||
"user_history": [],
|
||||
"tools": default_tools(tools),
|
||||
"resume_history": [],
|
||||
"rags": rags.copy(),
|
||||
"context_tokens": round(len(str(system_context)) * 3 / 4), # Estimate context usage
|
||||
"message_history_length": 5 # Number of messages to supply in context
|
||||
@ -681,10 +718,9 @@ class WebServer:
|
||||
messages = context["system"] + llm_history
|
||||
|
||||
try:
|
||||
yield {"status": "processing", "message": "Processing request..."}
|
||||
|
||||
# Estimate token length of new messages
|
||||
ctx_size = self.get_optimal_ctx_size(context["context_tokens"], messages=llm_history[-1]["content"])
|
||||
yield {"status": "processing", "message": "Processing request...", "num_ctx": ctx_size}
|
||||
|
||||
# Use the async generator in an async for loop
|
||||
response = self.client.chat(model=self.model, messages=messages, tools=llm_tools(context["tools"]), options={ 'num_ctx': ctx_size })
|
||||
@ -734,9 +770,9 @@ class WebServer:
|
||||
|
||||
metadata["tools"] = tools_used
|
||||
|
||||
yield {"status": "processing", "message": "Generating final response..."}
|
||||
# Estimate token length of new messages
|
||||
ctx_size = self.get_optimal_ctx_size(context["context_tokens"], messages=messages[pre_add_index:])
|
||||
yield {"status": "processing", "message": "Generating final response...", "num_ctx": ctx_size }
|
||||
# Decrease creativity when processing tool call requests
|
||||
response = self.client.chat(model=self.model, messages=messages, stream=False, options={ 'num_ctx': ctx_size }) #, "temperature": 0.5 })
|
||||
metadata["eval_count"] += response['eval_count']
|
||||
@ -756,7 +792,7 @@ class WebServer:
|
||||
user_history.append(final_message)
|
||||
|
||||
# Return the REST API with metadata
|
||||
yield {"status": "done", "message": final_message, "metadata": metadata}
|
||||
yield {"status": "done", "message": final_message }
|
||||
|
||||
except Exception as e:
|
||||
logging.exception({ 'model': self.model, 'messages': messages, 'error': str(e) })
|
||||
@ -765,7 +801,79 @@ class WebServer:
|
||||
finally:
|
||||
self.processing = False
|
||||
|
||||
def run(self, host='0.0.0.0', port=5000, **kwargs):
|
||||
async def generate_resume(self, context, content):
|
||||
content = content.strip()
|
||||
if not content:
|
||||
yield {"status": "error", "message": "Invalid request"}
|
||||
return
|
||||
|
||||
if self.processing:
|
||||
yield {"status": "error", "message": "Busy"}
|
||||
return
|
||||
|
||||
self.processing = True
|
||||
resume_history = context["resume_history"]
|
||||
|
||||
metadata = {
|
||||
"rag": {},
|
||||
"tools": [],
|
||||
"eval_count": 0,
|
||||
"eval_duration": 0,
|
||||
"prompt_eval_count": 0,
|
||||
"prompt_eval_duration": 0,
|
||||
}
|
||||
rag_docs = []
|
||||
for rag in context["rags"]:
|
||||
if rag["enabled"] and rag["name"] == "JPK": # Only support JPK rag right now...
|
||||
yield {"status": "processing", "message": f"Checking RAG context {rag['name']}..."}
|
||||
chroma_results = Rag.find_similar(llm=self.client, collection=self.collection, query=content, top_k=10)
|
||||
if chroma_results:
|
||||
rag_docs.extend(chroma_results["documents"])
|
||||
metadata["rag"] = { "name": rag["name"], **chroma_results }
|
||||
preamble = f"The current time is {DateTime()}\n"
|
||||
if len(rag_docs):
|
||||
preamble = f"""[WORK HISTORY]:\n"""
|
||||
for doc in rag_docs:
|
||||
preamble += doc
|
||||
preamble += f"\n[/WORK HISTORY]\n"
|
||||
|
||||
content = f"{preamble}\nUse the above WORK HISTORY to create the resume for this JOB DESCRIPTION. Do not use the JOB DESCRIPTION skills as skills the user posseses unless listed in WORK HISTORY:\n[JOB DESCRIPTION]\n{content}\n[/JOB DESCRIPTION]\n"
|
||||
|
||||
try:
|
||||
# Estimate token length of new messages
|
||||
ctx_size = self.get_optimal_ctx_size(context["context_tokens"], messages=[system_generate_resume, content])
|
||||
|
||||
yield {"status": "processing", "message": "Processing request...", "num_ctx": ctx_size}
|
||||
|
||||
# Use the async generator in an async for loop
|
||||
response = self.client.generate(model=self.model, system=system_generate_resume, prompt=content, options={ 'num_ctx': ctx_size })
|
||||
metadata["eval_count"] += response['eval_count']
|
||||
metadata["eval_duration"] += response['eval_duration']
|
||||
metadata["prompt_eval_count"] += response['prompt_eval_count']
|
||||
metadata["prompt_eval_duration"] += response['prompt_eval_duration']
|
||||
context["context_tokens"] = response['prompt_eval_count'] + response['eval_count']
|
||||
|
||||
reply = response['response']
|
||||
final_message = {"role": "assistant", "content": reply, "metadata": metadata }
|
||||
|
||||
resume_history.append({
|
||||
'job_description': content,
|
||||
'resume': reply,
|
||||
'metadata': metadata
|
||||
})
|
||||
|
||||
# Return the REST API with metadata
|
||||
yield {"status": "done", "message": final_message }
|
||||
|
||||
except Exception as e:
|
||||
logging.exception({ 'model': self.model, 'content': content, 'error': str(e) })
|
||||
yield {"status": "error", "message": f"An error occurred: {str(e)}"}
|
||||
|
||||
finally:
|
||||
self.processing = False
|
||||
|
||||
|
||||
def run(self, host='0.0.0.0', port=WEB_PORT, **kwargs):
|
||||
import uvicorn
|
||||
uvicorn.run(self.app, host=host, port=port)
|
||||
|
||||
@ -783,7 +891,7 @@ def main():
|
||||
client = ollama.Client(host=args.ollama_server)
|
||||
model = args.ollama_model
|
||||
|
||||
documents = Rag.load_text_files("doc")
|
||||
documents = Rag.load_text_files(defines.doc_dir)
|
||||
print(f"Documents loaded {len(documents)}")
|
||||
collection = Rag.get_vector_collection()
|
||||
chunks = Rag.create_chunks_from_documents(documents)
|
||||
|
@ -4,5 +4,8 @@ ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
|
||||
#model = "llama3.2"
|
||||
model="qwen2.5:7b"
|
||||
encoding_model="mxbai-embed-large"
|
||||
persist_directory="./chromadb"
|
||||
max_context = 2048*8*2
|
||||
persist_directory="/root/.cache/chromadb"
|
||||
max_context = 2048*8*2
|
||||
doc_dir = "/opt/backstory/docs/"
|
||||
session_dir = "/opt/backstory/sessions"
|
||||
static_content = '/opt/backstory/frontend/build'
|
@ -104,7 +104,7 @@ if __name__ == "__main__":
|
||||
# When running directly, use absolute imports
|
||||
import defines
|
||||
llm = ollama.Client(host=defines.ollama_api_url)
|
||||
documents = load_text_files("doc")
|
||||
documents = load_text_files(defines.doc_dir)
|
||||
print(f"Documents loaded {len(documents)}")
|
||||
collection = get_vector_collection()
|
||||
chunks = create_chunks_from_documents(documents)
|
||||
@ -113,5 +113,6 @@ if __name__ == "__main__":
|
||||
print(f"Document types: {doc_types}")
|
||||
print(f"Vectorstore created with {collection.count()} documents")
|
||||
query = "Can you describe James Ketrenos' work history?"
|
||||
top_docs = find_similar(llm, query, top_k=3)
|
||||
top_docs = find_similar(llm, collection, query, top_k=3)
|
||||
print(top_docs)
|
||||
|
||||
|