diff --git a/.gitignore b/.gitignore index 68fe6ab..b929a23 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .env cache/** jupyter/** -ollama/** \ No newline at end of file +ollama/** +sessions/** diff --git a/Dockerfile b/Dockerfile index 683efc6..2883eae 100644 --- a/Dockerfile +++ b/Dockerfile @@ -133,20 +133,20 @@ RUN pip install requests wheel RUN python setup.py clean --all bdist_wheel --linux # -# The main airc image: +# The main backstory image: # * python 3.11 # * pytorch xpu w/ ipex-llm # * ollama-ipex-llm # * src/server.py - model server supporting RAG and fine-tuned models # # Agents using server: -# * src/web-ui.py - REACT server (airc.ketrenos.com) -# * src/irc.py - IRC backend (irc.libera.chat #airc-test) +# * src/web-ui.py - REACT server (backstory.ketrenos.com) +# * src/irc.py - IRC backend (irc.libera.chat #backstory-test) # * src/cli.py - Command line chat # # Utilities: # * src/training-fine-tune.py - Perform fine-tuning on currated documents -FROM ubuntu:oracular AS airc +FROM ubuntu:oracular AS backstory COPY --from=python-build /opt/python /opt/python @@ -184,25 +184,25 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log} -WORKDIR /opt/airc +WORKDIR /opt/backstory RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2 # Setup the ollama python virtual environment -RUN python3 -m venv --system-site-packages /opt/airc/venv +RUN python3 -m venv --system-site-packages /opt/backstory/venv # Setup the docker pip shell RUN { \ echo '#!/bin/bash' ; \ echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \ echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \ - echo 'source /opt/airc/venv/bin/activate' ; \ + echo 'source /opt/backstory/venv/bin/activate' ; \ echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \ - } > /opt/airc/shell ; \ - chmod +x /opt/airc/shell + } > /opt/backstory/shell ; \ + chmod +x /opt/backstory/shell # Activate the pip environment on all shell calls -SHELL [ "/opt/airc/shell" ] +SHELL [ "/opt/backstory/shell" ] # From https://pytorch-extension.intel.com/installation?platform=gpu&version=v2.6.10%2Bxpu&os=linux%2Fwsl2&package=pip @@ -246,24 +246,15 @@ RUN pip install einops diffusers # Required for IPEX optimize(), which is requir # Install packages needed for stock.py RUN pip install yfinance pyzt geopy PyHyphen nltk +# While running in development mode via bind mounts, don't copy +# the source or follow on containers will always rebuild whenever +# the source changes. 
+#COPY /src/ /opt/backstory/src/ +COPY /src/requirements.txt /opt/backstory/src/requirements.txt +RUN pip install -r /opt/backstory/src/requirements.txt + SHELL [ "/bin/bash", "-c" ] -# Don't install the full oneapi essentials; just the ones that we seem to need -# RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ -# | gpg --dearmor -o /usr/share/keyrings/oneapi-archive-keyring.gpg \ -# && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \ -# | tee /etc/apt/sources.list.d/oneAPI.list \ -# && apt-get update \ -# && DEBIAN_FRONTEND=noninteractive apt-get install -y \ -# intel-oneapi-mkl-sycl-2025.0 \ -# intel-oneapi-dnnl-2025.0 \ -# intel-oneapi-dpcpp-cpp-2025.0 \ -# && apt-get clean \ -# && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log} -# dpcpp is needed for LoRA backend when -# libze-dev is needed for LoRA/triton backend in order to build stuff -# Unfortunately, that fails with: -# ImportError: /opt/airc/venv/lib/python3.11/site-packages/intel_extension_for_pytorch/lib/libintel-ext-pt-cpu.so: undefined symbol: _ZNK5torch8autograd4Node4nameEv RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install -y \ libncurses6 \ @@ -274,24 +265,18 @@ COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/ RUN dpkg -i /opt/ze-monitor-*deb && rm /opt/ze-monitor-*deb RUN usermod -aG ze-monitor root -# While running in development mode via bind mounts, don't copy -# the source or follow on containers will always rebuild whenever -# the source changes. -#COPY /src/ /opt/airc/src/ -COPY /src/requirements.txt /opt/airc/src/requirements.txt - SHELL [ "/bin/bash", "-c" ] RUN { \ echo '#!/bin/bash'; \ - echo 'echo "Container: airc"'; \ + echo 'echo "Container: backstory"'; \ echo 'set -e'; \ - echo 'echo "Setting pip environment to /opt/airc"'; \ + echo 'echo "Setting pip environment to /opt/backstory"'; \ echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \ echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \ - echo 'source /opt/airc/venv/bin/activate'; \ + echo 'source /opt/backstory/venv/bin/activate'; \ echo ''; \ - echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/airc/)?shell$ ]]; then'; \ + echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/backstory/)?shell$ ]]; then'; \ echo ' echo "Dropping to shell"'; \ echo ' shift' ; \ echo ' echo "Running: ${@}"' ; \ @@ -301,8 +286,11 @@ RUN { \ echo ' exec /bin/bash'; \ echo ' fi' ; \ echo 'else'; \ - echo ' echo "Launching AIRC chat server..."'; \ - echo ' python src/airc.py "${@}"' ; \ + echo ' while true; do'; \ + echo ' echo "Launching Backstory server..."'; \ + echo ' python src/server.py "${@}" || echo "Backstory server died. 
Restarting in 3 seconds."'; \
+ echo ' sleep 3'; \
+ echo ' done' ; \
 echo 'fi'; \
 } > /entrypoint.sh \
 && chmod +x /entrypoint.sh
@@ -422,9 +410,9 @@ ENV PATH=/opt/ollama:${PATH}
 ENTRYPOINT [ "/entrypoint.sh" ]
-FROM airc AS jupyter
+FROM backstory AS jupyter
-SHELL [ "/opt/airc/shell" ]
+SHELL [ "/opt/backstory/shell" ]
 # BEGIN setup Jupyter
 RUN pip install \
@@ -433,13 +421,13 @@ RUN pip install \
 && jupyter lab build --dev-build=False --minimize=False
 # END setup Jupyter
-RUN pip install -r /opt/airc/src/requirements.txt
+RUN pip install -r /opt/backstory/src/requirements.txt
 SHELL [ "/bin/bash", "-c" ]
 RUN { \
 echo '#!/bin/bash' ; \
- echo 'echo "Container: airc jupyter"' ; \
+ echo 'echo "Container: backstory jupyter"' ; \
 echo 'if [[ ! -e "/root/.cache/hub/token" ]]; then' ; \
 echo ' if [[ "${HF_ACCESS_TOKEN}" == "" ]]; then' ; \
 echo ' echo "Set your HF access token in .env as: HF_ACCESS_TOKEN=" >&2' ; \
@@ -451,7 +439,7 @@ RUN { \
 echo 'fi' ; \
 echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
 echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
- echo 'source /opt/airc/venv/bin/activate' ; \
+ echo 'source /opt/backstory/venv/bin/activate' ; \
 echo 'if [[ "${1}" == "shell" ]]; then echo "Dropping to shell"; /bin/bash; exit $?; fi' ; \
 echo 'while true; do' ; \
 echo ' echo "Launching jupyter lab"' ; \
diff --git a/README.md b/README.md
index 7a9c7a5..544c858 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,12 @@
-# AIRC (pronounced Eric)
+# Backstory
-AI is Really Cool
+Backstory is an AI resume agent that provides context into a diverse career narrative.
-This project provides an AI chat client. It runs the neuralchat model, enhanced with a little bit of RAG to fetch news RSS feeds.
+This project provides an AI chat client. While it can run a variety of LLM models, it is currently running Qwen2.5:7b. In addition to the standard model, it is enhanced with a RAG expert system that chunks and embeds any text files placed in `./docs`. It also exposes several utility tools the LLM can use to obtain real-time data.
-Internally, it is built using PyTorch 2.6, Intel IPEX/LLM, and Python 3.11 (several pip packages were not yet available for Python 3.12 shipped with Ubuntu Oracular 24.10, which these containers are based on.)
+Internally, it is built using PyTorch 2.6 and Python 3.11 (several pip packages were not yet available for Python 3.12 shipped with Ubuntu Oracular 24.10, which these containers are based on.)
-NOTE: If running on an Intel Arc A series graphics processor, fp64 is not supported and may need to either be emulated or have the model quantized. It has been a while since I've had an A series GPU to test on, so if you run into problems please file an [issue](https://github.com/jketreno/airc/issues)--I have some routines I can put in, but don't have a way to test them.
+NOTE: If running on an Intel Arc A series graphics processor, fp64 is not supported and may need to either be emulated or have the model quantized. It has been a while since I've had an A series GPU to test on, so if you run into problems please file an [issue](https://github.com/jketreno/backstory/issues)--I have some routines I can put in, but don't have a way to test them.
 # Installation
@@ -26,8 +26,8 @@ NOTE: You need 'docker compose' installed.
See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/)
 ```bash
-git clone https://github.com/jketreno/airc
-cd airc
+git clone https://github.com/jketreno/backstory
+cd backstory
 docker compose build
 ```
@@ -37,12 +37,12 @@ This project provides the following containers:
 | Container | Purpose |
 |:----------|:---------------------------------------------------------------|
-| airc | Base container with GPU packages installed and configured |
-| jupyter | airc + Jupyter notebook for running Jupyter sessions |
+| backstory | Base container with GPU packages installed and configured. Main server entry point. Also used for frontend development. |
+| jupyter | backstory + Jupyter notebook for running Jupyter sessions |
 | miniircd | Tiny deployment of an IRC server for testing IRC agents |
 | ollama | Installation of Intel's pre-built Ollama.cpp |
-While developing airc, sometimes Hugging Face is used directly with models loaded via PyTorch. At other times, especially during rapid-development, the ollama deployment is used. This combination allows you to easily access GPUs running either locally (via the local ollama or HF code)
+While developing Backstory, sometimes Hugging Face is used directly with models loaded via PyTorch. At other times, especially during rapid development, the ollama deployment is used. This combination allows you to easily access GPUs running locally (via either the local ollama or the HF code).
 To see which models are easily deployable with Ollama, see the [Ollama Model List](https://ollama.com/search).
@@ -83,33 +83,43 @@ directory which will enable model downloads to be persisted.
 NOTE: Models downloaded by most examples will be placed in the ./cache directory, which is bind mounted to the container.
-### AIRC
+### Backstory
-To launch the airc shell interactively, with the pytorch 2.6 environment loaded, use the default entrypoint to launch a shell:
+If you just want to run the pre-built environment, you can run:
 ```bash
-docker compose run --rm airc shell
+docker compose up -d
 ```
-Once in the shell, you can then launch the model-server.py and then the airc.py client:
+That will launch all the required containers. Once loaded, the following ports are exposed:
+
+#### Container: backstory
+
+* 8911 - HTTP for the chat server. If you want HTTPS (recommended), use an nginx reverse proxy to provide that endpoint. See src/server.py WEB_PORT and the docker-compose `ports` entry under the `backstory` service. This port is safe to expose to the Internet if you want to serve this from your own system.
+* 3000 - During interactive development of the frontend, the React development server can be found at this port. By default, static content is served through port 8911. Do not expose this port to the Internet.
+
+#### Container: jupyter
+
+* 8888 - Jupyter Notebook. You can access this port for a Jupyter notebook running on top of the `backstory` base container.
+* 60673 - This allows you to connect to Gradio apps from outside the container, provided you launch Gradio on port 60673 with `.launch(server_name="0.0.0.0", server_port=60673)`.
+
+#### Container: ollama
+
+* 11434 - ollama server port. This should not be exposed to the Internet. You can use it via curl/wget locally, as shown in the example after this list. The `backstory` and `jupyter` containers are on the same Docker network, so they do not need this port exposed if you don't want it. See docker-compose.yml `ports` under `ollama`.
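+
+For example, a quick sanity check of the ollama API from the host (this sketch assumes the default `11434` port mapping is kept and that a model such as qwen2.5:7b has already been pulled):
+
+```bash
+# List the models ollama currently has available
+curl http://localhost:11434/api/tags
+
+# Run a one-off, non-streaming generation
+curl http://localhost:11434/api/generate \
+  -d '{"model": "qwen2.5:7b", "prompt": "Say hello", "stream": false}'
+```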
+
+Once the above is running, to launch the backstory shell interactively:
 ```bash
-docker compose run --rm airc shell
-src/airc.py --ai-server=http://localhost:5000 &
-src/model-server.py
+docker compose exec -it backstory shell
 ```
-By default, src/airc.py will connect to irc.libera.chat on the airc-test channel. See `python src/airc.py --help` for options.
-
-By separating the model-server into its own process, you can develop and tweak the chat backend without losing the IRC connection established by airc.
-
 ### Jupyter
 ```bash
 docker compose up jupyter -d
 ```
-The default port for inbound connections is 8888 (see docker-compose.yml). $(pwd)/jupyter is bind mounted to /opt/juypter in the container, which is where notebooks will be saved by default.
+The default port for inbound connections is 8888 (see docker-compose.yml). $(pwd)/jupyter is bind mounted to `/opt/jupyter` in the container, which is where notebooks will be saved by default.
 To access the jupyter notebook, go to `https://localhost:8888/jupyter`.
@@ -118,28 +128,17 @@ To access the jupyter notebook, go to `https://localhost:8888/jupyter`.
 ### Monitoring
 You can run `ze-monitor` within the launched containers to monitor GPU usage.
 ```bash
-containers=($(docker ps --filter "ancestor=airc" --format "{{.ID}}"))
-if [[ ${#containers[*]} -eq 0 ]]; then
- echo "Running airc container not found."
-else
- for container in ${containers[@]}; do
- echo "Container ${container} devices:"
- docker exec -it ${container} ze-monitor
- done
-fi
+docker compose exec backstory ze-monitor --list
 ```
-If an airc container is running, you should see something like:
-
 ```
 Container 5317c503e771 devices:
 Device 1: 8086:A780 (Intel(R) UHD Graphics 770)
 Device 2: 8086:E20B (Intel(R) Graphics [0xe20b])
 ```
-You can then launch ze-monitor in that container specifying the device you wish to monitor:
+To monitor a device:
-```
-containers=($(docker ps --filter "ancestor=airc" --format "{{.ID}}"))
-docker exec -it ${containers[0]} ze-monitor --device 2
+```bash
+docker compose exec backstory ze-monitor --device 2
 ```
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index 5a30d46..cc96801 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,10 +1,10 @@
 services:
-  airc:
+  backstory:
     build:
       context: .
       dockerfile: Dockerfile
-      target: airc
-    image: airc
+      target: backstory
+    image: backstory
     restart: "no"
     env_file:
       - .env
@@ -15,13 +15,14 @@ services:
     networks:
       - internal
     ports:
-      - 8911:8911
+      - 8911:8911 # Chat server (REST API; also serves the static frontend)
+      - 3000:3000 # React dev server while developing frontend
     volumes:
-      - ./cache:/root/.cache
-      - ./src:/opt/airc/src:rw
-      - ./doc:/opt/airc/doc:ro
-      - ./results:/opt/airc/results:rw
-      - ./ketr-chat:/opt/airc/ketr-chat:rw # Live mount src
+      - ./cache:/root/.cache # Persist all models and GPU kernel cache
+      - ./sessions:/opt/backstory/sessions:rw # Persist sessions
+      - ./docs:/opt/backstory/docs:ro # Live mount of RAG content
+      - ./src:/opt/backstory/src:rw # Live mount server src
+      - ./frontend:/opt/backstory/frontend:rw # Live mount frontend src
     cap_add: # used for running ze-monitor within container
       - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
       - CAP_PERFMON # Access to perf_events (vs.
overloaded CAP_SYS_ADMIN) @@ -69,15 +70,11 @@ services: ports: - 8888:8888 # Jupyter Notebook - 60673:60673 # Gradio - - 5000:5000 # Flask React server - - 3000:3000 # REACT expo networks: - internal volumes: - ./jupyter:/opt/jupyter:rw - ./cache:/root/.cache - - ./src:/opt/airc/src:rw # Live mount src - - ./ketr-chat:/opt/airc/ketr-chat:rw # Live mount src deploy: resources: limits: diff --git a/ketr-chat/.gitignore b/frontend/.gitignore similarity index 100% rename from ketr-chat/.gitignore rename to frontend/.gitignore diff --git a/ketr-chat/README.md b/frontend/README.md similarity index 100% rename from ketr-chat/README.md rename to frontend/README.md diff --git a/frontend/favicon.ico b/frontend/favicon.ico new file mode 100755 index 0000000..48af0e0 Binary files /dev/null and b/frontend/favicon.ico differ diff --git a/frontend/favicon.png b/frontend/favicon.png new file mode 100644 index 0000000..d1204df Binary files /dev/null and b/frontend/favicon.png differ diff --git a/ketr-chat/package-lock.json b/frontend/package-lock.json similarity index 99% rename from ketr-chat/package-lock.json rename to frontend/package-lock.json index ac4a5ee..1442e65 100644 --- a/ketr-chat/package-lock.json +++ b/frontend/package-lock.json @@ -1,11 +1,11 @@ { - "name": "ketr-chat", + "name": "airc", "version": "0.1.0", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "ketr-chat", + "name": "airc", "version": "0.1.0", "dependencies": { "@emotion/react": "^11.14.0", diff --git a/ketr-chat/package.json b/frontend/package.json similarity index 98% rename from ketr-chat/package.json rename to frontend/package.json index ef15fe7..5af860c 100644 --- a/ketr-chat/package.json +++ b/frontend/package.json @@ -1,5 +1,5 @@ { - "name": "ketr-chat", + "name": "airc", "version": "0.1.0", "private": true, "dependencies": { diff --git a/ketr-chat/public/adaptive-icon.png b/frontend/public/adaptive-icon.png similarity index 100% rename from ketr-chat/public/adaptive-icon.png rename to frontend/public/adaptive-icon.png diff --git a/frontend/public/backstory_favicon.ico b/frontend/public/backstory_favicon.ico new file mode 100755 index 0000000..cc83850 Binary files /dev/null and b/frontend/public/backstory_favicon.ico differ diff --git a/ketr-chat/public/disable-jpk.png b/frontend/public/disable-jpk.png similarity index 100% rename from ketr-chat/public/disable-jpk.png rename to frontend/public/disable-jpk.png diff --git a/frontend/public/docs/about.md b/frontend/public/docs/about.md new file mode 100644 index 0000000..621ec16 --- /dev/null +++ b/frontend/public/docs/about.md @@ -0,0 +1,7 @@ +# About Backstory + +This application was developed to achieve a few goals: + +1. See if it is realistic to self-host AI LLMs. Turns out, it is -- with constraints. +2. Provide a recent example of my capabilities; many of my projects while working for Intel were internally facing. The source code to this project is available on [GitHub](https://github.com/jketreno/backstory). +3. My career at Intel was diverse. Over the years, I have worked on many projects almost everywhere in the computer ecosystem. That results in a resume that is either too long, or too short. This application is intended to provide a quick way for employers to ask the LLM about me. 
\ No newline at end of file diff --git a/frontend/public/favicon.ico b/frontend/public/favicon.ico new file mode 100755 index 0000000..b5e7fd6 Binary files /dev/null and b/frontend/public/favicon.ico differ diff --git a/frontend/public/favicon.png b/frontend/public/favicon.png new file mode 100755 index 0000000..a3b4495 Binary files /dev/null and b/frontend/public/favicon.png differ diff --git a/ketr-chat/public/icon.png b/frontend/public/icon.png similarity index 100% rename from ketr-chat/public/icon.png rename to frontend/public/icon.png diff --git a/ketr-chat/public/index.html b/frontend/public/index.html similarity index 97% rename from ketr-chat/public/index.html rename to frontend/public/index.html index c0e81d0..96ae9ba 100644 --- a/ketr-chat/public/index.html +++ b/frontend/public/index.html @@ -24,7 +24,7 @@ work correctly both with client-side routing and a non-root public URL. Learn how to configure a non-root public URL by running `npm run build`. --> - ai.ketrenos.com + Backstory diff --git a/frontend/public/logo.png b/frontend/public/logo.png new file mode 100755 index 0000000..ae6209f Binary files /dev/null and b/frontend/public/logo.png differ diff --git a/frontend/public/logo192.png b/frontend/public/logo192.png new file mode 100644 index 0000000..1b3f168 Binary files /dev/null and b/frontend/public/logo192.png differ diff --git a/frontend/public/logo512.png b/frontend/public/logo512.png new file mode 100644 index 0000000..f59e129 Binary files /dev/null and b/frontend/public/logo512.png differ diff --git a/frontend/public/main-logo.png b/frontend/public/main-logo.png new file mode 100755 index 0000000..ae5ddec Binary files /dev/null and b/frontend/public/main-logo.png differ diff --git a/ketr-chat/public/manifest.json b/frontend/public/manifest.json similarity index 100% rename from ketr-chat/public/manifest.json rename to frontend/public/manifest.json diff --git a/ketr-chat/public/partial-react-logo.png b/frontend/public/partial-react-logo.png similarity index 100% rename from ketr-chat/public/partial-react-logo.png rename to frontend/public/partial-react-logo.png diff --git a/ketr-chat/public/react-logo.png b/frontend/public/react-logo.png similarity index 100% rename from ketr-chat/public/react-logo.png rename to frontend/public/react-logo.png diff --git a/ketr-chat/public/react-logo@2x.png b/frontend/public/react-logo@2x.png similarity index 100% rename from ketr-chat/public/react-logo@2x.png rename to frontend/public/react-logo@2x.png diff --git a/ketr-chat/public/react-logo@3x.png b/frontend/public/react-logo@3x.png similarity index 100% rename from ketr-chat/public/react-logo@3x.png rename to frontend/public/react-logo@3x.png diff --git a/ketr-chat/public/robots.txt b/frontend/public/robots.txt similarity index 100% rename from ketr-chat/public/robots.txt rename to frontend/public/robots.txt diff --git a/ketr-chat/public/settings.png b/frontend/public/settings.png similarity index 100% rename from ketr-chat/public/settings.png rename to frontend/public/settings.png diff --git a/ketr-chat/public/splash-icon.png b/frontend/public/splash-icon.png similarity index 100% rename from ketr-chat/public/splash-icon.png rename to frontend/public/splash-icon.png diff --git a/ketr-chat/src/App.css b/frontend/src/App.css similarity index 78% rename from ketr-chat/src/App.css rename to frontend/src/App.css index e3632b4..efb6bad 100644 --- a/ketr-chat/src/App.css +++ b/frontend/src/App.css @@ -2,6 +2,20 @@ div { box-sizing: border-box } +.TabPanel { + display: flex; + height: 
100%; +} + +.MuiToolbar-root .MuiBox-root { + border-bottom: none; +} + +.MuiTabs-root .MuiTabs-indicator { + background-color: orange; + +} + .SystemInfo { display: flex; flex-direction: column; @@ -32,7 +46,7 @@ div { display: flex; flex-direction: column; flex-grow: 1; - max-width: 800px; + max-width: 1024px; margin: 0 auto; } @@ -67,6 +81,7 @@ div { padding: 10px; flex-direction: column; height: 100%; + max-height: 100%; } .user-message.MuiCard-root { @@ -89,6 +104,7 @@ div { flex-grow: 0; } +.About.MuiCard-root, .assistant-message.MuiCard-root { border: 1px solid #E0E0E0; background-color: #FFFFFF; @@ -108,18 +124,30 @@ div { font-size: 0.9rem; } + +.About.MuiCard-root { + display: flex; + flex-grow: 1; + width: 100%; + margin-left: 0; + margin-right: 0; +} + +.About .MuiCardContent-root, .assistant-message .MuiCardContent-root { padding: 0 16px !important; font-size: 0.9rem; } +.About span, .assistant-message span { font-size: 0.9rem; } .user-message .MuiCardContent-root:last-child, -.assistant-message .MuiCardContent-root:last-child { - padding: 16px; +.assistant-message .MuiCardContent-root:last-child, +.About .MuiCardContent-root:last-child { + padding: 16px; } .users > div { @@ -137,6 +165,7 @@ div { } /* Reduce general whitespace in markdown content */ +.About p.MuiTypography-root, .assistant-message p.MuiTypography-root { margin-top: 0.5rem; margin-bottom: 0.5rem; @@ -149,7 +178,13 @@ div { .assistant-message h3.MuiTypography-root, .assistant-message h4.MuiTypography-root, .assistant-message h5.MuiTypography-root, -.assistant-message h6.MuiTypography-root { +.assistant-message h6.MuiTypography-root, +.About h1.MuiTypography-root, +.About h2.MuiTypography-root, +.About h3.MuiTypography-root, +.About h4.MuiTypography-root, +.About h5.MuiTypography-root, +.About h6.MuiTypography-root { margin-top: 1rem; margin-bottom: 0.5rem; font-size: 1rem; @@ -157,17 +192,21 @@ div { /* Reduce space in lists */ .assistant-message ul.MuiTypography-root, -.assistant-message ol.MuiTypography-root { - margin-top: 0.5rem; +.assistant-message ol.MuiTypography-root, +.About ul.MuiTypography-root, +.About ol.MuiTypography-root { + margin-top: 0.5rem; margin-bottom: 0.5rem; font-size: 0.9rem; } +.About li.MuiTypography-root, .assistant-message li.MuiTypography-root { margin-bottom: 0.25rem; font-size: 0.9rem; } +.About .MuiTypography-root li, .assistant-message .MuiTypography-root li { margin-top: 0; margin-bottom: 0; @@ -176,6 +215,7 @@ div { } /* Reduce space around code blocks */ +.About .MuiTypography-root pre, .assistant-message .MuiTypography-root pre { border: 1px solid #F5F5F5; border-radius: 0.5rem; diff --git a/ketr-chat/src/App.tsx b/frontend/src/App.tsx similarity index 74% rename from ketr-chat/src/App.tsx rename to frontend/src/App.tsx index a713928..abb0d53 100644 --- a/ketr-chat/src/App.tsx +++ b/frontend/src/App.tsx @@ -2,6 +2,9 @@ import React, { useState, useEffect, useRef, useCallback, ReactElement } from 'r import FormGroup from '@mui/material/FormGroup'; import FormControlLabel from '@mui/material/FormControlLabel'; import { styled } from '@mui/material/styles'; +import Avatar from '@mui/material/Avatar'; +import Tabs from '@mui/material/Tabs'; +import Tab from '@mui/material/Tab'; import Switch from '@mui/material/Switch'; import Divider from '@mui/material/Divider'; import Tooltip from '@mui/material/Tooltip'; @@ -19,7 +22,7 @@ import Drawer from '@mui/material/Drawer'; import Toolbar from '@mui/material/Toolbar'; import SettingsIcon from '@mui/icons-material/Settings'; import 
CloseIcon from '@mui/icons-material/Close'; -import IconButton, { IconButtonProps } from '@mui/material/IconButton'; +import IconButton from '@mui/material/IconButton'; import Box from '@mui/material/Box'; import CssBaseline from '@mui/material/CssBaseline'; import ResetIcon from '@mui/icons-material/History'; @@ -27,22 +30,16 @@ import SendIcon from '@mui/icons-material/Send'; import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; import Card from '@mui/material/Card'; import CardContent from '@mui/material/CardContent'; -import CardActions from '@mui/material/CardActions'; -import Collapse from '@mui/material/Collapse'; -import Table from '@mui/material/Table'; -import TableBody from '@mui/material/TableBody'; -import TableCell from '@mui/material/TableCell'; -import TableContainer from '@mui/material/TableContainer'; -import TableHead from '@mui/material/TableHead'; -import TableRow from '@mui/material/TableRow'; import PropagateLoader from "react-spinners/PropagateLoader"; import { MuiMarkdown } from "mui-markdown"; -import ReactMarkdown from 'react-markdown'; -import rehypeKatex from 'rehype-katex' -import remarkMath from 'remark-math' -import 'katex/dist/katex.min.css' // `rehype-katex` does not import the CSS for you + +import { ResumeBuilder } from './ResumeBuilder'; +import { Message, MessageList } from './Message'; +import { SeverityType } from './Snack'; +import { ContextStatus } from './ContextStatus'; + import './App.css'; @@ -51,13 +48,10 @@ import '@fontsource/roboto/400.css'; import '@fontsource/roboto/500.css'; import '@fontsource/roboto/700.css'; -//const use_mui_markdown = true -const use_mui_markdown = true - const welcomeMarkdown = ` -# Welcome to AIRC +# Welcome to Backstory -This LLM agent was built by James Ketrenos in order to provide answers to any questions you may have about his work history. In addition to being a RAG enabled expert system, the LLM is configured with real-time access to weather, stocks, the current time, and can answer questions about the contents of a website. +Backstory was written by James Ketrenos in order to provide answers to questions potential employers may have about his work history. In addition to being a RAG enabled expert system, the LLM is configured with real-time access to weather, stocks, the current time, and can answer questions about the contents of a website. You can ask things like: * @@ -68,7 +62,7 @@ You can ask things like: Or click the text above to submit that query. -As with all LLM interactions, the results may not be 100% accurate. If you have questions about my career, I'd love to hear from you. You can send me an email at **james_airc@ketrenos.com**. +As with all LLM interactions, the results may not be 100% accurate. If you have questions about my career, I'd love to hear from you. You can send me an email at **james_backstory@ketrenos.com**. 
`; const welcomeMessage = { @@ -89,8 +83,6 @@ type Tool = { enabled: boolean }; -type SeverityType = 'error' | 'info' | 'success' | 'warning' | undefined; - interface ControlsParams { tools: Tool[], rags: Tool[], @@ -115,33 +107,13 @@ type SystemInfo = { "CPU": string }; -type MessageMetadata = { - rag: any, - tools: any[], - eval_count: number, - eval_duration: number, - prompt_eval_count: number, - prompt_eval_duration: number -}; - -type MessageData = { - role: string, - content: string, - user?: string, - type?: string, - id?: string, - isProcessing?: boolean, - metadata?: MessageMetadata -}; - -type MessageList = MessageData[]; const getConnectionBase = (loc: any): string => { if (!loc.host.match(/.*battle-linux.*/)) { return loc.protocol + "//" + loc.host; } else { - return loc.protocol + "//battle-linux.ketrenos.com:5000"; + return loc.protocol + "//battle-linux.ketrenos.com:8911"; } } @@ -316,197 +288,30 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, messageHis ); } -interface ExpandMoreProps extends IconButtonProps { - expand: boolean; + +interface TabPanelProps { + children?: React.ReactNode; + index: number; + tab: number; } -const ExpandMore = styled((props: ExpandMoreProps) => { - const { expand, ...other } = props; - return ; -})(({ theme }) => ({ - marginLeft: 'auto', - transition: theme.transitions.create('transform', { - duration: theme.transitions.duration.shortest, - }), - variants: [ - { - props: ({ expand }) => !expand, - style: { - transform: 'rotate(0deg)', - }, - }, - { - props: ({ expand }) => !!expand, - style: { - transform: 'rotate(180deg)', - }, - }, - ], -})); - -interface MessageInterface { - message: MessageData, - submitQuery: (text: string) => void -}; - -interface MessageMetaInterface { - metadata: MessageMetadata -} -const MessageMeta = ({ metadata }: MessageMetaInterface) => { - if (metadata === undefined) { - return <> - } - - return (<> - - Below is the LLM performance of this query. Note that if tools are called, the entire context is processed for each separate tool request by the LLM. This can dramatically increase the total time for a response. - - - - - - - Tokens - Time (s) - TPS - - - - - Prompt - {metadata.prompt_eval_count} - {Math.round(metadata.prompt_eval_duration / 10 ** 7) / 100} - {Math.round(metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration)} - - - Response - {metadata.eval_count} - {Math.round(metadata.eval_duration / 10 ** 7) / 100} - {Math.round(metadata.eval_count * 10 ** 9 / metadata.eval_duration)} - - - Total - {metadata.prompt_eval_count + metadata.eval_count} - {Math.round((metadata.prompt_eval_duration + metadata.eval_duration) / 10 ** 7) / 100} - {Math.round((metadata.prompt_eval_count + metadata.eval_count) * 10 ** 9 / (metadata.prompt_eval_duration + metadata.eval_duration))} - - -
-
- { - metadata.tools !== undefined && metadata.tools.length !== 0 && - - }> - - Tools queried - - - - {metadata.tools.map((tool: any, index: number) => - {index !== 0 && } - -
- {tool.tool} -
-
{JSON.stringify(tool.result, null, 2)}
-
-
)} -
-
- } - { - metadata.rag.name !== undefined && - - }> - - Top RAG {metadata.rag.ids.length} matches from '{metadata.rag.name}' collection against embedding vector of {metadata.rag.query_embedding.length} dimensions - - - - {metadata.rag.ids.map((id: number, index: number) => - {index !== 0 && } - -
-
Doc ID: {metadata.rag.ids[index]}
-
Similarity: {Math.round(metadata.rag.distances[index] * 100) / 100}
-
Type: {metadata.rag.metadatas[index].doc_type}
-
Chunk Len: {metadata.rag.documents[index].length}
-
-
{metadata.rag.documents[index]}
-
-
- )} -
-
- } - - ); -}; - -interface ChatQueryInterface { - text: string, - submitQuery: (text: string) => void -} - -const ChatQuery = ({ text, submitQuery }: ChatQueryInterface) => { - return (); -} - -const Message = ({ message, submitQuery }: MessageInterface) => { - const [expanded, setExpanded] = React.useState(false); - - const handleExpandClick = () => { - setExpanded(!expanded); - }; - - const formattedContent = message.content.trim(); +function CustomTabPanel(props: TabPanelProps) { + const { children, tab, index, ...other } = props; return ( - - - {message.role === 'assistant' ? - use_mui_markdown ? : - : - - {message.content} - - } - - {message.metadata && <> - - LLM information for this query - - - - - - - - - - } - +
+    <div role="tabpanel" hidden={tab !== index} {...other}>
+      {tab === index && children}
+    </div>
); } -type ContextStatus = { - context_used: number, - max_context: number -}; - const App = () => { const [query, setQuery] = useState(''); const [conversation, setConversation] = useState([]); @@ -531,7 +336,8 @@ const App = () => { const [lastPromptTPS, setLastPromptTPS] = useState(430); const [countdown, setCountdown] = useState(0); const [messageHistoryLength, setMessageHistoryLength] = useState(5); - + const [tab, setTab] = useState(0); + const [about, setAbout] = useState(""); const timerRef = useRef(null); const startCountdown = (seconds: number) => { @@ -611,6 +417,33 @@ const App = () => { }); }, [systemInfo, setSystemInfo, loc, setSnack, sessionId]) + // Get the About markdown + useEffect(() => { + if (about !== "") { + return; + } + const fetchAbout = async () => { + try { + const response = await fetch("/docs/about.md", { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + }, + }); + if (!response.ok) { + throw Error("/docs/about.md not found"); + } + const data = await response.text(); + setAbout(data); + } catch (error: any) { + console.error('Error obtaining About content information:', error); + setAbout("No information provided."); + }; + }; + + fetchAbout(); + }, [about, setAbout]) + // Update the context status const updateContextStatus = useCallback(() => { fetch(getConnectionBase(loc) + `/api/context-status/${sessionId}`, { @@ -963,7 +796,7 @@ const App = () => { case 'QueryInput': sendQuery(query); break; - } + } } }; @@ -1148,6 +981,7 @@ const App = () => { } }; + const handleSnackClose = ( event: React.SyntheticEvent | Event, reason?: SnackbarCloseReason, @@ -1159,6 +993,10 @@ const App = () => { setSnackOpen(false); }; + const handleTabChange = (event: React.SyntheticEvent, newValue: number) => { + setTab(newValue); + }; + const Offset = styled('div')(({ theme }) => theme.mixins.toolbar); return ( @@ -1194,9 +1032,16 @@ const App = () => {
- - ai.ketrenos.com - + + + } iconPosition="start" /> + + + + { mobileOpen === true && @@ -1244,62 +1089,79 @@ const App = () => { {drawer} - - - {conversation.map((message, index) => )} - - + + + {conversation.map((message, index) => )} + + + {processing === true && countdown > 0 && ( + Estimated response time: {countdown}s + )} + + + Context used: {contextUsedPercentage}% {contextStatus.context_used}/{contextStatus.max_context} + { + contextUsedPercentage >= 90 ? WARNING: Context almost exhausted. You should start a new chat. + : (contextUsedPercentage >= 50 ? NOTE: Context is getting long. Queries will be slower, and the LLM may stop issuing tool calls. + : <>) + } + + + + setQuery(e.target.value)} + onKeyDown={handleKeyPress} + placeholder="Enter your question..." + id="QueryInput" /> - {processing === true && countdown > 0 && ( - Estimated response time: {countdown}s - )} - - - Context used: {contextUsedPercentage}% {contextStatus.context_used}/{contextStatus.max_context} - { - contextUsedPercentage >= 90 ? WARNING: Context almost exhausted. You should start a new chat. - : (contextUsedPercentage >= 50 ? NOTE: Context is getting long. Queries will be slower, and the LLM may stop issuing tool calls. - : <>) - } - - - - setQuery(e.target.value)} - onKeyDown={handleKeyPress} - placeholder="Enter your question..." - id="QueryInput" - /> - - + - + - - + + + + + + + + + + + {about} + + + + + + + ; +} + +function ChatBubble({ isUser, isFullWidth, children, sx }: ChatBubbleProps) { + const theme = useTheme(); + + const userStyle = { + backgroundColor: theme.palette.background.default, // Warm Gray (#D3CDBF) + border: `1px solid ${theme.palette.custom.highlight}`, // Golden Ochre (#D4A017) + borderRadius: '16px 16px 0 16px', // Rounded, flat bottom-right for user + padding: theme.spacing(1, 2), + maxWidth: isFullWidth ? '100%' : '70%', + minWidth: '70%', + alignSelf: 'flex-end', // Right-aligned for user + color: theme.palette.primary.main, // Midnight Blue (#1A2536) for text + '& > *': { + color: 'inherit', // Children inherit Midnight Blue unless overridden + }, + }; + + const assistantStyle = { + backgroundColor: theme.palette.primary.main, // Midnight Blue (#1A2536) + border: `1px solid ${theme.palette.secondary.main}`, // Dusty Teal (#4A7A7D) + borderRadius: '16px 16px 16px 0', // Rounded, flat bottom-left for assistant + padding: theme.spacing(1, 2), + maxWidth: isFullWidth ? 
'100%' : '70%', + minWidth: '70%', + alignSelf: 'flex-start', // Left-aligned for assistant + color: theme.palette.primary.contrastText, // Warm Gray (#D3CDBF) for text + '& > *': { + color: 'inherit', // Children inherit Warm Gray unless overridden + }, + }; + + return ( + + {children} + + ); +} + +export type { + ChatBubbleProps +}; + +export { + ChatBubble +}; + diff --git a/frontend/src/ContextStatus.tsx b/frontend/src/ContextStatus.tsx new file mode 100644 index 0000000..b4f8464 --- /dev/null +++ b/frontend/src/ContextStatus.tsx @@ -0,0 +1,8 @@ +type ContextStatus = { + context_used: number, + max_context: number +}; + +export type { + ContextStatus +}; \ No newline at end of file diff --git a/frontend/src/DocumentViewer.tsx b/frontend/src/DocumentViewer.tsx new file mode 100644 index 0000000..5e9071a --- /dev/null +++ b/frontend/src/DocumentViewer.tsx @@ -0,0 +1,201 @@ +import React, { useState } from 'react'; +import { + Typography, + Button, + Tabs, + Tab, + Paper, + IconButton, + Box, + useMediaQuery, + Divider, + Slider, + Stack, + TextField +} from '@mui/material'; +import Tooltip from '@mui/material/Tooltip'; +import { useTheme } from '@mui/material/styles'; +import SendIcon from '@mui/icons-material/Send'; +import { + ChevronLeft, + ChevronRight, + SwapHoriz, +} from '@mui/icons-material'; +import { SxProps, Theme } from '@mui/material'; +import { MuiMarkdown } from "mui-markdown"; + +import { MessageData } from './MessageMeta'; + +interface DocumentComponentProps { + title: string; + children?: React.ReactNode; +} + +interface DocumentViewerProps { + generateResume: (jobDescription: string) => void, + resume: MessageData | undefined, + sx?: SxProps, +}; + +const DocumentViewer: React.FC = ({generateResume, resume, sx} : DocumentViewerProps) => { + const [jobDescription, setJobDescription] = useState(""); + const theme = useTheme(); + const isMobile = useMediaQuery(theme.breakpoints.down('md')); + + // State for controlling which document is active on mobile + const [activeDocMobile, setActiveDocMobile] = useState(0); + // State for controlling split ratio on desktop + const [splitRatio, setSplitRatio] = useState(50); + + // Handle tab change for mobile + const handleTabChange = (_event: React.SyntheticEvent, newValue: number): void => { + setActiveDocMobile(newValue); + }; + + // Adjust split ratio + const handleSliderChange = (_event: Event, newValue: number | number[]): void => { + setSplitRatio(newValue as number); + }; + + // Reset split ratio + const resetSplit = (): void => { + setSplitRatio(50); + }; + + const handleKeyPress = (event: any) => { + if (event.key === 'Enter' && event.ctrlKey) { + generateResume(jobDescription); + } + }; + + // Document component + const Document: React.FC = ({ title, children }) => ( + + { title !== "" && + + {title} + } + + {children} + + + ); + + // Mobile view + if (isMobile) { + return ( + + {/* Tabs */} + + + + + + {/* Document display area */} + + {activeDocMobile === 0 ? (<> + + setJobDescription(e.target.value)} + onKeyDown={handleKeyPress} + placeholder="Enter job description..." + /> + + + ) : ( + { resume !== undefined && } + )} + + + ); + } + + // Desktop view + return ( + + {/* Split document view */} + + + + setJobDescription(e.target.value)} + onKeyDown={handleKeyPress} + placeholder="Enter job description..." 
+ /> + + + + + + + + { resume !== undefined && } + + + {/* Split control panel */} + + + setSplitRatio(Math.max(20, splitRatio - 10))}> + + + + + + setSplitRatio(Math.min(80, splitRatio + 10))}> + + + + + + + + + + ); +}; + +export type { + DocumentViewerProps +}; + +export { DocumentViewer }; \ No newline at end of file diff --git a/frontend/src/ExpandMore.tsx b/frontend/src/ExpandMore.tsx new file mode 100644 index 0000000..97b6457 --- /dev/null +++ b/frontend/src/ExpandMore.tsx @@ -0,0 +1,34 @@ +import { styled } from '@mui/material/styles'; +import IconButton, { IconButtonProps } from '@mui/material/IconButton'; + +interface ExpandMoreProps extends IconButtonProps { + expand: boolean; +} + +const ExpandMore = styled((props: ExpandMoreProps) => { + const { expand, ...other } = props; + return ; +})(({ theme }) => ({ + marginLeft: 'auto', + transition: theme.transitions.create('transform', { + duration: theme.transitions.duration.shortest, + }), + variants: [ + { + props: ({ expand }) => !expand, + style: { + transform: 'rotate(0deg)', + }, + }, + { + props: ({ expand }) => !!expand, + style: { + transform: 'rotate(180deg)', + }, + }, + ], +})); + +export { + ExpandMore +}; \ No newline at end of file diff --git a/frontend/src/Message.tsx b/frontend/src/Message.tsx new file mode 100644 index 0000000..d507963 --- /dev/null +++ b/frontend/src/Message.tsx @@ -0,0 +1,100 @@ +import { useState } from 'react'; +import Box from '@mui/material/Box'; +import Button from '@mui/material/Button'; +import CardContent from '@mui/material/CardContent'; +import CardActions from '@mui/material/CardActions'; +import Collapse from '@mui/material/Collapse'; +import { MuiMarkdown } from "mui-markdown"; +import Typography from '@mui/material/Typography'; +import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; +import { ExpandMore } from './ExpandMore'; + +import { MessageData, MessageMeta } from './MessageMeta'; +import { ChatBubble } from './ChatBubble'; + +type MessageList = MessageData[]; + +interface MessageInterface { + message?: MessageData, + isFullWidth?: boolean, + submitQuery?: (text: string) => void +}; + +interface ChatQueryInterface { + text: string, + submitQuery?: (text: string) => void +} + +const ChatQuery = ({ text, submitQuery }: ChatQueryInterface) => { + return (submitQuery + ? + : {text}); +} + +const Message = ({ message, submitQuery, isFullWidth }: MessageInterface) => { + const [expanded, setExpanded] = useState(false); + + const handleExpandClick = () => { + setExpanded(!expanded); + }; + + if (message === undefined) { + return (<>); + } + + const formattedContent = message.content.trim(); + + return ( + + + {message.role === 'assistant' ? 
+ + : + + {message.content} + + } + + {message.metadata && <> + + LLM information for this query + + + + + + + + + + } + + ); +}; + +export type { + MessageInterface, + MessageList +}; +export { + Message +}; + diff --git a/frontend/src/MessageMeta.tsx b/frontend/src/MessageMeta.tsx new file mode 100644 index 0000000..9ebef46 --- /dev/null +++ b/frontend/src/MessageMeta.tsx @@ -0,0 +1,135 @@ +//import React, { useState, useEffect, useRef, useCallback, ReactElement } from 'react'; +import Divider from '@mui/material/Divider'; +import Accordion from '@mui/material/Accordion'; +import AccordionSummary from '@mui/material/AccordionSummary'; +import AccordionDetails from '@mui/material/AccordionDetails'; +import Box from '@mui/material/Box'; +import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; +import Card from '@mui/material/Card'; +import Table from '@mui/material/Table'; +import TableBody from '@mui/material/TableBody'; +import TableCell from '@mui/material/TableCell'; +import TableContainer from '@mui/material/TableContainer'; +import TableHead from '@mui/material/TableHead'; +import TableRow from '@mui/material/TableRow'; + +type MessageMetadata = { + rag: any, + tools: any[], + eval_count: number, + eval_duration: number, + prompt_eval_count: number, + prompt_eval_duration: number +}; + +type MessageData = { + role: string, + content: string, + user?: string, + type?: string, + id?: string, + isProcessing?: boolean, + metadata?: MessageMetadata +}; + +interface MessageMetaInterface { + metadata: MessageMetadata +} +const MessageMeta = ({ metadata }: MessageMetaInterface) => { + if (metadata === undefined) { + return <> + } + + return (<> + + Below is the LLM performance of this query. Note that if tools are called, the entire context is processed for each separate tool request by the LLM. This can dramatically increase the total time for a response. + + + + + + + Tokens + Time (s) + TPS + + + + + Prompt + {metadata.prompt_eval_count} + {Math.round(metadata.prompt_eval_duration / 10 ** 7) / 100} + {Math.round(metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration)} + + + Response + {metadata.eval_count} + {Math.round(metadata.eval_duration / 10 ** 7) / 100} + {Math.round(metadata.eval_count * 10 ** 9 / metadata.eval_duration)} + + + Total + {metadata.prompt_eval_count + metadata.eval_count} + {Math.round((metadata.prompt_eval_duration + metadata.eval_duration) / 10 ** 7) / 100} + {Math.round((metadata.prompt_eval_count + metadata.eval_count) * 10 ** 9 / (metadata.prompt_eval_duration + metadata.eval_duration))} + + +
+
+ { + metadata.tools !== undefined && metadata.tools.length !== 0 && + + }> + + Tools queried + + + + {metadata.tools.map((tool: any, index: number) => + {index !== 0 && } + +
+ {tool.tool} +
+
{JSON.stringify(tool.result, null, 2)}
+
+
)} +
+
+ } + { + metadata.rag.name !== undefined && + + }> + + Top RAG {metadata.rag.ids.length} matches from '{metadata.rag.name}' collection against embedding vector of {metadata.rag.query_embedding.length} dimensions + + + + {metadata.rag.ids.map((id: number, index: number) => + {index !== 0 && } + +
+
Doc ID: {metadata.rag.ids[index]}
+
Similarity: {Math.round(metadata.rag.distances[index] * 100) / 100}
+
Type: {metadata.rag.metadatas[index].doc_type}
+
Chunk Len: {metadata.rag.documents[index].length}
+
+
{metadata.rag.documents[index]}
+
+
+ )} +
+
+ } + + ); +}; + +export type { + MessageMetadata, + MessageMetaInterface, + MessageData +}; + +export { MessageMeta }; \ No newline at end of file diff --git a/frontend/src/ResumeBuilder.tsx b/frontend/src/ResumeBuilder.tsx new file mode 100644 index 0000000..33c4cb0 --- /dev/null +++ b/frontend/src/ResumeBuilder.tsx @@ -0,0 +1,310 @@ +import { useState, useCallback, useRef } from 'react'; +import Box from '@mui/material/Box'; +import TextField from '@mui/material/TextField'; +import PropagateLoader from "react-spinners/PropagateLoader"; +import Tooltip from '@mui/material/Tooltip'; +import Button from '@mui/material/Button'; +import SendIcon from '@mui/icons-material/Send'; + +import { Message } from './Message'; +import { SeverityType } from './Snack'; +import { ContextStatus } from './ContextStatus'; +import { MessageData } from './MessageMeta'; +import { DocumentViewer } from './DocumentViewer'; + +interface ResumeBuilderProps { + scrollToBottom: () => void, + isScrolledToBottom: () => boolean, + setProcessing: (processing: boolean) => void, + processing: boolean, + connectionBase: string, + sessionId: string | undefined, + setSnack: (message: string, severity?: SeverityType) => void, +}; + +const ResumeBuilder = ({scrollToBottom, isScrolledToBottom, setProcessing, processing, connectionBase, sessionId, setSnack} : ResumeBuilderProps) => { + const [jobDescription, setJobDescription] = useState(""); + const [generateStatus, setGenerateStatus] = useState(undefined); + const [lastEvalTPS, setLastEvalTPS] = useState(35); + const [lastPromptTPS, setLastPromptTPS] = useState(430); + const [contextStatus, setContextStatus] = useState({ context_used: 0, max_context: 0 }); + const [countdown, setCountdown] = useState(0); + const [resume, setResume] = useState(undefined); + const timerRef = useRef(null); + + const updateContextStatus = useCallback(() => { + fetch(connectionBase + `/api/context-status/${sessionId}`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + }, + }) + .then(response => response.json()) + .then(data => { + setContextStatus(data); + }) + .catch(error => { + console.error('Error getting context status:', error); + setSnack("Unable to obtain context status.", "error"); + }); + }, [setContextStatus, connectionBase, setSnack, sessionId]); + + const startCountdown = (seconds: number) => { + if (timerRef.current) clearInterval(timerRef.current); + setCountdown(seconds); + timerRef.current = setInterval(() => { + setCountdown((prev) => { + if (prev <= 1) { + clearInterval(timerRef.current); + timerRef.current = null; + if (isScrolledToBottom()) { + setTimeout(() => { + scrollToBottom(); + }, 50) + } + return 0; + } + return prev - 1; + }); + }, 1000); + }; + + const stopCountdown = () => { + if (timerRef.current) { + clearInterval(timerRef.current); + timerRef.current = null; + setCountdown(0); + } + }; + + if (sessionId === undefined) { + return (<>); + } + + + + const handleKeyPress = (event: any) => { + if (event.key === 'Enter' && !event.ctrlKey) { + generateResume(jobDescription); + } + }; + + const generateResume = async (jobDescription: string) => { + if (!jobDescription.trim()) return; + // setResume(undefined); + + let scrolledToBottom; + + scrollToBottom(); + + try { + scrolledToBottom = isScrolledToBottom(); + setProcessing(true); + + // Add initial processing message + setGenerateStatus({ role: 'assistant', content: 'Processing request...' 
}); + if (scrolledToBottom) { + setTimeout(() => { scrollToBottom() }, 50); + } + + // Make the fetch request with proper headers + const response = await fetch(connectionBase + `/api/generate-resume/${sessionId}`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Accept': 'application/json', + }, + body: JSON.stringify({ content: jobDescription.trim() }), + }); + + // We'll guess that the response will be around 500 tokens... + const token_guess = 500; + const estimate = Math.round(token_guess / lastEvalTPS + contextStatus.context_used / lastPromptTPS); + + scrolledToBottom = isScrolledToBottom(); + setSnack(`Job description sent. Response estimated in ${estimate}s.`, "info"); + startCountdown(Math.round(estimate)); + if (scrolledToBottom) { + setTimeout(() => { scrollToBottom() }, 50); + } + + if (!response.ok) { + throw new Error(`Server responded with ${response.status}: ${response.statusText}`); + } + + if (!response.body) { + throw new Error('Response body is null'); + } + + // Set up stream processing with explicit chunking + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ''; + + while (true) { + const { done, value } = await reader.read(); + if (done) { + break; + } + + const chunk = decoder.decode(value, { stream: true }); + + // Process each complete line immediately + buffer += chunk; + let lines = buffer.split('\n'); + buffer = lines.pop() || ''; // Keep incomplete line in buffer + for (const line of lines) { + if (!line.trim()) continue; + + try { + const update = JSON.parse(line); + + // Force an immediate state update based on the message type + if (update.status === 'processing') { + scrolledToBottom = isScrolledToBottom(); + // Update processing message with immediate re-render + setGenerateStatus({ role: 'info', content: update.message }); + console.log(update.num_ctx); + if (scrolledToBottom) { + setTimeout(() => { scrollToBottom() }, 50); + } + + // Add a small delay to ensure React has time to update the UI + await new Promise(resolve => setTimeout(resolve, 0)); + + } else if (update.status === 'done') { + // Replace processing message with final result + scrolledToBottom = isScrolledToBottom(); + setGenerateStatus(undefined); + setResume(update.message); + const metadata = update.message.metadata; + const evalTPS = metadata.eval_count * 10 ** 9 / metadata.eval_duration; + const promptTPS = metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration; + setLastEvalTPS(evalTPS ? evalTPS : 35); + setLastPromptTPS(promptTPS ? 
promptTPS : 35); + updateContextStatus(); + if (scrolledToBottom) { + setTimeout(() => { scrollToBottom() }, 50); + } + } else if (update.status === 'error') { + // Show error + scrolledToBottom = isScrolledToBottom(); + setGenerateStatus({role: 'error', content: update.message }); + if (scrolledToBottom) { + setTimeout(() => { scrollToBottom() }, 50); + } + } + } catch (e) { + setSnack("Error generating resume", "error") + console.error('Error parsing JSON:', e, line); + } + } + } + + // Process any remaining buffer content + if (buffer.trim()) { + try { + const update = JSON.parse(buffer); + + if (update.status === 'done') { + scrolledToBottom = isScrolledToBottom(); + setGenerateStatus(undefined); + setResume(update.message); + if (scrolledToBottom) { + setTimeout(() => { scrollToBottom() }, 500); + } + } + } catch (e) { + setSnack("Error processing job description", "error") + } + } + + scrolledToBottom = isScrolledToBottom(); + stopCountdown(); + setProcessing(false); + if (scrolledToBottom) { + setTimeout(() => { scrollToBottom() }, 50); + } + } catch (error) { + console.error('Fetch error:', error); + setSnack("Unable to process job description", "error"); + scrolledToBottom = isScrolledToBottom(); + setGenerateStatus({ role: 'error', content: `Error: ${error}` }); + setProcessing(false); + stopCountdown(); + if (scrolledToBottom) { + setTimeout(() => { scrollToBottom() }, 50); + } + } + }; + + return ( + + + + + + ); + return ( + + setJobDescription(e.target.value)} + onKeyDown={handleKeyPress} + placeholder="Enter the job description.." + id="JobDescriptionInput" + /> + + + + + + {processing === true && countdown > 0 && ( + Estimated response time: {countdown}s + )} + + {generateStatus && } + {/* {resume && } */} + + + ); +} + + +export type { + ResumeBuilderProps +}; + +export { + ResumeBuilder +}; + diff --git a/frontend/src/Snack.tsx b/frontend/src/Snack.tsx new file mode 100644 index 0000000..1bef5a5 --- /dev/null +++ b/frontend/src/Snack.tsx @@ -0,0 +1,5 @@ +type SeverityType = 'error' | 'info' | 'success' | 'warning' | undefined; + +export type { + SeverityType +}; \ No newline at end of file diff --git a/ketr-chat/src/index.css b/frontend/src/index.css similarity index 100% rename from ketr-chat/src/index.css rename to frontend/src/index.css diff --git a/ketr-chat/src/index.tsx b/frontend/src/index.tsx similarity index 72% rename from ketr-chat/src/index.tsx rename to frontend/src/index.tsx index 032464f..dca0535 100644 --- a/ketr-chat/src/index.tsx +++ b/frontend/src/index.tsx @@ -1,5 +1,7 @@ import React from 'react'; import ReactDOM from 'react-dom/client'; +import { ThemeProvider } from '@mui/material/styles'; +import { backstoryTheme } from './BackstoryTheme'; // Adjust path as needed import './index.css'; import App from './App'; import reportWebVitals from './reportWebVitals'; @@ -9,7 +11,9 @@ const root = ReactDOM.createRoot( ); root.render( - + + + ); diff --git a/ketr-chat/src/logo.svg b/frontend/src/logo.svg similarity index 100% rename from ketr-chat/src/logo.svg rename to frontend/src/logo.svg diff --git a/ketr-chat/src/react-app-env.d.ts b/frontend/src/react-app-env.d.ts similarity index 100% rename from ketr-chat/src/react-app-env.d.ts rename to frontend/src/react-app-env.d.ts diff --git a/ketr-chat/src/reportWebVitals.ts b/frontend/src/reportWebVitals.ts similarity index 100% rename from ketr-chat/src/reportWebVitals.ts rename to frontend/src/reportWebVitals.ts diff --git a/ketr-chat/src/setupTests.ts b/frontend/src/setupTests.ts similarity index 100% rename 
from ketr-chat/src/setupTests.ts rename to frontend/src/setupTests.ts diff --git a/frontend/src/types/theme.d.ts b/frontend/src/types/theme.d.ts new file mode 100644 index 0000000..be21e5d --- /dev/null +++ b/frontend/src/types/theme.d.ts @@ -0,0 +1,16 @@ +import { Palette, PaletteOptions } from '@mui/material/styles'; + +declare module '@mui/material/styles' { + interface Palette { + custom: { + highlight: string; + contrast: string; + }; + } + interface PaletteOptions { + custom?: { + highlight: string; + contrast: string; + }; + } +} \ No newline at end of file diff --git a/ketr-chat/tsconfig.json b/frontend/tsconfig.json similarity index 100% rename from ketr-chat/tsconfig.json rename to frontend/tsconfig.json diff --git a/ketr-chat/favicon.ico b/ketr-chat/favicon.ico deleted file mode 100755 index ddf3b88..0000000 Binary files a/ketr-chat/favicon.ico and /dev/null differ diff --git a/ketr-chat/favicon.png b/ketr-chat/favicon.png deleted file mode 100644 index 9ad6bf3..0000000 Binary files a/ketr-chat/favicon.png and /dev/null differ diff --git a/ketr-chat/public/favicon.ico b/ketr-chat/public/favicon.ico deleted file mode 100644 index ddf3b88..0000000 Binary files a/ketr-chat/public/favicon.ico and /dev/null differ diff --git a/ketr-chat/public/favicon.png b/ketr-chat/public/favicon.png deleted file mode 100755 index 9ad6bf3..0000000 Binary files a/ketr-chat/public/favicon.png and /dev/null differ diff --git a/ketr-chat/public/logo192.png b/ketr-chat/public/logo192.png deleted file mode 100644 index 63b1235..0000000 Binary files a/ketr-chat/public/logo192.png and /dev/null differ diff --git a/ketr-chat/public/logo512.png b/ketr-chat/public/logo512.png deleted file mode 100644 index 84f69cf..0000000 Binary files a/ketr-chat/public/logo512.png and /dev/null differ diff --git a/src/doc/projects/ketr.chat.txt b/src/doc/projects/ketr.chat.txt deleted file mode 100644 index 6fcfa6a..0000000 --- a/src/doc/projects/ketr.chat.txt +++ /dev/null @@ -1,133 +0,0 @@ -# Ketr Chat - -This LLM agent was built by James Ketrenos in order to provide answers to any questions you may have about his work history. - -In addition to being a RAG enabled expert system, the LLM is configured with real-time access to weather, stocks, the current time, and can answer questions about the contents of a website. - -## Parts of Ketr Chat - -* Backend Server - Provides a custom REST API to support the capabilities exposed from the web UI. - * Pytorch used for LLM communication and inference - * ChromaDB as a vector store for embedding similarities - * FastAPI for the http REST API endpoints - * Serves the static site for production deployment - * Performs all communication with the LLM (currently via ollama.cpp, however I may be switching it back to Hugging Face transformers.) - * Implements the tool subsystem for tool callbacks from the LLM - * Manages a chromadb vector store, including the chunking and embedding of the documents used to provide RAG content related to my career. - * Manages all context sessions - * Currently using qwen2.5:7b, however I frequently switch between different models (llama3.2, deepseek-r1:7b, and mistral:7b.) I've generally had the best results from qwen2.5. DeepSeek-R1 was very cool; the thinking phase was informative for developing system prompts, however the integration with ollama does not support tool calls. That is one reason I'm looking to switch back to Hugging Face transformers. 
- * Languages: Python, bash - -* Web Frontend - Provides a responsive UI for interacting with the system - * Written using React and Mui. - * Exposes enough information to know what the LLM is doing on the backend - * Enables adjusting various parameters, including enabling/disabling tools and the RAG, system prompt, etc. - * Configured to be able to run in development and production. In development mode, the Server does not serve the Web Frontend and only acts as a REST API endpoint. - * Languages: JSX, JavaScript, TypeScript, bash - -* Ollama container - If you don't already have ollama installed and running, the container provided in this project is built using the Intel pre-built Ollama package. - -* Jupyter notebook - To facilitate rapid development and prototyping, a Jupyter notebook is provided which runs on the same Python package set as the main server container. - -# Installation - -This project uses docker containers to build. As this was originally written to work on an Intel Arc B580 (Battlemage), it requires a kernel that supports that hardware, such as the one documented at [Intel Graphics Preview](https://github.com/canonical/intel-graphics-preview), which runs in Ubuntu Oracular (24.10).. - -NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/) - -## Want to run under WSL2? No can do... - -https://www.intel.com/content/www/us/en/support/articles/000093216/graphics/processor-graphics.html - -The A- and B-series discrete GPUs do not support SR-IOV, required for the GPU partitioning that Microsoft Windows uses in order to support GPU acceleration in WSL. - -## Building - -NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/) - - -```bash -git clone https://github.com/jketreno/ketr-chat -cd ketr-chat -docker compose build -``` - -## Running - -In order to download the models, you need to have a Hugging Face token. See https://huggingface.co/settings/tokens for information on obtaining a token. - -Edit .env to add the following: - -```.env -HF_ACCESS_TOKEN= -``` - -NOTE: Models downloaded by most examples will be placed in the ./cache directory, which is bind mounted to the container. - -### Ketr Chat - -To launch the ketr-chat shell interactively, with the pytorch 2.6 environment loaded, use the default entrypoint to launch a shell: - -```bash -docker compose run --rm ketr-chat shell -``` - -Once in the shell, you can then launch the server.py: - -```bash -docker compose run --rm ketr-chat shell -python src/server.py -``` - -If you launch the server without any parameters, it will run the backend server, which will host the static web frontend built during the `docker compose build`. - -That is the behavior if you up the container: - -```bash -docker compose up -d -``` - -### Jupyter - -```bash -docker compose up jupyter -d -``` - -The default port for inbound connections is 8888 (see docker-compose.yml). $(pwd)/jupyter is bind mounted to /opt/juypter in the container, which is where notebooks will be saved by default. - -To access the jupyter notebook, go to `https://localhost:8888/jupyter`. - -### Monitoring - -You can run `ze-monitor` within the launched containers to monitor GPU usage. - -```bash -containers=($(docker ps --filter "ancestor=ketr-chat" --format "{{.ID}}")) -if [[ ${#containers[*]} -eq 0 ]]; then - echo "Running ketr-chat container not found." 
-else - for container in ${containers[@]}; do - echo "Container ${container} devices:" - docker exec -it ${container} ze-monitor - done -fi -``` - -If an ketr-chat container is running, you should see something like: - -``` -Container 5317c503e771 devices: -Device 1: 8086:A780 (Intel(R) UHD Graphics 770) -Device 2: 8086:E20B (Intel(R) Graphics [0xe20b]) -``` - -You can then launch ze-monitor in that container specifying the device you wish to monitor: - -``` -containers=($(docker ps --filter "ancestor=ketr-chat" --format "{{.ID}}")) -docker exec -it ${containers[0]} ze-monitor --device 2 -``` \ No newline at end of file diff --git a/src/doc/projects/ze-monitor.txt b/src/doc/projects/ze-monitor.txt deleted file mode 100644 index 08d0c9b..0000000 --- a/src/doc/projects/ze-monitor.txt +++ /dev/null @@ -1,279 +0,0 @@ -# ze-monitor - -A small utility to monitor Level Zero devices via -[Level Zero Sysman](https://oneapi-src.github.io/level-zero-spec/level-zero/latest/sysman/PROG.html#sysman-programming-guide) -from the command line, similar to 'top'. - -# Installation - -Requires Ubuntu Oracular 24.10. - -## Easiest - -### Install prerequisites - -This will add the [Intel Graphics Preview PPA](https://github.com/canonical/intel-graphics-preview) and install the required dependencies: - -```bash -sudo apt-get install -y \ - software-properties-common \ - && sudo add-apt-repository -y ppa:kobuk-team/intel-graphics \ - && sudo apt-get update \ - && sudo apt-get install -y \ - libze1 libze-intel-gpu1 libncurses6 -``` - -### Install ze-monitor from .deb package - -This will download the ze-monitor GitHub, install it, and add the current -user to the 'ze-monitor' group to allow running the utility: - -```bash -version=0.3.0-1 -wget https://github.com/jketreno/ze-monitor/releases/download/v${version}/ze-monitor-${version}_amd64.deb -sudo dpkg -i ze-monitor-${version}_amd64.deb -sudo usermod -a -G ze-monitor $(whoami) -newgrp ze-monitor -``` - -Congratulations! You can run ze-monitor: - -```bash -ze-monitor -``` - -You should see something like: - -```bash -Device 1: 8086:A780 (Intel(R) UHD Graphics 770) -Device 2: 8086:E20B (Intel(R) Graphics [0xe20b]) -``` - -To monitor a device: - -```bash -ze-monitor --device 2 -``` - -Check the docs (`man ze-monitor`) for additional details on running the ze-monitor utility. - -## Slightly more involved - -This project uses docker containers to build. As this was originally written to monitor an Intel Arc B580 (Battlemage), it requires a kernel that supports that hardware, such as the one documented at [Intel Graphics Preview](https://github.com/canonical/intel-graphics-preview), which runs in Ubuntu Oracular (24.10). It will monitor any Level Zero device, even those using the i915 driver. - -NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/) - -``` -git clone https://github.com/jketreno/ze-monitor.git -cd ze-monitor -docker compose build -sudo apt install libze1 libncurses6 -version=$(cat src/version.txt) -docker compose run --remove-orphans --rm \ - ze-monitor \ - cp /opt/ze-monitor-static/build/ze-monitor-${version}_amd64.deb \ - /opt/ze-monitor/build -sudo dpkg -i build/ze-monitor-${version}_amd64.deb -``` - -# Security - -In order for ze-monitor to read the performance metric units (PMU) in the Linux kernel, it needs elevated permissions. The easiest way is to install the .deb package and add the user to the ze-monitor group. Or, run under sudo (eg., `sudo ze-monitor ...`.) 
- -The specific capabilities required to monitor the GPU are documented in [Perf Security](https://www.kernel.org/doc/html/v5.1/admin-guide/perf-security.html) and [man capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html). These include: - -| Capability | Reason | -|:--------------------|:-----------------------------------------------------| -| CAP_DAC_READ_SEARCH | Bypass all filesystem read access checks | -| CAP_PERFMON | Access to perf_events (vs. overloaded CAP_SYS_ADMIN) | -| CAP_SYS_PTRACE | PTRACE_MODE_READ_REALCREDS ptrace access mode check | - -To configure ze-monitor to run with those privileges, you can use `setcap` to set the correct capabilities on ze-monitor. You can further secure your system by creating a user group specifically for running the utility and restrict running of that command to users in that group. That is what the .deb package does. - -If you install the .deb package from a [Release](https://github.com/jketreno/ze-monitor/releases) or by building it, that package will set the appropriate permissions for ze-monitor on installation and set it executable only to those in the 'ze-monitor' group. - -## Anyone can run ze-monitor - -If you build from source and want to set the capabilities: - -```bash -sudo setcap "cap_perfmon,cap_dac_read_search,cap_sys_ptrace=ep" build/ze-monitor -getcap build/ze-monitor -``` - -Any user can then run `build/ze-monitor` and monitor the GPU. - -# Build outside container - -## Prerequisites - -If you would like to build outside of docker, you need the following packages installed: - -``` -sudo apt-get install -y \ - build-essential \ - libfmt-dev \ - libncurses-dev -``` - -In addition, you need the Intel drivers installed, which are available from the `kobuk-team/intel-graphics` PPA: - -``` -sudo apt-get install -y \ - software-properties-common \ - && sudo add-apt-repository -y ppa:kobuk-team/intel-graphics \ - && sudo apt-get update \ - && sudo apt-get install -y \ - libze-intel-gpu1 \ - libze1 \ - libze-dev -``` -## Building - -``` -cd build -cmake .. -make -``` - -## Running - -``` -build/ze-monitor -``` - -## Build and install .deb - -In order to build the .deb package, you need the following packages installed: - -```bash -sudo apt-get install -y \ - debhelper \ - devscripts \ - rpm \ - rpm2cpio -``` - -You can then build the .deb: - -```bash -if [ -d build ]; then - cd build -fi -version=$(cat ../src/version.txt) -cpack -sudo dpkg -i build/packages/ze-monitor_${version}_amd64.deb -``` - -You can then run ze-monitor from your path: - -```bash -ze-monitor -``` - -# Developing - -To run the built binary without building a full .deb package, you can build and run on the host by compiling in the container: - -``` -docker compose run --rm ze-monitor build.sh -build/ze-monitor -``` - -The build.sh script will build the binary in /opt/ze-monitor/build, which is volume mounted to the host's build directory. - -NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities. - -# Running - -NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities. - -If running within a docker container, the container environment does not have access to the host's `/proc/fd`, which is necessary to obtain information about the processes outside the current container which are using the GPU. As such, only processes running within that container running ze-monitor will be listed as using the GPU. 
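If host-wide process attribution is needed from inside a container, one possible workaround (a sketch, not taken from this project's compose files; `--pid=host` is the standard Docker flag for sharing the host PID namespace, and the image name and device selection here are illustrative) is:

```bash
# Hypothetical invocation: share the host PID namespace so ze-monitor can
# resolve processes running outside the container; adjust the image name
# and GPU device selection to match your setup.
docker run --rm -it --pid=host --device /dev/dri ze-monitor ze-monitor --device 2
```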
- -## List available devices - -``` -ze-monitor -``` - -Example output: - -```bash -$ ze-monitor -Device 1: 8086:E20B (Intel(R) Graphics [0xe20b]) -Device 2: 8086:A780 (Intel(R) UHD Graphics 770) -``` - -## Show details for a given device - -``` -sudo ze-monitor --info --device ( PCIID | # | BDF | UUID | /dev/dri/render*) -``` - -Example output: - -```bash -$ sudo ze-monitor --device 2 --info -Device: 8086:A780 (Intel(R) UHD Graphics 770) - UUID: 868080A7-0400-0000-0002-000000000000 - BDF: 0000:0000:0002:0000 - PCI ID: 8086:A780 - Subdevices: 0 - Serial Number: unknown - Board Number: unknown - Brand Name: unknown - Model Name: Intel(R) UHD Graphics 770 - Vendor Name: Intel(R) Corporation - Driver Version: 0CB7EFCAD5695B7EC5C8CE6 - Type: GPU - Is integrated with host: Yes - Is a sub-device: No - Supports error correcting memory: No - Supports on-demand page-faulting: No - Engines: 7 - Engine 1: ZES_ENGINE_GROUP_RENDER_SINGLE - Engine 2: ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE - Engine 3: ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE - Engine 4: ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE - Engine 5: ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE - Engine 6: ZES_ENGINE_GROUP_COPY_SINGLE - Engine 7: ZES_ENGINE_GROUP_MEDIA_ENHANCEMENT_SINGLE - Temperature Sensors: 0 -``` - -NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities. - -## Monitor a given device - -``` -sudo ze-monitor --device ( PCIID | # | BDF | UUID | /dev/dri/render* ) \ - --interval ms -``` - -NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities. - -Output: - -```bash -$ sudo ze-monitor --device 2 --interval 500 -Device: 8086:E20B (Intel(R) Graphics [0xe20b]) -Total Memory: 12809404416 -Free memory: [# 55% ############################ ] -Power usage: 165.0W ------------------------------------------------------------------------------------------- - PID COMMAND-LINE - USED MEMORY SHARED MEMORY ENGINE FLAGS ------------------------------------------------------------------------------------------- - 1 /sbin/init splash - MEM: 106102784 SHR: 100663296 FLAGS: RENDER COMPUTE - 1606 /usr/lib/systemd/systemd-logind - MEM: 106102784 SHR: 100663296 FLAGS: RENDER COMPUTE - 5164 /usr/bin/gnome-shell - MEM: 530513920 SHR: 503316480 FLAGS: RENDER COMPUTE - 5237 /usr/bin/Xwayland :1024 -rootless -nores...isplayfd 6 -initfd 7 -byteswappedclients - MEM: 0 SHR: 0 FLAGS: - 40480 python chat.py - MEM: 5544226816 SHR: 0 FLAGS: DMA COMPUTE -``` - -If you pass `--one-shot`, statistics will be gathered, displayed, and then ze-monitor will exit. 
\ No newline at end of file diff --git a/src/ketr-chat/src/App.css b/src/ketr-chat/src/App.css deleted file mode 100644 index e3632b4..0000000 --- a/src/ketr-chat/src/App.css +++ /dev/null @@ -1,195 +0,0 @@ -div { - box-sizing: border-box -} - -.SystemInfo { - display: flex; - flex-direction: column; - gap: 5px; - padding: 5px; - flex-grow: 1; -} - -.SystemInfoItem { - display: flex; /* Grid for individual items */ - flex-direction: row; - flex-grow: 1; -} - -.SystemInfoItem > div:first-child { - display: flex; - justify-self: end; /* Align the first column content to the right */ - width: 10rem; -} - -.SystemInfoItem > div:last-child { - display: flex; - flex-grow: 1; - justify-self: end; /* Align the first column content to the right */ -} - -.ChatBox { - display: flex; - flex-direction: column; - flex-grow: 1; - max-width: 800px; - margin: 0 auto; -} - -.Controls { - display: flex; - background-color: #F5F5F5; - border: 1px solid #E0E0E0; - overflow-y: auto; - padding: 10px; - flex-direction: column; - margin-left: 10px; - box-sizing: border-box; - overflow-x: visible; - min-width: 10rem; - width: 100%; - flex-grow: 1; -} - -@media (min-width: 768px) { - .Controls { - width: 600px; /* or whatever you prefer for a desktop */ - max-width: 80vw; /* Optional: Prevent it from taking up too much space */ - } -} - -.Conversation { - display: flex; - background-color: #F5F5F5; - border: 1px solid #E0E0E0; - flex-grow: 1; - overflow-y: auto; - padding: 10px; - flex-direction: column; - height: 100%; -} - -.user-message.MuiCard-root { - background-color: #DCF8C6; - border: 1px solid #B2E0A7; - color: #333333; - margin-bottom: 0.75rem; - margin-left: 1rem; - border-radius: 0.25rem; - min-width: 80%; - max-width: 80%; - justify-self: right; - display: flex; - white-space: pre-wrap; - overflow-wrap: break-word; - word-break: break-word; - flex-direction: column; - align-items: self-end; - align-self: end; - flex-grow: 0; -} - -.assistant-message.MuiCard-root { - border: 1px solid #E0E0E0; - background-color: #FFFFFF; - color: #333333; - margin-bottom: 0.75rem; - margin-right: 1rem; - min-width: 70%; - border-radius: 0.25rem; - justify-self: left; - display: flex; - white-space: pre-wrap; - overflow-wrap: break-word; - word-break: break-word; - flex-direction: column; - flex-grow: 0; - padding: 16px 0; - font-size: 0.9rem; -} - -.assistant-message .MuiCardContent-root { - padding: 0 16px !important; - font-size: 0.9rem; -} - -.assistant-message span { - font-size: 0.9rem; -} - -.user-message .MuiCardContent-root:last-child, -.assistant-message .MuiCardContent-root:last-child { - padding: 16px; -} - -.users > div { - padding: 0.25rem; -} - -.user-active { - font-weight: bold; -} - -.metadata { - border: 1px solid #E0E0E0; - font-size: 0.75rem; - padding: 0.125rem; -} - -/* Reduce general whitespace in markdown content */ -.assistant-message p.MuiTypography-root { - margin-top: 0.5rem; - margin-bottom: 0.5rem; - font-size: 0.9rem; -} - -/* Reduce space between headings and content */ -.assistant-message h1.MuiTypography-root, -.assistant-message h2.MuiTypography-root, -.assistant-message h3.MuiTypography-root, -.assistant-message h4.MuiTypography-root, -.assistant-message h5.MuiTypography-root, -.assistant-message h6.MuiTypography-root { - margin-top: 1rem; - margin-bottom: 0.5rem; - font-size: 1rem; -} - -/* Reduce space in lists */ -.assistant-message ul.MuiTypography-root, -.assistant-message ol.MuiTypography-root { - margin-top: 0.5rem; - margin-bottom: 0.5rem; - font-size: 0.9rem; -} - 
-.assistant-message li.MuiTypography-root { - margin-bottom: 0.25rem; - font-size: 0.9rem; -} - -.assistant-message .MuiTypography-root li { - margin-top: 0; - margin-bottom: 0; - padding: 0; - font-size: 0.9rem; -} - -/* Reduce space around code blocks */ -.assistant-message .MuiTypography-root pre { - border: 1px solid #F5F5F5; - border-radius: 0.5rem; - padding: 0.5rem 0.75rem; - margin-top: 0; - margin-bottom: 0; - font-size: 0.9rem; -} - -.PromptStats .MuiTableCell-root { - font-size: 0.8rem; -} - -#SystemPromptInput { - font-size: 0.9rem; - line-height: 1.25rem; -} \ No newline at end of file diff --git a/src/ketr-chat/src/App.tsx b/src/ketr-chat/src/App.tsx deleted file mode 100644 index a713928..0000000 --- a/src/ketr-chat/src/App.tsx +++ /dev/null @@ -1,1318 +0,0 @@ -import React, { useState, useEffect, useRef, useCallback, ReactElement } from 'react'; -import FormGroup from '@mui/material/FormGroup'; -import FormControlLabel from '@mui/material/FormControlLabel'; -import { styled } from '@mui/material/styles'; -import Switch from '@mui/material/Switch'; -import Divider from '@mui/material/Divider'; -import Tooltip from '@mui/material/Tooltip'; -import Snackbar, { SnackbarCloseReason } from '@mui/material/Snackbar'; -import Alert from '@mui/material/Alert'; -import TextField from '@mui/material/TextField'; -import Accordion from '@mui/material/Accordion'; -import AccordionActions from '@mui/material/AccordionActions'; -import AccordionSummary from '@mui/material/AccordionSummary'; -import AccordionDetails from '@mui/material/AccordionDetails'; -import Typography from '@mui/material/Typography'; -import Button from '@mui/material/Button'; -import AppBar from '@mui/material/AppBar'; -import Drawer from '@mui/material/Drawer'; -import Toolbar from '@mui/material/Toolbar'; -import SettingsIcon from '@mui/icons-material/Settings'; -import CloseIcon from '@mui/icons-material/Close'; -import IconButton, { IconButtonProps } from '@mui/material/IconButton'; -import Box from '@mui/material/Box'; -import CssBaseline from '@mui/material/CssBaseline'; -import ResetIcon from '@mui/icons-material/History'; -import SendIcon from '@mui/icons-material/Send'; -import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; -import Card from '@mui/material/Card'; -import CardContent from '@mui/material/CardContent'; -import CardActions from '@mui/material/CardActions'; -import Collapse from '@mui/material/Collapse'; -import Table from '@mui/material/Table'; -import TableBody from '@mui/material/TableBody'; -import TableCell from '@mui/material/TableCell'; -import TableContainer from '@mui/material/TableContainer'; -import TableHead from '@mui/material/TableHead'; -import TableRow from '@mui/material/TableRow'; - -import PropagateLoader from "react-spinners/PropagateLoader"; - -import { MuiMarkdown } from "mui-markdown"; -import ReactMarkdown from 'react-markdown'; -import rehypeKatex from 'rehype-katex' -import remarkMath from 'remark-math' -import 'katex/dist/katex.min.css' // `rehype-katex` does not import the CSS for you - -import './App.css'; - -import '@fontsource/roboto/300.css'; -import '@fontsource/roboto/400.css'; -import '@fontsource/roboto/500.css'; -import '@fontsource/roboto/700.css'; - -//const use_mui_markdown = true -const use_mui_markdown = true - -const welcomeMarkdown = ` -# Welcome to AIRC - -This LLM agent was built by James Ketrenos in order to provide answers to any questions you may have about his work history. 
In addition to being a RAG enabled expert system, the LLM is configured with real-time access to weather, stocks, the current time, and can answer questions about the contents of a website. - -You can ask things like: - * - * - * - * - * - -Or click the text above to submit that query. - -As with all LLM interactions, the results may not be 100% accurate. If you have questions about my career, I'd love to hear from you. You can send me an email at **james_airc@ketrenos.com**. -`; - -const welcomeMessage = { - "role": "assistant", "content": welcomeMarkdown -}; -const loadingMessage = { "role": "assistant", "content": "Instancing chat session..." }; - -type Tool = { - type: string, - function?: { - name: string, - description: string, - parameters?: any, - returns?: any - }, - name?: string, - description?: string, - enabled: boolean -}; - -type SeverityType = 'error' | 'info' | 'success' | 'warning' | undefined; - -interface ControlsParams { - tools: Tool[], - rags: Tool[], - systemPrompt: string, - systemInfo: SystemInfo, - toggleTool: (tool: Tool) => void, - toggleRag: (tool: Tool) => void, - setSystemPrompt: (prompt: string) => void, - reset: (types: ("rags" | "tools" | "history" | "system-prompt" | "message-history-length")[], message: string) => Promise - messageHistoryLength: number, - setMessageHistoryLength: (messageHistoryLength: number) => void, -}; - -type GPUInfo = { - name: string, - memory: number, - discrete: boolean -} -type SystemInfo = { - "Installed RAM": string, - "Graphics Card": GPUInfo[], - "CPU": string -}; - -type MessageMetadata = { - rag: any, - tools: any[], - eval_count: number, - eval_duration: number, - prompt_eval_count: number, - prompt_eval_duration: number -}; - -type MessageData = { - role: string, - content: string, - user?: string, - type?: string, - id?: string, - isProcessing?: boolean, - metadata?: MessageMetadata -}; - -type MessageList = MessageData[]; - - -const getConnectionBase = (loc: any): string => { - if (!loc.host.match(/.*battle-linux.*/)) { - return loc.protocol + "//" + loc.host; - } else { - return loc.protocol + "//battle-linux.ketrenos.com:5000"; - } -} - -const SystemInfoComponent: React.FC<{ systemInfo: SystemInfo }> = ({ systemInfo }) => { - const [systemElements, setSystemElements] = useState([]); - - const convertToSymbols = (text: string) => { - return text - .replace(/\(R\)/g, '®') // Replace (R) with the ® symbol - .replace(/\(C\)/g, '©') // Replace (C) with the © symbol - .replace(/\(TM\)/g, '™'); // Replace (TM) with the ™ symbol - }; - - useEffect(() => { - const elements = Object.entries(systemInfo).flatMap(([k, v]) => { - // If v is an array, repeat for each card - if (Array.isArray(v)) { - return v.map((card, index) => ( -
-          <div className="SystemInfoItem">
-            <div>{convertToSymbols(k)} {index}</div>
-            <div>{convertToSymbols(card.name)} {card.discrete ? `w/ ${Math.round(card.memory / (1024 * 1024 * 1024))}GB RAM` : "(integrated)"}</div>
-          </div>
-        ));
-      }
-
-      // If it's not an array, handle normally
-      return (
-        <div className="SystemInfoItem">
-          <div>{convertToSymbols(k)}</div>
-          <div>{convertToSymbols(String(v))}</div>
-        </div>
-      );
-    });
-
-    setSystemElements(elements);
-  }, [systemInfo]);
-
-  return <div className="SystemInfo">{systemElements}</div>
; -}; - -const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, messageHistoryLength, setMessageHistoryLength, setSystemPrompt, reset, systemInfo }: ControlsParams) => { - const [editSystemPrompt, setEditSystemPrompt] = useState(systemPrompt); - - useEffect(() => { - setEditSystemPrompt(systemPrompt); - }, [systemPrompt, setEditSystemPrompt]); - - const toggle = async (type: string, index: number) => { - switch (type) { - case "rag": - toggleRag(rags[index]) - break; - case "tool": - toggleTool(tools[index]); - } - }; - - const handleKeyPress = (event: any) => { - if (event.key === 'Enter' && event.ctrlKey) { - switch (event.target.id) { - case 'SystemPromptInput': - setSystemPrompt(editSystemPrompt); - break; - } - } - }; - - return (
- - You can change the information available to the LLM by adjusting the following settings: - - - - }> - System Prompt - - - setEditSystemPrompt(e.target.value)} - onKeyDown={handleKeyPress} - placeholder="Enter the new system prompt.." - id="SystemPromptInput" - /> -
- - -
-
-
- - }> - Tunables - - - setMessageHistoryLength(e.target.value)} - slotProps={{ - htmlInput: { - min: 0 - }, - inputLabel: { - shrink: true, - }, - }} - /> - - - - }> - Tools - - - These tools can be made available to the LLM for obtaining real-time information from the Internet. The description provided to the LLM is provided for reference. - - - - { - tools.map((tool, index) => - - - } onChange={() => toggle("tool", index)} label={tool?.function?.name} /> - {tool?.function?.description} - - ) - } - - - - }> - RAG - - - These RAG databases can be enabled / disabled for adding additional context based on the chat request. - - - - { - rags.map((rag, index) => - - - } onChange={() => toggle("rag", index)} label={rag?.name} /> - {rag?.description} - - ) - } - - - - }> - System Information - - - The server is running on the following hardware: - - - - - - - -
); -} - -interface ExpandMoreProps extends IconButtonProps { - expand: boolean; -} - -const ExpandMore = styled((props: ExpandMoreProps) => { - const { expand, ...other } = props; - return ; -})(({ theme }) => ({ - marginLeft: 'auto', - transition: theme.transitions.create('transform', { - duration: theme.transitions.duration.shortest, - }), - variants: [ - { - props: ({ expand }) => !expand, - style: { - transform: 'rotate(0deg)', - }, - }, - { - props: ({ expand }) => !!expand, - style: { - transform: 'rotate(180deg)', - }, - }, - ], -})); - -interface MessageInterface { - message: MessageData, - submitQuery: (text: string) => void -}; - -interface MessageMetaInterface { - metadata: MessageMetadata -} -const MessageMeta = ({ metadata }: MessageMetaInterface) => { - if (metadata === undefined) { - return <> - } - - return (<> - - Below is the LLM performance of this query. Note that if tools are called, the entire context is processed for each separate tool request by the LLM. This can dramatically increase the total time for a response. - - - - - - - Tokens - Time (s) - TPS - - - - - Prompt - {metadata.prompt_eval_count} - {Math.round(metadata.prompt_eval_duration / 10 ** 7) / 100} - {Math.round(metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration)} - - - Response - {metadata.eval_count} - {Math.round(metadata.eval_duration / 10 ** 7) / 100} - {Math.round(metadata.eval_count * 10 ** 9 / metadata.eval_duration)} - - - Total - {metadata.prompt_eval_count + metadata.eval_count} - {Math.round((metadata.prompt_eval_duration + metadata.eval_duration) / 10 ** 7) / 100} - {Math.round((metadata.prompt_eval_count + metadata.eval_count) * 10 ** 9 / (metadata.prompt_eval_duration + metadata.eval_duration))} - - -
-
- { - metadata.tools !== undefined && metadata.tools.length !== 0 && - - }> - - Tools queried - - - - {metadata.tools.map((tool: any, index: number) => - {index !== 0 && } - -
- {tool.tool} -
-
{JSON.stringify(tool.result, null, 2)}
-
-
)} -
-
- } - { - metadata.rag.name !== undefined && - - }> - - Top RAG {metadata.rag.ids.length} matches from '{metadata.rag.name}' collection against embedding vector of {metadata.rag.query_embedding.length} dimensions - - - - {metadata.rag.ids.map((id: number, index: number) => - {index !== 0 && } - -
-
Doc ID: {metadata.rag.ids[index]}
-
Similarity: {Math.round(metadata.rag.distances[index] * 100) / 100}
-
Type: {metadata.rag.metadatas[index].doc_type}
-
Chunk Len: {metadata.rag.documents[index].length}
-
-
{metadata.rag.documents[index]}
-
-
- )} -
-
- } - - ); -}; - -interface ChatQueryInterface { - text: string, - submitQuery: (text: string) => void -} - -const ChatQuery = ({ text, submitQuery }: ChatQueryInterface) => { - return (); -} - -const Message = ({ message, submitQuery }: MessageInterface) => { - const [expanded, setExpanded] = React.useState(false); - - const handleExpandClick = () => { - setExpanded(!expanded); - }; - - const formattedContent = message.content.trim(); - - return ( - - - {message.role === 'assistant' ? - use_mui_markdown ? : - : - - {message.content} - - } - - {message.metadata && <> - - LLM information for this query - - - - - - - - - - } - - ); -} - -type ContextStatus = { - context_used: number, - max_context: number -}; - -const App = () => { - const [query, setQuery] = useState(''); - const [conversation, setConversation] = useState([]); - const conversationRef = useRef(null); - const [processing, setProcessing] = useState(false); - const [sessionId, setSessionId] = useState(undefined); - const [loc,] = useState(window.location) - const [mobileOpen, setMobileOpen] = useState(false); - const [isClosing, setIsClosing] = useState(false); - const [snackOpen, setSnackOpen] = useState(false); - const [snackMessage, setSnackMessage] = useState(""); - const [snackSeverity, setSnackSeverity] = useState("success"); - const [tools, setTools] = useState([]); - const [rags, setRags] = useState([]); - const [systemPrompt, setSystemPrompt] = useState(""); - const [serverSystemPrompt, setServerSystemPrompt] = useState(""); - const [systemInfo, setSystemInfo] = useState(undefined); - const [contextStatus, setContextStatus] = useState({ context_used: 0, max_context: 0 }); - const [contextWarningShown, setContextWarningShown] = useState(false); - const [contextUsedPercentage, setContextUsedPercentage] = useState(0); - const [lastEvalTPS, setLastEvalTPS] = useState(35); - const [lastPromptTPS, setLastPromptTPS] = useState(430); - const [countdown, setCountdown] = useState(0); - const [messageHistoryLength, setMessageHistoryLength] = useState(5); - - const timerRef = useRef(null); - - const startCountdown = (seconds: number) => { - if (timerRef.current) clearInterval(timerRef.current); - setCountdown(seconds); - timerRef.current = setInterval(() => { - setCountdown((prev) => { - if (prev <= 1) { - clearInterval(timerRef.current); - timerRef.current = null; - if (isScrolledToBottom()) { - setTimeout(() => { - scrollToBottom(); - }, 50) - } - return 0; - } - return prev - 1; - }); - }, 1000); - }; - - const stopCountdown = () => { - if (timerRef.current) { - clearInterval(timerRef.current); - timerRef.current = null; - setCountdown(0); - } - }; - - const isScrolledToBottom = useCallback(()=> { - // Current vertical scroll position - const scrollTop = window.scrollY || document.documentElement.scrollTop; - - // Total height of the page content - const scrollHeight = document.documentElement.scrollHeight; - - // Height of the visible window - const clientHeight = document.documentElement.clientHeight; - - // If we're at the bottom (allowing a small buffer of 16px) - return scrollTop + clientHeight >= scrollHeight - 16; - }, []); - - const scrollToBottom = useCallback(() => { - console.log("Scroll to bottom"); - window.scrollTo({ - top: document.body.scrollHeight, - }); - }, []); - - // Set the snack pop-up and open it - const setSnack = useCallback((message: string, severity: SeverityType = "success") => { - setSnackMessage(message); - setSnackSeverity(severity); - setSnackOpen(true); - }, []); - - // Get the system 
information - useEffect(() => { - if (systemInfo !== undefined || sessionId === undefined) { - return; - } - fetch(getConnectionBase(loc) + `/api/system-info/${sessionId}`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json', - }, - }) - .then(response => response.json()) - .then(data => { - setSystemInfo(data); - }) - .catch(error => { - console.error('Error obtaining system information:', error); - setSnack("Unable to obtain system information.", "error"); - }); - }, [systemInfo, setSystemInfo, loc, setSnack, sessionId]) - - // Update the context status - const updateContextStatus = useCallback(() => { - fetch(getConnectionBase(loc) + `/api/context-status/${sessionId}`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json', - }, - }) - .then(response => response.json()) - .then(data => { - setContextStatus(data); - }) - .catch(error => { - console.error('Error getting context status:', error); - setSnack("Unable to obtain context status.", "error"); - }); - }, [setContextStatus, loc, setSnack, sessionId]); - - // Set the initial chat history to "loading" or the welcome message if loaded. - useEffect(() => { - if (sessionId === undefined) { - setConversation([loadingMessage]); - } else { - fetch(getConnectionBase(loc) + `/api/history/${sessionId}`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json', - }, - }) - .then(response => response.json()) - .then(data => { - console.log(`Session id: ${sessionId} -- history returned from server with ${data.length} entries`) - setConversation([ - welcomeMessage, - ...data - ]); - }) - .catch(error => { - console.error('Error generating session ID:', error); - setSnack("Unable to obtain chat history.", "error"); - }); - updateContextStatus(); - } - }, [sessionId, setConversation, updateContextStatus, loc, setSnack]); - - // Extract the sessionId from the URL if present, otherwise - // request a sessionId from the server. 
- useEffect(() => { - const url = new URL(loc.href); - const pathParts = url.pathname.split('/').filter(Boolean); - - if (!pathParts.length) { - console.log("No session id -- creating a new session") - fetch(getConnectionBase(loc) + `/api/context`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - }) - .then(response => response.json()) - .then(data => { - console.log(`Session id: ${data.id} -- returned from server`) - setSessionId(data.id); - window.history.replaceState({}, '', `/${data.id}`); - }) - .catch(error => console.error('Error generating session ID:', error)); - } else { - console.log(`Session id: ${pathParts[0]} -- existing session`) - setSessionId(pathParts[0]); - } - - }, [setSessionId, loc]); - - // If the systemPrompt has not been set, fetch it from the server - useEffect(() => { - if (serverSystemPrompt !== "" || sessionId === undefined) { - return; - } - const fetchTunables = async () => { - // Make the fetch request with proper headers - const response = await fetch(getConnectionBase(loc) + `/api/tunables/${sessionId}`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - }, - }); - const data = await response.json(); - const serverSystemPrompt = data["system-prompt"].trim(); - setServerSystemPrompt(serverSystemPrompt); - setSystemPrompt(serverSystemPrompt); - setMessageHistoryLength(data["message-history-length"]); - } - - fetchTunables(); - }, [sessionId, serverSystemPrompt, setServerSystemPrompt, loc]); - - // If the tools have not been set, fetch them from the server - useEffect(() => { - if (tools.length || sessionId === undefined) { - return; - } - const fetchTools = async () => { - try { - // Make the fetch request with proper headers - const response = await fetch(getConnectionBase(loc) + `/api/tools/${sessionId}`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - }, - }); - if (!response.ok) { - throw Error(); - } - const tools = await response.json(); - setTools(tools); - } catch (error: any) { - setSnack("Unable to fetch tools", "error"); - console.error(error); - } - } - - fetchTools(); - }, [sessionId, tools, setTools, setSnack, loc]); - - // If the RAGs have not been set, fetch them from the server - useEffect(() => { - if (rags.length || sessionId === undefined) { - return; - } - const fetchRags = async () => { - try { - // Make the fetch request with proper headers - const response = await fetch(getConnectionBase(loc) + `/api/rags/${sessionId}`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - }, - }); - if (!response.ok) { - throw Error(); - } - const rags = await response.json(); - setRags(rags); - } catch (error: any) { - setSnack("Unable to fetch RAGs", "error"); - console.error(error); - } - } - - fetchRags(); - }, [sessionId, rags, setRags, setSnack, loc]); - - // If context status changes, show a warning if necessary. If it drops - // back below the threshold, clear the warning trigger - useEffect(() => { - const context_used_percentage = Math.round(100 * contextStatus.context_used / contextStatus.max_context); - if (context_used_percentage >= 90 && !contextWarningShown) { - setSnack(`${context_used_percentage}% of context used. 
You may wish to start a new chat.`, "warning"); - setContextWarningShown(true); - } - if (context_used_percentage < 90 && contextWarningShown) { - setContextWarningShown(false); - } - setContextUsedPercentage(context_used_percentage) - }, [contextStatus, setContextWarningShown, contextWarningShown, setContextUsedPercentage, setSnack]); - - - const toggleRag = async (tool: Tool) => { - tool.enabled = !tool.enabled - try { - const response = await fetch(getConnectionBase(loc) + `/api/rags/${sessionId}`, { - method: 'PUT', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - }, - body: JSON.stringify({ "tool": tool?.name, "enabled": tool.enabled }), - }); - - const rags = await response.json(); - setRags([...rags]) - setSnack(`${tool?.name} ${tool.enabled ? "enabled" : "disabled"}`); - } catch (error) { - console.error('Fetch error:', error); - setSnack(`${tool?.name} ${tool.enabled ? "enabling" : "disabling"} failed.`, "error"); - tool.enabled = !tool.enabled - } - }; - - const toggleTool = async (tool: Tool) => { - tool.enabled = !tool.enabled - try { - const response = await fetch(getConnectionBase(loc) + `/api/tools/${sessionId}`, { - method: 'PUT', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - }, - body: JSON.stringify({ "tool": tool?.function?.name, "enabled": tool.enabled }), - }); - - const tools = await response.json(); - setTools([...tools]) - setSnack(`${tool?.function?.name} ${tool.enabled ? "enabled" : "disabled"}`); - } catch (error) { - console.error('Fetch error:', error); - setSnack(`${tool?.function?.name} ${tool.enabled ? "enabling" : "disabling"} failed.`, "error"); - tool.enabled = !tool.enabled - } - }; - - useEffect(() => { - if (systemPrompt === serverSystemPrompt || !systemPrompt.trim() || sessionId === undefined) { - return; - } - const sendSystemPrompt = async (prompt: string) => { - try { - const response = await fetch(getConnectionBase(loc) + `/api/tunables/${sessionId}`, { - method: 'PUT', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - }, - body: JSON.stringify({ "system-prompt": prompt }), - }); - - const data = await response.json(); - const newPrompt = data["system-prompt"]; - if (newPrompt !== serverSystemPrompt) { - setServerSystemPrompt(newPrompt); - setSystemPrompt(newPrompt) - setSnack("System prompt updated", "success"); - } - } catch (error) { - console.error('Fetch error:', error); - setSnack("System prompt update failed", "error"); - } - }; - - sendSystemPrompt(systemPrompt); - - }, [systemPrompt, setServerSystemPrompt, serverSystemPrompt, loc, sessionId, setSnack]); - - useEffect(() => { - if (sessionId === undefined) { - return; - } - const sendMessageHistoryLength = async (length: number) => { - try { - const response = await fetch(getConnectionBase(loc) + `/api/tunables/${sessionId}`, { - method: 'PUT', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - }, - body: JSON.stringify({ "message-history-length": length }), - }); - - const data = await response.json(); - const newLength = data["message-history-length"]; - if (newLength !== messageHistoryLength) { - setMessageHistoryLength(newLength); - setSnack("Message history length updated", "success"); - } - } catch (error) { - console.error('Fetch error:', error); - setSnack("Message history length update failed", "error"); - } - }; - - sendMessageHistoryLength(messageHistoryLength); - - }, [messageHistoryLength, setMessageHistoryLength, loc, sessionId, 
setSnack]); - - const reset = async (types: ("rags" | "tools" | "history" | "system-prompt" | "message-history-length")[], message: string = "Update successful.") => { - try { - const response = await fetch(getConnectionBase(loc) + `/api/reset/${sessionId}`, { - method: 'PUT', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - }, - body: JSON.stringify({ "reset": types }), - }); - - if (response.ok) { - const data = await response.json(); - if (data.error) { - throw Error() - } - for (const [key, value] of Object.entries(data)) { - switch (key) { - case "rags": - setRags(value as Tool[]); - break; - case "tools": - setTools(value as Tool[]); - break; - case "system-prompt": - setServerSystemPrompt((value as any)["system-prompt"].trim()); - setSystemPrompt((value as any)["system-prompt"].trim()); - break; - case "history": - setConversation([welcomeMessage]); - break; - } - } - setSnack(message, "success"); - } else { - throw Error(`${{ status: response.status, message: response.statusText }}`); - } - } catch (error) { - console.error('Fetch error:', error); - setSnack("Unable to restore defaults", "error"); - } - }; - - const handleDrawerClose = () => { - setIsClosing(true); - setMobileOpen(false); - }; - - const handleDrawerTransitionEnd = () => { - setIsClosing(false); - }; - - const handleDrawerToggle = () => { - if (!isClosing) { - setMobileOpen(!mobileOpen); - } - }; - - const drawer = ( - <> - {sessionId !== undefined && systemInfo !== undefined && - } - - ); - - const submitQuery = (text: string) => { - sendQuery(text); - } - - const handleKeyPress = (event: any) => { - if (event.key === 'Enter') { - switch (event.target.id) { - case 'QueryInput': - sendQuery(query); - break; - } - } - }; - - const onNew = async () => { - reset(["history"], "New chat started."); - } - - const sendQuery = async (query: string) => { - if (!query.trim()) return; - - const userMessage = [{ role: 'user', content: query }]; - - let scrolledToBottom; - - // Add user message to conversation - const newConversation: MessageList = [ - ...conversation, - ...userMessage - ]; - setConversation(newConversation); - scrollToBottom(); - - // Clear input - setQuery(''); - - try { - scrolledToBottom = isScrolledToBottom(); - setProcessing(true); - // Create a unique ID for the processing message - const processingId = Date.now().toString(); - - // Add initial processing message - setConversation(prev => [ - ...prev, - { role: 'assistant', content: 'Processing request...', id: processingId, isProcessing: true } - ]); - if (scrolledToBottom) { - setTimeout(() => { scrollToBottom() }, 50); - } - - // Make the fetch request with proper headers - const response = await fetch(getConnectionBase(loc) + `/api/chat/${sessionId}`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - }, - body: JSON.stringify({ role: 'user', content: query.trim() }), - }); - - // We'll guess that the response will be around 500 tokens... - const token_guess = 500; - const estimate = Math.round(token_guess / lastEvalTPS + contextStatus.context_used / lastPromptTPS); - - scrolledToBottom = isScrolledToBottom(); - setSnack(`Query sent. 
Response estimated in ${estimate}s.`, "info"); - startCountdown(Math.round(estimate)); - if (scrolledToBottom) { - setTimeout(() => { scrollToBottom() }, 50); - } - - if (!response.ok) { - throw new Error(`Server responded with ${response.status}: ${response.statusText}`); - } - - if (!response.body) { - throw new Error('Response body is null'); - } - - // Set up stream processing with explicit chunking - const reader = response.body.getReader(); - const decoder = new TextDecoder(); - let buffer = ''; - - while (true) { - const { done, value } = await reader.read(); - if (done) { - break; - } - - const chunk = decoder.decode(value, { stream: true }); - - // Process each complete line immediately - buffer += chunk; - let lines = buffer.split('\n'); - buffer = lines.pop() || ''; // Keep incomplete line in buffer - for (const line of lines) { - if (!line.trim()) continue; - - try { - const update = JSON.parse(line); - - // Force an immediate state update based on the message type - if (update.status === 'processing') { - scrolledToBottom = isScrolledToBottom(); - // Update processing message with immediate re-render - setConversation(prev => prev.map(msg => - msg.id === processingId - ? { ...msg, content: update.message } - : msg - )); - if (scrolledToBottom) { - setTimeout(() => { scrollToBottom() }, 50); - } - - // Add a small delay to ensure React has time to update the UI - await new Promise(resolve => setTimeout(resolve, 0)); - - } else if (update.status === 'done') { - // Replace processing message with final result - scrolledToBottom = isScrolledToBottom(); - setConversation(prev => [ - ...prev.filter(msg => msg.id !== processingId), - update.message - ]); - const metadata = update.message.metadata; - const evalTPS = metadata.eval_count * 10 ** 9 / metadata.eval_duration; - const promptTPS = metadata.prompt_eval_count * 10 ** 9 / metadata.prompt_eval_duration; - setLastEvalTPS(evalTPS ? evalTPS : 35); - setLastPromptTPS(promptTPS ? 
promptTPS : 35); - updateContextStatus(); - if (scrolledToBottom) { - setTimeout(() => { scrollToBottom() }, 50); - } - } else if (update.status === 'error') { - // Show error - scrolledToBottom = isScrolledToBottom(); - setConversation(prev => [ - ...prev.filter(msg => msg.id !== processingId), - { role: 'assistant', type: 'error', content: update.message } - ]); - if (scrolledToBottom) { - setTimeout(() => { scrollToBottom() }, 50); - } - } - } catch (e) { - setSnack("Error processing query", "error") - console.error('Error parsing JSON:', e, line); - } - } - } - - // Process any remaining buffer content - if (buffer.trim()) { - try { - const update = JSON.parse(buffer); - - if (update.status === 'done') { - scrolledToBottom = isScrolledToBottom(); - setConversation(prev => [ - ...prev.filter(msg => msg.id !== processingId), - update.message - ]); - if (scrolledToBottom) { - setTimeout(() => { scrollToBottom() }, 500); - } - } - } catch (e) { - setSnack("Error processing query", "error") - } - } - - scrolledToBottom = isScrolledToBottom(); - stopCountdown(); - setProcessing(false); - if (scrolledToBottom) { - setTimeout(() => { scrollToBottom() }, 50); - } - } catch (error) { - console.error('Fetch error:', error); - setSnack("Unable to process query", "error"); - scrolledToBottom = isScrolledToBottom(); - setConversation(prev => [ - ...prev.filter(msg => !msg.isProcessing), - { role: 'assistant', type: 'error', content: `Error: ${error}` } - ]); - setProcessing(false); - stopCountdown(); - if (scrolledToBottom) { - setTimeout(() => { scrollToBottom() }, 50); - } - } - }; - - const handleSnackClose = ( - event: React.SyntheticEvent | Event, - reason?: SnackbarCloseReason, - ) => { - if (reason === 'clickaway') { - return; - } - - setSnackOpen(false); - }; - - const Offset = styled('div')(({ theme }) => theme.mixins.toolbar); - - return ( - - - theme.zIndex.drawer + 1, - }} - > - - - - - - - - - - - - - - ai.ketrenos.com - - - { - mobileOpen === true && - - - - - - } - - - - - - - - - {/* The implementation can be swapped with js to avoid SEO duplication of links. */} - - - {drawer} - - - - - {conversation.map((message, index) => )} - - - {processing === true && countdown > 0 && ( - Estimated response time: {countdown}s - )} - - - Context used: {contextUsedPercentage}% {contextStatus.context_used}/{contextStatus.max_context} - { - contextUsedPercentage >= 90 ? WARNING: Context almost exhausted. You should start a new chat. - : (contextUsedPercentage >= 50 ? NOTE: Context is getting long. Queries will be slower, and the LLM may stop issuing tool calls. - : <>) - } - - - - setQuery(e.target.value)} - onKeyDown={handleKeyPress} - placeholder="Enter your question..." 
- id="QueryInput" - /> - - - - - - - - - - - - - {snackMessage} - - - - ); -}; - -export default App; \ No newline at end of file diff --git a/src/server.py b/src/server.py index c5ec88d..5f31fb7 100644 --- a/src/server.py +++ b/src/server.py @@ -6,14 +6,8 @@ import asyncio import json import logging import os -import queue import re -import time -from datetime import datetime -import textwrap -import threading import uuid -import random import subprocess import re import math @@ -26,29 +20,15 @@ def try_import(module_name, pip_name=None): print(f" pip install {pip_name or module_name}") # Third-party modules with import checks -try_import('gradio') try_import('ollama') -try_import('pytz') try_import('requests') -try_import('yfinance', 'yfinance') -try_import('dotenv', 'python-dotenv') -try_import('geopy', 'geopy') -try_import('hyphen', 'PyHyphen') try_import('bs4', 'beautifulsoup4') -try_import('nltk') try_import('fastapi') -import nltk -from dotenv import load_dotenv -from geopy.geocoders import Nominatim -import gradio as gr import ollama -import pytz import requests -import yfinance as yf -from hyphen import hyphenator from bs4 import BeautifulSoup -from fastapi import FastAPI, HTTPException, BackgroundTasks, Request +from fastapi import FastAPI, Request from fastapi.responses import JSONResponse, StreamingResponse, FileResponse, RedirectResponse from fastapi.middleware.cors import CORSMiddleware @@ -143,7 +123,7 @@ MODEL_NAME = defines.model LOG_LEVEL="info" USE_TLS=False WEB_HOST="0.0.0.0" -WEB_PORT=5000 +WEB_PORT=8911 DEFAULT_HISTORY_LENGTH=5 # %% @@ -157,14 +137,37 @@ When answering queries, follow these steps: 1. First analyze the query to determine if real-time information might be helpful 2. Even when [{context_tag}] is provided, consider whether the tools would provide more current or comprehensive information 3. Use the provided tools whenever they would enhance your response, regardless of whether context is also available +4. When presenting information like weather forecasts, include relevant emojis immediately before the corresponding text. For example, for a sunny day, say \"☀️ Sunny\" or if the forecast says there will be \"rain showers, say \"🌧️ Rain showers\". Use this mapping for weather emojis: Sunny: ☀️, Cloudy: ☁️, Rainy: 🌧️, Snowy: ❄️ 4. When both [{context_tag}] and tool outputs are relevant, synthesize information from both sources to provide the most complete answer 5. Always prioritize the most up-to-date and relevant information, whether it comes from [{context_tag}] or tools 6. If [{context_tag}] and tool outputs contain conflicting information, prefer the tool outputs as they likely represent more current data Always use tools and [{context_tag}] when possible. Be concise, and never make up information. If you do not know the answer, say so. - """.strip() +system_generate_resume = f""" +You are a professional resume writer. Your task is to write a poliched, tailored resume for a specific job based only on the individual's [WORK HISTORY]. + +When answering queries, follow these steps: + +1. You must not invent or assume any inforation not explicitly present in the [WORK HISTORY]. +2. Analyze the [JOB DESCRIPTION] to identify skills required for the job. +3. Use the [JOB DESCRIPTION] provided to guide the focus, tone, and relevant skills or experience to highlight. +4. Identify and emphasisze the experiences, achievements, and responsibilities from the [WORK HISTORY] that best align with the [JOB DESCRIPTION]. +5. 
Do not use the [JOB DESCRIPTION] skills as skills the user possesses unless listed in [WORK HISTORY]. + +Structure the resume professionally with the following sections where applicable: + +* Name: Use full name. +* Professional Summary: A 2-4 sentence overview tailored to the job. +* Skills: A bullet list of key skills derived from the work history and relevant to the job. +* Professional Experience: A detailed list of roles, achievements, and responsibilities from the work history that relate to the job. +* Education: Include only if available in the work history. + +Do not include any information unless it is supported by the provided [WORK HISTORY]. +Ensure the language is clear, concise, and aligned with industry standards for professional resumes. +""" + tool_log = [] command_log = [] model = None @@ -445,6 +448,9 @@ class WebServer: @self.app.post('/api/chat/{context_id}') async def chat_endpoint(context_id: str, request: Request): + if not is_valid_uuid(context_id): + logging.warning(f"Invalid context_id: {context_id}") + return JSONResponse({"error": "Invalid context_id"}, status_code=400) context = self.upsert_context(context_id) data = await request.json() @@ -468,7 +474,36 @@ class WebServer: "X-Accel-Buffering": "no" # Prevents Nginx buffering if you're using it } ) - + + @self.app.post('/api/generate-resume/{context_id}') + async def post_generate_resume(context_id: str, request: Request): + if not is_valid_uuid(context_id): + logging.warning(f"Invalid context_id: {context_id}") + return JSONResponse({"error": "Invalid context_id"}, status_code=400) + context = self.upsert_context(context_id) + data = await request.json() + + # Create a custom generator that ensures flushing + async def flush_generator(): + async for message in self.generate_resume(context=context, content=data['content']): + # Convert to JSON and add newline + yield json.dumps(message) + "\n" + # Save the history as it's generated + self.save_context(context_id) + # Explicitly flush after each yield + await asyncio.sleep(0) # Allow the event loop to process the write + + # Return StreamingResponse with appropriate headers + return StreamingResponse( + flush_generator(), + media_type="application/json", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no" # Prevents Nginx buffering if you're using it + } + ) + @self.app.post('/api/context') async def create_context(): context = self.create_context() @@ -542,12 +577,12 @@ class WebServer: @self.app.get('/{path:path}') async def serve_static(path: str): - full_path = os.path.join('/opt/airc/ketr-chat/build', path) + full_path = os.path.join(defines.static_content, path) if os.path.exists(full_path) and os.path.isfile(full_path): self.logging.info(f"Serve static request for {full_path}") return FileResponse(full_path) self.logging.info(f"Serve index.html for {path}") - return FileResponse('/opt/airc/ketr-chat/build/index.html') + return FileResponse(os.path.join(defines.static_content, 'index.html')) import requests @@ -565,11 +600,11 @@ class WebServer: context = self.upsert_context(session_id) # Create sessions directory if it doesn't exist - if not os.path.exists("sessions"): - os.makedirs("sessions") + if not os.path.exists(defines.session_dir): + os.makedirs(defines.session_dir) # Create the full file path - file_path = os.path.join("sessions", session_id) + file_path = os.path.join(defines.session_dir, session_id) # Serialize the data to JSON and write to file with open(file_path, 'w') as f: @@ -587,7
+622,7 @@ class WebServer: Returns: The deserialized dictionary, or a new context if it doesn't exist on disk. """ - file_path = os.path.join("sessions", session_id) + file_path = os.path.join(defines.session_dir, session_id) # Check if the file exists if not os.path.exists(file_path): @@ -606,9 +641,11 @@ class WebServer: context = { "id": context_id, "system": system_context, + "system_generate_resume": system_generate_resume, "llm_history": [], "user_history": [], "tools": default_tools(tools), + "resume_history": [], "rags": rags.copy(), "context_tokens": round(len(str(system_context)) * 3 / 4), # Estimate context usage "message_history_length": 5 # Number of messages to supply in context @@ -681,10 +718,9 @@ class WebServer: messages = context["system"] + llm_history try: - yield {"status": "processing", "message": "Processing request..."} - # Estimate token length of new messages ctx_size = self.get_optimal_ctx_size(context["context_tokens"], messages=llm_history[-1]["content"]) + yield {"status": "processing", "message": "Processing request...", "num_ctx": ctx_size} # Use the async generator in an async for loop response = self.client.chat(model=self.model, messages=messages, tools=llm_tools(context["tools"]), options={ 'num_ctx': ctx_size }) @@ -734,9 +770,9 @@ class WebServer: metadata["tools"] = tools_used - yield {"status": "processing", "message": "Generating final response..."} # Estimate token length of new messages ctx_size = self.get_optimal_ctx_size(context["context_tokens"], messages=messages[pre_add_index:]) + yield {"status": "processing", "message": "Generating final response...", "num_ctx": ctx_size } # Decrease creativity when processing tool call requests response = self.client.chat(model=self.model, messages=messages, stream=False, options={ 'num_ctx': ctx_size }) #, "temperature": 0.5 }) metadata["eval_count"] += response['eval_count'] @@ -756,7 +792,7 @@ class WebServer: user_history.append(final_message) # Return the REST API with metadata - yield {"status": "done", "message": final_message, "metadata": metadata} + yield {"status": "done", "message": final_message } except Exception as e: logging.exception({ 'model': self.model, 'messages': messages, 'error': str(e) }) @@ -765,7 +801,79 @@ class WebServer: finally: self.processing = False - def run(self, host='0.0.0.0', port=5000, **kwargs): + async def generate_resume(self, context, content): + content = content.strip() + if not content: + yield {"status": "error", "message": "Invalid request"} + return + + if self.processing: + yield {"status": "error", "message": "Busy"} + return + + self.processing = True + resume_history = context["resume_history"] + + metadata = { + "rag": {}, + "tools": [], + "eval_count": 0, + "eval_duration": 0, + "prompt_eval_count": 0, + "prompt_eval_duration": 0, + } + rag_docs = [] + for rag in context["rags"]: + if rag["enabled"] and rag["name"] == "JPK": # Only support JPK rag right now... 
+ yield {"status": "processing", "message": f"Checking RAG context {rag['name']}..."} + chroma_results = Rag.find_similar(llm=self.client, collection=self.collection, query=content, top_k=10) + if chroma_results: + rag_docs.extend(chroma_results["documents"]) + metadata["rag"] = { "name": rag["name"], **chroma_results } + preamble = f"The current time is {DateTime()}\n" + if len(rag_docs): + preamble = f"""[WORK HISTORY]:\n""" + for doc in rag_docs: + preamble += doc + preamble += f"\n[/WORK HISTORY]\n" + + content = f"{preamble}\nUse the above WORK HISTORY to create the resume for this JOB DESCRIPTION. Do not use the JOB DESCRIPTION skills as skills the user posseses unless listed in WORK HISTORY:\n[JOB DESCRIPTION]\n{content}\n[/JOB DESCRIPTION]\n" + + try: + # Estimate token length of new messages + ctx_size = self.get_optimal_ctx_size(context["context_tokens"], messages=[system_generate_resume, content]) + + yield {"status": "processing", "message": "Processing request...", "num_ctx": ctx_size} + + # Use the async generator in an async for loop + response = self.client.generate(model=self.model, system=system_generate_resume, prompt=content, options={ 'num_ctx': ctx_size }) + metadata["eval_count"] += response['eval_count'] + metadata["eval_duration"] += response['eval_duration'] + metadata["prompt_eval_count"] += response['prompt_eval_count'] + metadata["prompt_eval_duration"] += response['prompt_eval_duration'] + context["context_tokens"] = response['prompt_eval_count'] + response['eval_count'] + + reply = response['response'] + final_message = {"role": "assistant", "content": reply, "metadata": metadata } + + resume_history.append({ + 'job_description': content, + 'resume': reply, + 'metadata': metadata + }) + + # Return the REST API with metadata + yield {"status": "done", "message": final_message } + + except Exception as e: + logging.exception({ 'model': self.model, 'content': content, 'error': str(e) }) + yield {"status": "error", "message": f"An error occurred: {str(e)}"} + + finally: + self.processing = False + + + def run(self, host='0.0.0.0', port=WEB_PORT, **kwargs): import uvicorn uvicorn.run(self.app, host=host, port=port) @@ -783,7 +891,7 @@ def main(): client = ollama.Client(host=args.ollama_server) model = args.ollama_model - documents = Rag.load_text_files("doc") + documents = Rag.load_text_files(defines.doc_dir) print(f"Documents loaded {len(documents)}") collection = Rag.get_vector_collection() chunks = Rag.create_chunks_from_documents(documents) diff --git a/src/utils/defines.py b/src/utils/defines.py index 9db5b59..2dbbe4f 100644 --- a/src/utils/defines.py +++ b/src/utils/defines.py @@ -4,5 +4,8 @@ ollama_api_url="http://ollama:11434" # Default Ollama local endpoint #model = "llama3.2" model="qwen2.5:7b" encoding_model="mxbai-embed-large" -persist_directory="./chromadb" -max_context = 2048*8*2 \ No newline at end of file +persist_directory="/root/.cache/chromadb" +max_context = 2048*8*2 +doc_dir = "/opt/backstory/docs/" +session_dir = "/opt/backstory/sessions" +static_content = '/opt/backstory/frontend/build' \ No newline at end of file diff --git a/src/utils/rag.py b/src/utils/rag.py index 281113d..df7b078 100644 --- a/src/utils/rag.py +++ b/src/utils/rag.py @@ -104,7 +104,7 @@ if __name__ == "__main__": # When running directly, use absolute imports import defines llm = ollama.Client(host=defines.ollama_api_url) - documents = load_text_files("doc") + documents = load_text_files(defines.doc_dir) print(f"Documents loaded {len(documents)}") collection = 
diff --git a/src/utils/rag.py b/src/utils/rag.py
index 281113d..df7b078 100644
--- a/src/utils/rag.py
+++ b/src/utils/rag.py
@@ -104,7 +104,7 @@ if __name__ == "__main__":
     # When running directly, use absolute imports
     import defines
     llm = ollama.Client(host=defines.ollama_api_url)
-    documents = load_text_files("doc")
+    documents = load_text_files(defines.doc_dir)
     print(f"Documents loaded {len(documents)}")
     collection = get_vector_collection()
     chunks = create_chunks_from_documents(documents)
@@ -113,5 +113,6 @@ if __name__ == "__main__":
     print(f"Document types: {doc_types}")
     print(f"Vectorstore created with {collection.count()} documents")
     query = "Can you describe James Ketrenos' work history?"
-    top_docs = find_similar(llm, query, top_k=3)
+    top_docs = find_similar(llm, collection, query, top_k=3)
     print(top_docs)
+
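Note: the __main__ fix above brings the call in line with find_similar()'s signature, which takes the collection as its second argument. A minimal sketch of a compatible implementation, assuming the chromadb collection API and the Ollama embeddings call; the actual body lives earlier in src/utils/rag.py and is not part of this diff:

import ollama

import defines

def find_similar(llm: ollama.Client, collection, query: str, top_k: int = 3):
    # Embed the query with the same model used when indexing the documents,
    # then ask Chroma for the top_k nearest chunks
    embedded = llm.embeddings(model=defines.encoding_model, prompt=query)
    return collection.query(query_embeddings=[embedded["embedding"]], n_results=top_k)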