Refactored VectorVisualization
Added LoadingPage
This commit is contained in:
parent
5dc5e8415c
commit
bbd5cb3783
139
Dockerfile
139
Dockerfile
@ -1,74 +1,28 @@
|
||||
#
|
||||
# Build Python 3.11 for use in later stages
|
||||
#
|
||||
FROM ubuntu:oracular AS python-build
|
||||
FROM ubuntu:oracular AS python
|
||||
|
||||
SHELL [ "/bin/bash", "-c" ]
|
||||
|
||||
# Instructions Dockerfied from:
|
||||
#
|
||||
# https://github.com/pytorch/pytorch
|
||||
#
|
||||
# and
|
||||
#
|
||||
# https://pytorch.org/docs/stable/notes/get_start_xpu.html
|
||||
# https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-6.html
|
||||
#
|
||||
#
|
||||
# Install some utilities frequently used
|
||||
RUN apt-get update \
|
||||
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
gpg \
|
||||
wget \
|
||||
nano \
|
||||
rsync \
|
||||
jq \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
|
||||
|
||||
|
||||
# ipex only supports python 3.11, so use 3.11 instead of latest oracular (3.12)
|
||||
|
||||
# Install latest Python3
|
||||
RUN apt-get update \
|
||||
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
build-essential \
|
||||
ca-certificates \
|
||||
ccache \
|
||||
cmake \
|
||||
curl \
|
||||
git \
|
||||
gpg-agent \
|
||||
less \
|
||||
libbz2-dev \
|
||||
libffi-dev \
|
||||
libjpeg-dev \
|
||||
libpng-dev \
|
||||
libreadline-dev \
|
||||
libssl-dev \
|
||||
libsqlite3-dev \
|
||||
llvm \
|
||||
nano \
|
||||
wget \
|
||||
zlib1g-dev \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
|
||||
|
||||
# python3 \
|
||||
# python3-pip \
|
||||
# python3-venv \
|
||||
# python3-dev \
|
||||
|
||||
RUN /usr/sbin/update-ccache-symlinks
|
||||
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
|
||||
|
||||
# Build Python in /opt/..., install it locally, then remove the build environment
|
||||
# collapsed to a single docker layer.
|
||||
WORKDIR /opt
|
||||
ENV PYTHON_VERSION=3.11.9
|
||||
|
||||
RUN wget -q -O - https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz | tar -xz \
|
||||
&& cd Python-${PYTHON_VERSION} \
|
||||
&& ./configure --prefix=/opt/python --enable-optimizations \
|
||||
&& make -j$(nproc) \
|
||||
&& make install \
|
||||
&& cd /opt \
|
||||
&& rm -rf Python-${PYTHON_VERSION}
|
||||
python3 \
|
||||
python3-pip \
|
||||
python3-venv \
|
||||
python3-dev
|
||||
|
||||
FROM ubuntu:oracular AS ze-monitor
|
||||
# From https://github.com/jketreno/ze-monitor
|
||||
@ -101,58 +55,13 @@ RUN cmake .. \
|
||||
&& make \
|
||||
&& cpack
|
||||
|
||||
#
|
||||
# Build the ipex-llm wheel for use in later stages
|
||||
#
|
||||
FROM python-build AS ipex-llm-src
|
||||
|
||||
RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2
|
||||
|
||||
RUN git clone --branch main --depth 1 https://github.com/intel/ipex-llm.git /opt/ipex-llm \
|
||||
&& cd /opt/ipex-llm \
|
||||
&& git fetch --depth 1 origin cb3c4b26ad058c156591816aa37eec4acfcbf765 \
|
||||
&& git checkout cb3c4b26ad058c156591816aa37eec4acfcbf765
|
||||
|
||||
WORKDIR /opt/ipex-llm
|
||||
|
||||
RUN python3 -m venv --system-site-packages /opt/ipex-llm/venv
|
||||
RUN { \
|
||||
echo '#!/bin/bash' ; \
|
||||
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
|
||||
echo 'source /opt/ipex-llm/venv/bin/activate' ; \
|
||||
echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \
|
||||
} > /opt/ipex-llm/shell ; \
|
||||
chmod +x /opt/ipex-llm/shell
|
||||
|
||||
SHELL [ "/opt/ipex-llm/shell" ]
|
||||
|
||||
RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu
|
||||
|
||||
WORKDIR /opt/ipex-llm/python/llm
|
||||
RUN pip install requests wheel
|
||||
RUN python setup.py clean --all bdist_wheel --linux
|
||||
|
||||
#
|
||||
# The main backstory image:
|
||||
# * python 3.11
|
||||
# * pytorch xpu w/ ipex-llm
|
||||
# * ollama-ipex-llm
|
||||
# * src/server.py - model server supporting RAG and fine-tuned models
|
||||
#
|
||||
FROM ubuntu:oracular AS llm-base
|
||||
|
||||
COPY --from=python-build /opt/python /opt/python
|
||||
|
||||
# Get a couple prerequisites
|
||||
RUN apt-get update \
|
||||
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
gpg \
|
||||
# python3 \
|
||||
# python3-pip \
|
||||
# python3-venv \
|
||||
wget \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
|
||||
FROM python AS llm-base
|
||||
|
||||
# Install Intel graphics runtimes
|
||||
RUN apt-get update \
|
||||
@ -168,13 +77,10 @@ RUN apt-get update \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
|
||||
|
||||
RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2
|
||||
|
||||
# Prerequisite for ze-monitor
|
||||
RUN apt-get update \
|
||||
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
libncurses6 \
|
||||
rsync \
|
||||
jq \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
|
||||
|
||||
@ -190,7 +96,6 @@ RUN python3 -m venv --system-site-packages /opt/backstory/venv
|
||||
# Setup the docker pip shell
|
||||
RUN { \
|
||||
echo '#!/bin/bash' ; \
|
||||
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
|
||||
echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
|
||||
echo 'source /opt/backstory/venv/bin/activate' ; \
|
||||
echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \
|
||||
@ -202,7 +107,7 @@ SHELL [ "/opt/backstory/shell" ]
|
||||
|
||||
# From https://pytorch-extension.intel.com/installation?platform=gpu&version=v2.6.10%2Bxpu&os=linux%2Fwsl2&package=pip
|
||||
RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu
|
||||
RUN pip install intel-extension-for-pytorch==2.6.10+xpu oneccl_bind_pt==2.6.0+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
#RUN pip install intel-extension-for-pytorch==2.6.10+xpu oneccl_bind_pt==2.6.0+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
|
||||
# From https://huggingface.co/docs/bitsandbytes/main/en/installation?backend=Intel+CPU+%2B+GPU#multi-backend
|
||||
RUN pip install "transformers>=4.45.1"
|
||||
@ -218,8 +123,8 @@ RUN pip install tiktoken
|
||||
RUN pip install flask flask_cors flask_sock
|
||||
RUN pip install peft datasets
|
||||
|
||||
COPY --from=ipex-llm-src /opt/ipex-llm/python/llm/dist/*.whl /opt/wheels/
|
||||
RUN for pkg in /opt/wheels/ipex_llm*.whl; do pip install $pkg; done
|
||||
#COPY --from=ipex-llm-src /opt/ipex-llm/python/llm/dist/*.whl /opt/wheels/
|
||||
#RUN for pkg in /opt/wheels/ipex_llm*.whl; do pip install $pkg; done
|
||||
|
||||
# mistral fails with cache_position errors with transformers>4.40 (or at least it fails with the latest)
|
||||
# as well as MistralSpda* and QwenSpda* things missing (needed when loading models with )
|
||||
@ -236,7 +141,8 @@ RUN pip install "sentence_transformers<3.4.1"
|
||||
RUN pip3 install 'bigdl-core-xe-all>=2.6.0b'
|
||||
|
||||
# NOTE: IPEX includes the oneAPI components... not sure if they still need to be installed separately with a oneAPI env
|
||||
RUN pip install einops diffusers # Required for IPEX optimize(), which is required to convert from Params4bit
|
||||
# Required for IPEX optimize(), which is required to convert from Params4bit
|
||||
RUN pip install einops diffusers
|
||||
|
||||
# Needed by src/utils/rag.py
|
||||
RUN pip install watchdog
|
||||
@ -263,7 +169,6 @@ RUN { \
|
||||
echo 'echo "Container: backstory"'; \
|
||||
echo 'set -e'; \
|
||||
echo 'echo "Setting pip environment to /opt/backstory"'; \
|
||||
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
|
||||
echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
|
||||
echo 'source /opt/backstory/venv/bin/activate'; \
|
||||
echo ''; \
|
||||
@ -362,7 +267,6 @@ RUN python3 -m venv --system-site-packages /opt/ollama/venv
|
||||
# Setup the docker pip shell
|
||||
RUN { \
|
||||
echo '#!/bin/bash' ; \
|
||||
update-alternatives --set python3 /opt/python/bin/python3.11 ; \
|
||||
echo 'source /opt/ollama/venv/bin/activate' ; \
|
||||
echo 'if [[ "${1}" != "" ]]; then bash -c ${*}; else bash; fi' ; \
|
||||
} > /opt/ollama/shell ; \
|
||||
@ -471,7 +375,6 @@ RUN { \
|
||||
echo ' echo "${HF_ACCESS_TOKEN}" > /root/.cache/hub/token' ; \
|
||||
echo ' fi' ; \
|
||||
echo 'fi' ; \
|
||||
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
|
||||
echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
|
||||
echo 'source /opt/backstory/venv/bin/activate' ; \
|
||||
echo 'if [[ "${1}" == "shell" ]]; then echo "Dropping to shell"; /bin/bash; exit $?; fi' ; \
|
||||
@ -498,9 +401,7 @@ RUN { \
|
||||
|
||||
ENTRYPOINT [ "/entrypoint-jupyter.sh" ]
|
||||
|
||||
FROM ubuntu:oracular AS miniircd
|
||||
|
||||
COPY --from=python-build /opt/python /opt/python
|
||||
FROM python AS miniircd
|
||||
|
||||
# Get a couple prerequisites
|
||||
RUN apt-get update \
|
||||
@ -514,15 +415,12 @@ RUN apt-get update \
|
||||
|
||||
WORKDIR /opt/miniircd
|
||||
|
||||
RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2
|
||||
|
||||
# Setup the miniircd python virtual environment
|
||||
RUN python3 -m venv --system-site-packages /opt/miniircd/venv
|
||||
|
||||
# Setup the docker pip shell
|
||||
RUN { \
|
||||
echo '#!/bin/bash' ; \
|
||||
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
|
||||
echo 'source /opt/miniircd/venv/bin/activate' ; \
|
||||
echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \
|
||||
} > /opt/miniircd/shell ; \
|
||||
@ -540,7 +438,6 @@ RUN { \
|
||||
echo 'echo "Container: miniircd"'; \
|
||||
echo 'set -e'; \
|
||||
echo 'echo "Setting pip environment to /opt/miniircd"'; \
|
||||
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
|
||||
echo 'source /opt/miniircd/venv/bin/activate'; \
|
||||
echo ''; \
|
||||
echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/miniircd/)?shell$ ]]; then'; \
|
||||
|
152
frontend/package-lock.json
generated
152
frontend/package-lock.json
generated
@ -25,12 +25,14 @@
|
||||
"@types/react-dom": "^19.0.4",
|
||||
"@uiw/react-json-view": "^2.0.0-alpha.31",
|
||||
"jsonrepair": "^3.12.0",
|
||||
"markdown-it": "^14.1.0",
|
||||
"mermaid": "^11.6.0",
|
||||
"mui-markdown": "^2.0.1",
|
||||
"prism-react-renderer": "^2.4.1",
|
||||
"react": "^19.0.0",
|
||||
"react-dom": "^19.0.0",
|
||||
"react-markdown": "^10.1.0",
|
||||
"react-markdown-it": "^1.0.2",
|
||||
"react-plotly.js": "^2.6.0",
|
||||
"react-router-dom": "^7.6.0",
|
||||
"react-scripts": "5.0.1",
|
||||
@ -43,6 +45,7 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@craco/craco": "^7.1.0",
|
||||
"@types/markdown-it": "^14.1.2",
|
||||
"@types/plotly.js": "^2.35.5"
|
||||
}
|
||||
},
|
||||
@ -5091,6 +5094,12 @@
|
||||
"integrity": "sha512-Gjm4+H9noDJgu5EdT3rUw5MhPBag46fiOy27BefvWkNL8mlZnKnCaVVVTLKj6RYXed9b62CPKnPav9govyQDzA==",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/@types/linkify-it": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/linkify-it/-/linkify-it-5.0.0.tgz",
|
||||
"integrity": "sha512-sVDA58zAw4eWAffKOaQH5/5j3XeayukzDk+ewSsnv3p4yJEZHCCzMDiZM8e0OUrRvmpGZ85jf4yDHkHsgBNr9Q==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/long": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/@types/long/-/long-4.0.2.tgz",
|
||||
@ -5113,6 +5122,16 @@
|
||||
"@types/pbf": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/markdown-it": {
|
||||
"version": "14.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@types/markdown-it/-/markdown-it-14.1.2.tgz",
|
||||
"integrity": "sha512-promo4eFwuiW+TfGxhi+0x3czqTYJkG8qB17ZUJiVF10Xm7NLVRSLUsfRTU/6h1e24VvRnXCx+hG7li58lkzog==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"@types/linkify-it": "^5",
|
||||
"@types/mdurl": "^2"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/mdast": {
|
||||
"version": "4.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-4.0.4.tgz",
|
||||
@ -5121,6 +5140,12 @@
|
||||
"@types/unist": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/mdurl": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/mdurl/-/mdurl-2.0.0.tgz",
|
||||
"integrity": "sha512-RGdgjQUZba5p6QEFAVx2OGb8rQDL/cPRG7GiedRzMcJ1tYnUANBncjbSB1NRGwbvjcPeikRABz2nshyPk1bhWg==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/mime": {
|
||||
"version": "1.3.5",
|
||||
"resolved": "https://registry.npmjs.org/@types/mime/-/mime-1.3.5.tgz",
|
||||
@ -11222,6 +11247,14 @@
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/get-stdin": {
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/get-stdin/-/get-stdin-4.0.1.tgz",
|
||||
"integrity": "sha512-F5aQMywwJ2n85s4hJPTT9RPxGmubonuB10MNYo17/xph174n2MIR33HRguhzVag10O/npM7SPk73LMZNP+FaWw==",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/get-stream": {
|
||||
"version": "6.0.1",
|
||||
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz",
|
||||
@ -14335,6 +14368,14 @@
|
||||
"resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz",
|
||||
"integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg=="
|
||||
},
|
||||
"node_modules/linkify-it": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/linkify-it/-/linkify-it-5.0.0.tgz",
|
||||
"integrity": "sha512-5aHCbzQRADcdP+ATqnDuhhJ/MRIqDkZX5pyjFHRRysS8vZ5AbqGEoFIb6pYHPZ+L/OC2Lc+xT8uHVVR5CAK/wQ==",
|
||||
"dependencies": {
|
||||
"uc.micro": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/loader-runner": {
|
||||
"version": "4.3.0",
|
||||
"resolved": "https://registry.npmjs.org/loader-runner/-/loader-runner-4.3.0.tgz",
|
||||
@ -14707,6 +14748,38 @@
|
||||
"node": "^16.13.0 || >=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/markdown-it": {
|
||||
"version": "14.1.0",
|
||||
"resolved": "https://registry.npmjs.org/markdown-it/-/markdown-it-14.1.0.tgz",
|
||||
"integrity": "sha512-a54IwgWPaeBCAAsv13YgmALOF1elABB08FxO9i+r4VFk5Vl4pKokRPeX8u5TCgSsPi6ec1otfLjdOpVcgbpshg==",
|
||||
"dependencies": {
|
||||
"argparse": "^2.0.1",
|
||||
"entities": "^4.4.0",
|
||||
"linkify-it": "^5.0.0",
|
||||
"mdurl": "^2.0.0",
|
||||
"punycode.js": "^2.3.1",
|
||||
"uc.micro": "^2.1.0"
|
||||
},
|
||||
"bin": {
|
||||
"markdown-it": "bin/markdown-it.mjs"
|
||||
}
|
||||
},
|
||||
"node_modules/markdown-it/node_modules/argparse": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
|
||||
"integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
|
||||
},
|
||||
"node_modules/markdown-it/node_modules/entities": {
|
||||
"version": "4.5.0",
|
||||
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
|
||||
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
|
||||
"engines": {
|
||||
"node": ">=0.12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/fb55/entities?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/markdown-table": {
|
||||
"version": "3.0.4",
|
||||
"resolved": "https://registry.npmjs.org/markdown-table/-/markdown-table-3.0.4.tgz",
|
||||
@ -15045,6 +15118,11 @@
|
||||
"resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.0.4.tgz",
|
||||
"integrity": "sha512-iV3XNKw06j5Q7mi6h+9vbx23Tv7JkjEVgKHW4pimwyDGWm0OIQntJJ+u1C6mg6mK1EaTv42XQ7w76yuzH7M2cA=="
|
||||
},
|
||||
"node_modules/mdurl": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/mdurl/-/mdurl-2.0.0.tgz",
|
||||
"integrity": "sha512-Lf+9+2r+Tdp5wXDXC4PcIBjTDtq4UKjCPMQhKIuzpJNW0b96kVqSwW0bT7FhRSfmAiFYgP+SCRvdrDozfh0U5w=="
|
||||
},
|
||||
"node_modules/media-typer": {
|
||||
"version": "0.3.0",
|
||||
"resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
|
||||
@ -18317,6 +18395,14 @@
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/punycode.js": {
|
||||
"version": "2.3.1",
|
||||
"resolved": "https://registry.npmjs.org/punycode.js/-/punycode.js-2.3.1.tgz",
|
||||
"integrity": "sha512-uxFIHU0YlHYhDQtV4R9J6a52SLx28BCjT+4ieh7IGbgwVJWO+km431c4yRlREUAsAmt/uMjQUyQHNEPf0M39CA==",
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/q": {
|
||||
"version": "1.5.1",
|
||||
"resolved": "https://registry.npmjs.org/q/-/q-1.5.1.tgz",
|
||||
@ -18610,6 +18696,67 @@
|
||||
"react": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/react-markdown-it": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/react-markdown-it/-/react-markdown-it-1.0.2.tgz",
|
||||
"integrity": "sha512-Bzo/9UCCxlL2D7rYiVlxEqiOU66mqmLTzjxN0JLlioEhZhp7amzSq1YNS0+Jf0YKQmpBb5rfI9nh5s3wBsKnww==",
|
||||
"dependencies": {
|
||||
"markdown-it": "^4.4.0",
|
||||
"strip-indent": "^1.0.1"
|
||||
}
|
||||
},
|
||||
"node_modules/react-markdown-it/node_modules/entities": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/entities/-/entities-1.1.2.tgz",
|
||||
"integrity": "sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w=="
|
||||
},
|
||||
"node_modules/react-markdown-it/node_modules/linkify-it": {
|
||||
"version": "1.2.4",
|
||||
"resolved": "https://registry.npmjs.org/linkify-it/-/linkify-it-1.2.4.tgz",
|
||||
"integrity": "sha512-eGHwtlABkp1NOJSiKUNqBf3SYAS5jPHtvRXPAgNaQwTqmkTahjtiLH9NtxdR5IOPhNvwNMN/diswSfZKzUkhGg==",
|
||||
"dependencies": {
|
||||
"uc.micro": "^1.0.1"
|
||||
}
|
||||
},
|
||||
"node_modules/react-markdown-it/node_modules/markdown-it": {
|
||||
"version": "4.4.0",
|
||||
"resolved": "https://registry.npmjs.org/markdown-it/-/markdown-it-4.4.0.tgz",
|
||||
"integrity": "sha512-Rl8dHHeLuAh3E72OPY0tY7CLvlxgHiLhlshIYswAAabAg4YDBLa6e/LTgNkkxBO2K61ESzoquPQFMw/iMrT1PA==",
|
||||
"dependencies": {
|
||||
"argparse": "~1.0.2",
|
||||
"entities": "~1.1.1",
|
||||
"linkify-it": "~1.2.0",
|
||||
"mdurl": "~1.0.0",
|
||||
"uc.micro": "^1.0.0"
|
||||
},
|
||||
"bin": {
|
||||
"markdown-it": "bin/markdown-it.js"
|
||||
}
|
||||
},
|
||||
"node_modules/react-markdown-it/node_modules/mdurl": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/mdurl/-/mdurl-1.0.1.tgz",
|
||||
"integrity": "sha512-/sKlQJCBYVY9Ers9hqzKou4H6V5UWc/M59TH2dvkt+84itfnq7uFOMLpOiOS4ujvHP4etln18fmIxA5R5fll0g=="
|
||||
},
|
||||
"node_modules/react-markdown-it/node_modules/strip-indent": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/strip-indent/-/strip-indent-1.0.1.tgz",
|
||||
"integrity": "sha512-I5iQq6aFMM62fBEAIB/hXzwJD6EEZ0xEGCX2t7oXqaKPIRgt4WruAQ285BISgdkP+HLGWyeGmNJcpIwFeRYRUA==",
|
||||
"dependencies": {
|
||||
"get-stdin": "^4.0.1"
|
||||
},
|
||||
"bin": {
|
||||
"strip-indent": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/react-markdown-it/node_modules/uc.micro": {
|
||||
"version": "1.0.6",
|
||||
"resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-1.0.6.tgz",
|
||||
"integrity": "sha512-8Y75pvTYkLJW2hWQHXxoqRgV7qb9B+9vFEtidML+7koHUFapnVJAZ6cKs+Qjz5Aw3aZWHMC6u0wJE3At+nSGwA=="
|
||||
},
|
||||
"node_modules/react-plotly.js": {
|
||||
"version": "2.6.0",
|
||||
"resolved": "https://registry.npmjs.org/react-plotly.js/-/react-plotly.js-2.6.0.tgz",
|
||||
@ -21622,6 +21769,11 @@
|
||||
"node": ">=4.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/uc.micro": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-2.1.0.tgz",
|
||||
"integrity": "sha512-ARDJmphmdvUk6Glw7y9DQ2bFkKBHwQHLi2lsaH6PPmz/Ka9sFOBsBluozhDltWmnv9u/cF6Rt87znRTPV+yp/A=="
|
||||
},
|
||||
"node_modules/ufo": {
|
||||
"version": "1.6.1",
|
||||
"resolved": "https://registry.npmjs.org/ufo/-/ufo-1.6.1.tgz",
|
||||
|
@ -20,12 +20,14 @@
|
||||
"@types/react-dom": "^19.0.4",
|
||||
"@uiw/react-json-view": "^2.0.0-alpha.31",
|
||||
"jsonrepair": "^3.12.0",
|
||||
"markdown-it": "^14.1.0",
|
||||
"mermaid": "^11.6.0",
|
||||
"mui-markdown": "^2.0.1",
|
||||
"prism-react-renderer": "^2.4.1",
|
||||
"react": "^19.0.0",
|
||||
"react-dom": "^19.0.0",
|
||||
"react-markdown": "^10.1.0",
|
||||
"react-markdown-it": "^1.0.2",
|
||||
"react-plotly.js": "^2.6.0",
|
||||
"react-router-dom": "^7.6.0",
|
||||
"react-scripts": "5.0.1",
|
||||
@ -61,6 +63,7 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@craco/craco": "^7.1.0",
|
||||
"@types/markdown-it": "^14.1.2",
|
||||
"@types/plotly.js": "^2.35.5"
|
||||
}
|
||||
}
|
||||
|
@ -34,7 +34,7 @@ const AboutPage = (props: BackstoryPageProps) => {
|
||||
} else if (subRoute) {
|
||||
setRoute && setRoute(subRoute);
|
||||
}
|
||||
}, [page, route]);
|
||||
}, [page, route, setRoute, subRoute]);
|
||||
|
||||
useEffect(() => {
|
||||
let newRoute = page;
|
||||
@ -44,7 +44,7 @@ const AboutPage = (props: BackstoryPageProps) => {
|
||||
if (route !== newRoute && setRoute) {
|
||||
setRoute(newRoute);
|
||||
}
|
||||
}, [route, page, subRoute]);
|
||||
}, [route, page, subRoute, setRoute]);
|
||||
|
||||
const onDocumentExpand = (document: string, open: boolean) => {
|
||||
console.log("Document expanded:", document, open);
|
||||
@ -53,7 +53,7 @@ const AboutPage = (props: BackstoryPageProps) => {
|
||||
setPage(document);
|
||||
} else {
|
||||
setSubRoute("");
|
||||
setPage(document);
|
||||
setPage("");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -14,6 +14,10 @@ const HomePage = forwardRef<ConversationHandle, BackstoryPageProps>((props: Back
|
||||
const theme = useTheme();
|
||||
const isMobile = useMediaQuery(theme.breakpoints.down('md'));
|
||||
|
||||
if (sessionId === undefined) {
|
||||
return <></>;
|
||||
}
|
||||
|
||||
const backstoryPreamble: MessageList = [
|
||||
{
|
||||
role: 'content',
|
||||
|
21
frontend/src/LoadingPage.tsx
Normal file
21
frontend/src/LoadingPage.tsx
Normal file
@ -0,0 +1,21 @@
|
||||
import Box from '@mui/material/Box';
|
||||
import { BackstoryPageProps } from './BackstoryTab';
|
||||
import { BackstoryMessage, Message } from './Message';
|
||||
|
||||
const LoadingPage = (props: BackstoryPageProps) => {
|
||||
const backstoryPreamble: BackstoryMessage = {
|
||||
role: 'info',
|
||||
title: 'Please wait while connecting to Backstory...',
|
||||
disableCopy: true,
|
||||
content: '...',
|
||||
expandable: false,
|
||||
}
|
||||
|
||||
return <Box sx={{display: "flex", flexGrow: 1, maxWidth: "1024px", margin: "0 auto"}}>
|
||||
<Message message={backstoryPreamble} {...props} />
|
||||
</Box>
|
||||
};
|
||||
|
||||
export {
|
||||
LoadingPage
|
||||
};
|
@ -22,6 +22,7 @@ import { Scrollable } from './Scrollable';
|
||||
import { BackstoryPage, BackstoryTabProps } from './BackstoryTab';
|
||||
|
||||
import { HomePage } from './HomePage';
|
||||
import { LoadingPage } from './LoadingPage';
|
||||
import { ResumeBuilderPage } from './ResumeBuilderPage';
|
||||
import { VectorVisualizerPage } from './VectorVisualizer';
|
||||
import { AboutPage } from './AboutPage';
|
||||
@ -92,6 +93,11 @@ const Main = (props: MainProps) => {
|
||||
children: <HomePage ref={chatRef} {...backstoryProps} />
|
||||
};
|
||||
|
||||
const loadingTab: BackstoryTabProps = {
|
||||
...homeTab,
|
||||
children: <LoadingPage {...backstoryProps} />
|
||||
};
|
||||
|
||||
const resumeBuilderTab: BackstoryTabProps = {
|
||||
label: "Resume Builder",
|
||||
path: "resume-builder",
|
||||
@ -132,14 +138,18 @@ const Main = (props: MainProps) => {
|
||||
)
|
||||
};
|
||||
|
||||
return [
|
||||
homeTab,
|
||||
resumeBuilderTab,
|
||||
contextVisualizerTab,
|
||||
aboutTab,
|
||||
controlsTab,
|
||||
];
|
||||
}, [backstoryProps]);
|
||||
if (sessionId === undefined) {
|
||||
return [loadingTab];
|
||||
} else {
|
||||
return [
|
||||
homeTab,
|
||||
resumeBuilderTab,
|
||||
contextVisualizerTab,
|
||||
aboutTab,
|
||||
controlsTab,
|
||||
];
|
||||
}
|
||||
}, [backstoryProps, sessionId]);
|
||||
|
||||
const handleMenuClose = () => {
|
||||
setIsMenuClosing(true);
|
||||
@ -191,7 +201,7 @@ const Main = (props: MainProps) => {
|
||||
}, [tabs]);
|
||||
|
||||
useEffect(() => {
|
||||
if (tab === undefined) {
|
||||
if (tab === undefined || sessionId === undefined) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -205,21 +215,6 @@ const Main = (props: MainProps) => {
|
||||
}
|
||||
}, [tab, subRoute, sessionId, navigate, location.pathname]);
|
||||
|
||||
// useEffect(() => {
|
||||
// const pathParts = window.location.pathname.split('/').filter(Boolean);
|
||||
// console.log(window.location.pathname);
|
||||
// const currentPath = pathParts.length < 2 ? '' : pathParts[0];
|
||||
// const currentSubRoute = pathParts.length > 2 ? pathParts.slice(1, -1).join('/') : '';
|
||||
// console.log(currentPath, currentSubRoute, tabs[activeTab].path)
|
||||
// let tabIndex = tabs.findIndex((tab) => tab.path === currentPath);
|
||||
// if (tabIndex === -1) {
|
||||
// console.log(`Invalid path "${currentPath}" -- redirecting to default`);
|
||||
// tabIndex = 0;
|
||||
// }
|
||||
// setActiveTab(tabIndex);
|
||||
// setSubRoute(currentSubRoute);
|
||||
// }, [tabs, subRoute]);
|
||||
|
||||
/* toolbar height is 64px + 8px margin-top */
|
||||
const Offset = styled('div')(() => ({ minHeight: '72px', height: '72px' }));
|
||||
|
||||
|
@ -117,6 +117,8 @@ const MessageMeta = (props: MessageMetaProps) => {
|
||||
} = props.metadata || {};
|
||||
const message: any = props.messageProps.message;
|
||||
|
||||
rag.forEach((r: any) => r.query = message.prompt);
|
||||
|
||||
let llm_submission: string = "<|system|>\n"
|
||||
llm_submission += message.system_prompt + "\n\n"
|
||||
llm_submission += message.context_prompt
|
||||
@ -189,31 +191,18 @@ const MessageMeta = (props: MessageMetaProps) => {
|
||||
</Accordion>
|
||||
}
|
||||
{
|
||||
rag.map((rag: any) => (
|
||||
<Accordion key={rag.name}>
|
||||
rag.map((collection: any) => (
|
||||
<Accordion key={collection.name}>
|
||||
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||
<Box sx={{ fontSize: "0.8rem" }}>
|
||||
Top RAG {rag.ids.length} matches from '{rag.name}' collection against embedding vector of {rag.query_embedding.length} dimensions
|
||||
Top {collection.ids.length} RAG matches from {collection.size} entries using an embedding vector of {collection.query_embedding.length} dimensions
|
||||
</Box>
|
||||
</AccordionSummary>
|
||||
<AccordionDetails>
|
||||
<Box sx={{ fontSize: "0.8rem" }}>
|
||||
UMAP Vector Visualization of '{rag.name}' RAG
|
||||
</Box>
|
||||
<VectorVisualizer inline {...props.messageProps} {...props.metadata} rag={rag} />
|
||||
{rag.ids.map((id: number, index: number) => <Box key={index}>
|
||||
<Divider />
|
||||
<Box sx={{ whiteSpace: "nowrap", fontSize: "0.75rem", p: 0, m: 0, pt: 0.5 }}>Doc ID: {rag.ids[index]}</Box>
|
||||
<Box sx={{ fontSize: "0.75rem", display: "flex", flexDirection: "row", mb: 0.5, mt: 0.5 }}>
|
||||
<div style={{ display: "flex", flexDirection: "column", paddingRight: "1rem", minWidth: "10rem" }}>
|
||||
<div style={{ whiteSpace: "nowrap" }}>Distance: {Math.round(rag.distances[index] * 100) / 100}</div>
|
||||
<div style={{ whiteSpace: "nowrap" }}>Type: {rag.metadatas[index].doc_type}</div>
|
||||
<div style={{ whiteSpace: "nowrap" }}>Chunk Len: {rag.documents[index].length}</div>
|
||||
</div>
|
||||
<div style={{ display: "flex", padding: "3px", flexGrow: 1, border: "1px solid #E0E0E0", maxHeight: "5rem", overflow: "auto" }}>{rag.documents[index]}</div>
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
<VectorVisualizer inline
|
||||
{...props.messageProps} {...props.metadata}
|
||||
rag={collection} />
|
||||
{/* { ...rag, query: message.prompt }} /> */}
|
||||
</AccordionDetails>
|
||||
</Accordion>
|
||||
))
|
||||
|
@ -31,6 +31,7 @@ const SessionWrapper = ({ setSnack, children }: SessionWrapperProps) => {
|
||||
const location = useLocation();
|
||||
const [sessionId, setSessionId] = useState<string | undefined>(undefined);
|
||||
const fetchingRef = useRef(false);
|
||||
const [retry, setRetry] = useState<number>(0);
|
||||
|
||||
useEffect(() => {
|
||||
const ensureSessionId = async () => {
|
||||
@ -54,10 +55,14 @@ const SessionWrapper = ({ setSnack, children }: SessionWrapperProps) => {
|
||||
fetchingRef.current = true;
|
||||
ensureSessionId().catch((e) => {
|
||||
console.error(e);
|
||||
setSnack("Backstory is temporarily unavailable.", "error");
|
||||
setSnack("Backstory is temporarily unavailable. Retrying in 5 seconds.", "warning");
|
||||
setTimeout(() => {
|
||||
fetchingRef.current = false;
|
||||
setRetry(retry => retry + 1)
|
||||
}, 5000);
|
||||
});
|
||||
}
|
||||
}, [location.pathname, navigate, setSnack, sessionId]);
|
||||
}, [location.pathname, navigate, setSnack, sessionId, retry]);
|
||||
|
||||
return <>{children}</>;
|
||||
};
|
||||
|
@ -1,3 +1,6 @@
|
||||
.Hover {
|
||||
border: 3px solid purple !important;
|
||||
}
|
||||
/* .js-plotly-plot {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
|
@ -9,11 +9,15 @@ import SendIcon from '@mui/icons-material/Send';
|
||||
import FormControlLabel from '@mui/material/FormControlLabel';
|
||||
import Switch from '@mui/material/Switch';
|
||||
import useMediaQuery from '@mui/material/useMediaQuery';
|
||||
import { useTheme } from '@mui/material/styles';
|
||||
import { SxProps, useTheme } from '@mui/material/styles';
|
||||
import JsonView from '@uiw/react-json-view';
|
||||
import Table from '@mui/material/Table';
|
||||
import TableBody from '@mui/material/TableBody';
|
||||
import TableCell from '@mui/material/TableCell';
|
||||
import TableContainer from '@mui/material/TableContainer';
|
||||
import TableRow from '@mui/material/TableRow';
|
||||
|
||||
import { Scrollable } from './Scrollable';
|
||||
import { StyledMarkdown } from './StyledMarkdown';
|
||||
import { connectionBase } from './Global';
|
||||
|
||||
import './VectorVisualizer.css';
|
||||
@ -25,46 +29,67 @@ interface VectorVisualizerProps extends BackstoryPageProps {
|
||||
};
|
||||
|
||||
interface Metadata {
|
||||
doc_type?: string;
|
||||
[key: string]: any;
|
||||
id: string;
|
||||
doc_type: string;
|
||||
content: string;
|
||||
distance?: number;
|
||||
}
|
||||
|
||||
interface ResultData {
|
||||
embeddings: (number[])[];
|
||||
documents: string[];
|
||||
metadatas: Metadata[];
|
||||
ids: string[];
|
||||
dimensions: number;
|
||||
}
|
||||
|
||||
interface PlotData {
|
||||
data: {
|
||||
x: number[];
|
||||
y: number[];
|
||||
z?: number[];
|
||||
colors: string[];
|
||||
text: string[];
|
||||
sizes: number[];
|
||||
symbols: string[];
|
||||
doc_types: string[];
|
||||
};
|
||||
layout: Partial<Plotly.Layout>;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
interface ChromaResult {
|
||||
distances: number[];
|
||||
documents: string[];
|
||||
ids: string[];
|
||||
metadatas: Metadata[];
|
||||
query_embedding: number[];
|
||||
type QuerySet = {
|
||||
ids: string[],
|
||||
documents: string[],
|
||||
metadatas: Metadata[],
|
||||
embeddings: (number[])[],
|
||||
distances?: (number | undefined)[],
|
||||
dimensions?: number;
|
||||
query?: string;
|
||||
umap_embedding_2d?: number[];
|
||||
umap_embedding_3d?: number[];
|
||||
};
|
||||
|
||||
const emptyQuerySet = {
|
||||
ids: [],
|
||||
documents: [],
|
||||
metadatas: [],
|
||||
embeddings: [],
|
||||
};
|
||||
|
||||
interface PlotData {
|
||||
x: number[];
|
||||
y: number[];
|
||||
z?: number[];
|
||||
colors: string[];
|
||||
text: string[];
|
||||
sizes: number[];
|
||||
customdata: Metadata[];
|
||||
}
|
||||
|
||||
const layout: Partial<Plotly.Layout> = {
|
||||
autosize: true,
|
||||
paper_bgcolor: '#FFFFFF', // white
|
||||
plot_bgcolor: '#FFFFFF', // white plot background
|
||||
font: {
|
||||
family: 'Roboto, sans-serif',
|
||||
color: '#2E2E2E', // charcoal black
|
||||
},
|
||||
hovermode: 'closest',
|
||||
scene: {
|
||||
bgcolor: '#FFFFFF', // 3D plot background
|
||||
zaxis: { title: 'Z', gridcolor: '#cccccc', zerolinecolor: '#aaaaaa' },
|
||||
},
|
||||
xaxis: { title: 'X', gridcolor: '#cccccc', zerolinecolor: '#aaaaaa' },
|
||||
yaxis: { title: 'Y', gridcolor: '#cccccc', zerolinecolor: '#aaaaaa' },
|
||||
margin: { r: 0, b: 0, l: 0, t: 0 },
|
||||
legend: {
|
||||
x: 0.8, // Horizontal position (0 to 1, 0 is left, 1 is right)
|
||||
y: 0, // Vertical position (0 to 1, 0 is bottom, 1 is top)
|
||||
xanchor: 'left',
|
||||
yanchor: 'top',
|
||||
orientation: 'h' // 'h' for a horizontal legend ('v' would be vertical)
|
||||
},
|
||||
showlegend: true // Show the legend
|
||||
};
|
||||
|
||||
const normalizeDimension = (arr: number[]): number[] => {
|
||||
const min = Math.min(...arr);
|
||||
const max = Math.max(...arr);
|
||||
@ -73,14 +98,6 @@ const normalizeDimension = (arr: number[]): number[] => {
|
||||
return arr.map(v => (v - min) / range);
|
||||
};
|
||||
|
||||
const getTextColorForBackground = (bgColor: string): string => {
|
||||
const r = parseInt(bgColor.slice(1, 3), 16);
|
||||
const g = parseInt(bgColor.slice(3, 5), 16);
|
||||
const b = parseInt(bgColor.slice(5, 7), 16);
|
||||
const luminance = 0.299 * r + 0.587 * g + 0.114 * b;
|
||||
return luminance > 186 ? '#2E2E2E' : '#FFFFFF'; // Charcoal or white from your theme
|
||||
};
|
||||
|
||||
const emojiMap: Record<string, string> = {
|
||||
query: '🔍',
|
||||
resume: '📄',
|
||||
@ -99,30 +116,34 @@ const colorMap: Record<string, string> = {
|
||||
'jobs': '#F3aD8F', // Warm Gray — soft and neutral
|
||||
};
|
||||
|
||||
const sizeMap: Record<string, number> = {
|
||||
'query': 10,
|
||||
};
|
||||
const DEFAULT_SIZE = 6.;
|
||||
const DEFAULT_UNFOCUS_SIZE = 2.;
|
||||
|
||||
const symbolMap: Record<string, string> = {
|
||||
'query': 'circle',
|
||||
type Node = {
|
||||
id: string,
|
||||
content: string, // Portion of content that was used for embedding
|
||||
full_content: string | undefined, // Portion of content plus/minus buffer
|
||||
emoji: string,
|
||||
doc_type: string,
|
||||
source_file: string,
|
||||
distance: number | undefined,
|
||||
path: string,
|
||||
chunk_begin: number,
|
||||
line_begin: number,
|
||||
chunk_end: number,
|
||||
line_end: number,
|
||||
sx: SxProps,
|
||||
};
|
||||
|
||||
const VectorVisualizer: React.FC<VectorVisualizerProps> = (props: VectorVisualizerProps) => {
|
||||
const { sessionId, setSnack, rag, inline, sx, submitQuery } = props;
|
||||
const { sessionId, setSnack, rag, inline, sx } = props;
|
||||
const [plotData, setPlotData] = useState<PlotData | null>(null);
|
||||
const [newQuery, setNewQuery] = useState<string>('');
|
||||
const [newQueryEmbedding, setNewQueryEmbedding] = useState<ChromaResult | undefined>(undefined);
|
||||
const [result, setResult] = useState<ResultData | undefined>(undefined);
|
||||
const [querySet, setQuerySet] = useState<QuerySet>(rag || emptyQuerySet);
|
||||
const [result, setResult] = useState<QuerySet | undefined>(undefined);
|
||||
const [view2D, setView2D] = useState<boolean>(true);
|
||||
const plotlyRef = useRef(null);
|
||||
const [tooltip, setTooltip] = useState<{
|
||||
visible: boolean,
|
||||
// x: number,
|
||||
// y: number,
|
||||
content: string,
|
||||
background: string,
|
||||
color: string,
|
||||
} | null>(null);
|
||||
const [node, setNode] = useState<Node | null>(null);
|
||||
const theme = useTheme();
|
||||
const isMobile = useMediaQuery(theme.breakpoints.down('md'));
|
||||
|
||||
@ -140,7 +161,7 @@ const VectorVisualizer: React.FC<VectorVisualizerProps> = (props: VectorVisualiz
|
||||
},
|
||||
body: JSON.stringify({ dimensions: view2D ? 2 : 3 }),
|
||||
});
|
||||
const data: ResultData = await response.json();
|
||||
const data: QuerySet = await response.json();
|
||||
data.dimensions = view2D ? 2 : 3;
|
||||
setResult(data);
|
||||
} catch (error) {
|
||||
@ -156,122 +177,137 @@ const VectorVisualizer: React.FC<VectorVisualizerProps> = (props: VectorVisualiz
|
||||
if (!result || !result.embeddings) return;
|
||||
if (result.embeddings.length === 0) return;
|
||||
|
||||
const vectors: (number[])[] = [...result.embeddings];
|
||||
const documents = [...result.documents || []];
|
||||
const metadatas = [...result.metadatas || []];
|
||||
const ids = [...result.ids || []];
|
||||
|
||||
let is2D = vectors.every((v: number[]) => v.length === 2);
|
||||
let is3D = vectors.every((v: number[]) => v.length === 3);
|
||||
|
||||
const full: QuerySet = {
|
||||
ids: [...result.ids || []],
|
||||
documents: [...result.documents || []],
|
||||
embeddings: [...result.embeddings],
|
||||
metadatas: [...result.metadatas || []],
|
||||
};
|
||||
let is2D = full.embeddings.every((v: number[]) => v.length === 2);
|
||||
let is3D = full.embeddings.every((v: number[]) => v.length === 3);
|
||||
if ((view2D && !is2D) || (!view2D && !is3D)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (view2D && rag && rag.umap_embedding_2d) {
|
||||
metadatas.unshift({ doc_type: 'query' });
|
||||
documents.unshift('Query');
|
||||
vectors.unshift(rag.umap_embedding_2d);
|
||||
}
|
||||
|
||||
if (!view2D && rag && rag.umap_embedding_3d) {
|
||||
metadatas.unshift({ doc_type: 'query' });
|
||||
documents.unshift('Query');
|
||||
vectors.unshift(rag.umap_embedding_3d);
|
||||
}
|
||||
|
||||
if (newQueryEmbedding !== undefined) {
|
||||
metadatas.unshift({ doc_type: 'query' });
|
||||
documents.unshift(newQueryEmbedding.query || '');
|
||||
if (view2D && newQueryEmbedding.umap_embedding_2d) {
|
||||
vectors.unshift(newQueryEmbedding.umap_embedding_2d);
|
||||
}
|
||||
if (!view2D && newQueryEmbedding.umap_embedding_3d) {
|
||||
vectors.unshift(newQueryEmbedding.umap_embedding_3d);
|
||||
}
|
||||
}
|
||||
|
||||
is2D = vectors.every((v: number[]) => v.length === 2);
|
||||
is3D = vectors.every((v: number[]) => v.length === 3);
|
||||
|
||||
if (!is2D && !is3D) {
|
||||
console.warn('Modified vectors are neither 2D nor 3D');
|
||||
return;
|
||||
}
|
||||
|
||||
const doc_types = metadatas.map(m => m.doc_type || 'unknown')
|
||||
|
||||
const sizes = doc_types.map((type, index) => {
|
||||
if (!sizeMap[type]) {
|
||||
sizeMap[type] = 5;
|
||||
}
|
||||
/* If this is a match, increase the size */
|
||||
if (rag && rag.ids.includes(ids[index])) {
|
||||
return sizeMap[type] + 5;
|
||||
}
|
||||
if (newQueryEmbedding && newQueryEmbedding.ids && newQueryEmbedding.ids.includes(ids[index])) {
|
||||
return sizeMap[type] + 5;
|
||||
}
|
||||
return sizeMap[type];
|
||||
});
|
||||
const symbols = doc_types.map(type => {
|
||||
if (!symbolMap[type]) {
|
||||
symbolMap[type] = 'circle';
|
||||
}
|
||||
return symbolMap[type];
|
||||
});
|
||||
const colors = doc_types.map(type => {
|
||||
if (!colorMap[type]) {
|
||||
colorMap[type] = '#ff0000';
|
||||
}
|
||||
return colorMap[type];
|
||||
});
|
||||
const customdata = metadatas.map((m, index) => {
|
||||
return { doc: documents[index], type: m.doc_type || 'unknown' };
|
||||
});
|
||||
const x = normalizeDimension(vectors.map((v: number[]) => v[0]));
|
||||
const y = normalizeDimension(vectors.map((v: number[]) => v[1]));
|
||||
const z = is3D ? normalizeDimension(vectors.map((v: number[]) => v[2])) : undefined
|
||||
|
||||
const layout: Partial<Plotly.Layout> = {
|
||||
autosize: true,
|
||||
paper_bgcolor: '#FFFFFF', // white
|
||||
plot_bgcolor: '#FFFFFF', // white plot background
|
||||
font: {
|
||||
family: 'Roboto, sans-serif',
|
||||
color: '#2E2E2E', // charcoal black
|
||||
},
|
||||
hovermode: 'closest',
|
||||
scene: {
|
||||
bgcolor: '#FFFFFF', // 3D plot background
|
||||
zaxis: { title: 'Z', gridcolor: '#cccccc', zerolinecolor: '#aaaaaa' },
|
||||
},
|
||||
xaxis: { title: 'X', gridcolor: '#cccccc', zerolinecolor: '#aaaaaa' },
|
||||
yaxis: { title: 'Y', gridcolor: '#cccccc', zerolinecolor: '#aaaaaa' },
|
||||
margin: { r: 0, b: 0, l: 0, t: 0 },
|
||||
let query: QuerySet = {
|
||||
ids: [],
|
||||
documents: [],
|
||||
embeddings: [],
|
||||
metadatas: [],
|
||||
distances: [],
|
||||
};
|
||||
let filtered: QuerySet = {
|
||||
ids: [],
|
||||
documents: [],
|
||||
embeddings: [],
|
||||
metadatas: [],
|
||||
};
|
||||
|
||||
const data: any = {
|
||||
x: x,
|
||||
y: y,
|
||||
mode: 'markers',
|
||||
marker: {
|
||||
size: sizes,
|
||||
symbol: symbols,
|
||||
color: colors,
|
||||
opacity: 0.8,
|
||||
},
|
||||
customdata: customdata,
|
||||
type: z?.length ? 'scatter3d' : 'scatter',
|
||||
};
|
||||
/* Loop through all items and divide into two groups:
|
||||
* filtered is for any item not in the querySet
|
||||
* query is for any item that is in the querySet
|
||||
*/
|
||||
console.log(querySet);
|
||||
|
||||
if (is3D) {
|
||||
data.z = z;
|
||||
full.ids.forEach((id, index) => {
|
||||
const foundIndex = querySet.ids.indexOf(id);
|
||||
/* Update metadata to hold the doc content and id */
|
||||
full.metadatas[index].id = id;
|
||||
full.metadatas[index].content = full.documents[index];
|
||||
if (foundIndex !== -1) {
|
||||
/* The query set will contain the distance to the query */
|
||||
full.metadatas[index].distance = querySet.distances ? querySet.distances[foundIndex] : undefined;
|
||||
console.log(querySet.distances ? querySet.distances[foundIndex] : undefined);
|
||||
query.ids.push(id);
|
||||
query.documents.push(full.documents[index]);
|
||||
query.embeddings.push(full.embeddings[index]);
|
||||
query.metadatas.push(full.metadatas[index]);
|
||||
} else {
|
||||
/* The filtered set does not have a distance */
|
||||
full.metadatas[index].distance = undefined;
|
||||
filtered.ids.push(id);
|
||||
filtered.documents.push(full.documents[index]);
|
||||
filtered.embeddings.push(full.embeddings[index]);
|
||||
filtered.metadatas.push(full.metadatas[index]);
|
||||
}
|
||||
});
|
||||
|
||||
if (view2D && querySet.umap_embedding_2d && querySet.umap_embedding_2d.length) {
|
||||
query.ids.unshift('query');
|
||||
query.metadatas.unshift({ id: 'query', doc_type: 'query', content: querySet.query || '', distance: 0 });
|
||||
query.embeddings.unshift(querySet.umap_embedding_2d);
|
||||
}
|
||||
|
||||
setPlotData({ data, layout });
|
||||
if (!view2D && querySet.umap_embedding_3d && querySet.umap_embedding_3d.length) {
|
||||
query.ids.unshift('query');
|
||||
query.metadatas.unshift({ id: 'query', doc_type: 'query', content: querySet.query || '', distance: 0 });
|
||||
query.embeddings.unshift(querySet.umap_embedding_3d);
|
||||
}
|
||||
|
||||
}, [result, newQueryEmbedding, rag, view2D, setPlotData, setSnack]);
|
||||
const filtered_doc_types = filtered.metadatas.map(m => m.doc_type || 'unknown')
|
||||
const query_doc_types = query.metadatas.map(m => m.doc_type || 'unknown')
|
||||
|
||||
const has_query = query.metadatas.length > 0;
|
||||
const filtered_sizes = filtered.metadatas.map(m => has_query ? DEFAULT_UNFOCUS_SIZE : DEFAULT_SIZE);
|
||||
const filtered_colors = filtered_doc_types.map(type => colorMap[type] || '#ff8080');
|
||||
const filtered_x = normalizeDimension(filtered.embeddings.map((v: number[]) => v[0]));
|
||||
const filtered_y = normalizeDimension(filtered.embeddings.map((v: number[]) => v[1]));
|
||||
const filtered_z = is3D ? normalizeDimension(filtered.embeddings.map((v: number[]) => v[2])) : undefined;
|
||||
|
||||
const query_sizes = query.metadatas.map(m => DEFAULT_SIZE + 2. * DEFAULT_SIZE * Math.pow((1. - (m.distance || 1.)), 3));
|
||||
const query_colors = query_doc_types.map(type => colorMap[type] || '#ff8080');
|
||||
const query_x = normalizeDimension(query.embeddings.map((v: number[]) => v[0]));
|
||||
const query_y = normalizeDimension(query.embeddings.map((v: number[]) => v[1]));
|
||||
const query_z = is3D ? normalizeDimension(query.embeddings.map((v: number[]) => v[2])) : undefined;
|
||||
|
||||
query_sizes.forEach((s, i) => { console.log(`distance: ${query.metadatas[i].distance} size: ${s}`) });
|
||||
|
||||
// console.log(query_sizes.length, query_colors.length, query_x.length, query_y.length, query.ids.length, query.metadatas.length, query.embeddings.length);
|
||||
// console.log(filtered_sizes.length, filtered_colors.length, filtered_x.length, filtered_y.length, filtered.ids.length, filtered.metadatas.length, filtered.embeddings.length);
|
||||
|
||||
const data: any = [{
|
||||
name: 'All data',
|
||||
x: filtered_x,
|
||||
y: filtered_y,
|
||||
mode: 'markers',
|
||||
marker: {
|
||||
size: filtered_sizes,
|
||||
symbol: 'circle',
|
||||
color: filtered_colors,
|
||||
},
|
||||
text: filtered.ids,
|
||||
customdata: filtered.metadatas,
|
||||
type: is3D ? 'scatter3d' : 'scatter',
|
||||
hovertemplate: ' ',
|
||||
}, {
|
||||
name: 'Query',
|
||||
x: query_x,
|
||||
y: query_y,
|
||||
mode: 'markers',
|
||||
marker: {
|
||||
size: query_sizes,
|
||||
symbol: 'circle',
|
||||
color: query_colors,
|
||||
},
|
||||
text: query.ids,
|
||||
customdata: query.metadatas,
|
||||
type: is3D ? 'scatter3d' : 'scatter',
|
||||
hovertemplate: '%{text}',
|
||||
}];
|
||||
|
||||
if (is3D) {
|
||||
data[0].z = filtered_z;
|
||||
data[1].z = query_z;
|
||||
}
|
||||
|
||||
setPlotData(data);
|
||||
|
||||
}, [result, querySet, view2D, setPlotData, setSnack]);
|
||||
|
||||
if (setSnack === undefined) {
|
||||
console.error('setSnack function is undefined');
|
||||
@ -298,8 +334,8 @@ const VectorVisualizer: React.FC<VectorVisualizerProps> = (props: VectorVisualiz
|
||||
dimensions: view2D ? 2 : 3,
|
||||
})
|
||||
});
|
||||
const chroma: ChromaResult = await response.json();
|
||||
setNewQueryEmbedding(chroma);
|
||||
const data = await response.json();
|
||||
setQuerySet(data);
|
||||
} catch (error) {
|
||||
console.error('Error obtaining query similarity information:', error);
|
||||
setSnack("Unable to obtain query similarity information.", "error");
|
||||
@ -312,6 +348,64 @@ const VectorVisualizer: React.FC<VectorVisualizerProps> = (props: VectorVisualiz
|
||||
</Box>
|
||||
);
|
||||
|
||||
/**
 * Fetch the stored content for a selected node from the backend and, on
 * success, re-publish the node through `setNode` with `full_content` set to
 * the parsed JSON response body.
 *
 * @param node - Node whose `id` is looked up at `/api/umap/entry/{id}/{sessionId}`.
 *
 * On any failure (network error, non-2xx status, unparsable body) the error is
 * logged and reported through `setSnack`, and `setNode` is not called.
 */
const fetchRAGMeta = async (node: Node) => {
  try {
    const response = await fetch(connectionBase + `/api/umap/entry/${node.id}/${sessionId}`, {
      method: 'GET',
      headers: {
        'Content-Type': 'application/json',
      },
    });

    // fetch() only rejects on network failure; an HTTP error status still
    // resolves. Without this check the JSON error body of a 4xx/5xx reply
    // would be stored as the node's full_content.
    if (!response.ok) {
      throw new Error(`Request failed with status ${response.status}`);
    }

    const update: Node = {
      ...node,
      full_content: await response.json()
    };
    setNode(update);
  } catch (error) {
    const msg = `Error obtaining content for ${node.id}.`;
    console.error(msg, error);
    setSnack(msg, "error");
  }
};
|
||||
|
||||
// Build a Node from the metadata attached to a selected item and publish it
// with setNode. The synthetic 'query' entry gets a generated description and
// returns early; every other entry is published immediately and then
// fetchRAGMeta is called for it.
const onNodeSelected = (metadata: any) => {
|
||||
let node: Node;
|
||||
// The query entry: replace its content with explanatory text and attach
// display styling; no backend lookup is made for it.
if (metadata.doc_type === 'query') {
|
||||
node = {
|
||||
...metadata,
|
||||
content: `Similarity results for the query **${querySet.query || ''}**
|
||||
|
||||
The scatter graph shows the query in N-dimensional space, mapped to ${view2D ? '2' : '3'}-dimensional space. Larger dots represent relative similarity in N-dimensional space.
|
||||
`,
|
||||
emoji: emojiMap[metadata.doc_type],
|
||||
sx: {
|
||||
m: 0.5,
|
||||
p: 2,
|
||||
width: '3rem',
|
||||
display: "flex",
|
||||
alignContent: "center",
|
||||
justifyContent: "center",
|
||||
flexGrow: 0,
|
||||
flexWrap: "wrap",
|
||||
backgroundColor: colorMap[metadata.doc_type] || '#ff8080',
|
||||
}
|
||||
}
|
||||
setNode(node);
|
||||
return;
|
||||
}
|
||||
|
||||
// `Loading...` is only a fallback: metadata is spread after it, so a
// `content` key on metadata overrides the placeholder.
node = {
|
||||
content: `Loading...`,
|
||||
...metadata,
|
||||
emoji: emojiMap[metadata.doc_type] || '❓',
|
||||
}
|
||||
|
||||
setNode(node);
|
||||
|
||||
// Not awaited: fetchRAGMeta is async and this call's result is discarded.
fetchRAGMeta(node);
|
||||
};
|
||||
|
||||
return (
|
||||
<Card className="VectorVisualizer"
|
||||
sx={{
|
||||
@ -319,6 +413,9 @@ const VectorVisualizer: React.FC<VectorVisualizerProps> = (props: VectorVisualiz
|
||||
position: 'relative',
|
||||
flexDirection: 'column',
|
||||
flexGrow: 1,
|
||||
m: 0,
|
||||
p: 0,
|
||||
border: "none",
|
||||
...sx
|
||||
}}>
|
||||
{
|
||||
@ -329,9 +426,8 @@ const VectorVisualizer: React.FC<VectorVisualizerProps> = (props: VectorVisualiz
|
||||
}
|
||||
<Card sx={{ p: 0, m: 0, display: "flex", flexGrow: 1, position: "relative", flexDirection: isMobile ? "column" : "row" }}>
|
||||
<Box sx={{ p: 0, m: 0, display: "flex", flexGrow: 1, position: "relative", flexDirection: "column" }}>
|
||||
<Box sx={{
|
||||
borderBottom: "1px solid #2E2E2E",
|
||||
p: 0, m: 0,
|
||||
<Card sx={{
|
||||
p: 0.5, m: 0.5,
|
||||
display: "flex",
|
||||
flexGrow: 0,
|
||||
height: isMobile ? "auto" : "320px",
|
||||
@ -354,20 +450,8 @@ const VectorVisualizer: React.FC<VectorVisualizerProps> = (props: VectorVisualiz
|
||||
control={<Switch checked={!view2D} />} onChange={() => setView2D(!view2D)} label="3D" />
|
||||
<Plot
|
||||
ref={plotlyRef}
|
||||
onClick={(event: any) => {
|
||||
const point = event.points[0];
|
||||
console.log('Point:', point);
|
||||
const type = point.customdata.type;
|
||||
const text = point.customdata.doc;
|
||||
const emoji = emojiMap[type] || '❓';
|
||||
setTooltip({
|
||||
visible: true,
|
||||
background: point['marker.color'],
|
||||
color: getTextColorForBackground(point['marker.color']),
|
||||
content: `${emoji} ${type.toUpperCase()}\n${text}`,
|
||||
});
|
||||
}}
|
||||
data={[plotData.data]}
|
||||
onClick={(event: any) => { onNodeSelected(event.points[0].customdata); }}
|
||||
data={plotData}
|
||||
useResizeHandler={true}
|
||||
config={{
|
||||
responsive: true,
|
||||
@ -384,12 +468,15 @@ const VectorVisualizer: React.FC<VectorVisualizerProps> = (props: VectorVisualiz
|
||||
margin: 0,
|
||||
width: "100%",
|
||||
height: "100%",
|
||||
overflow: "hidden",
|
||||
}}
|
||||
layout={plotData.layout}
|
||||
layout={layout}
|
||||
/>
|
||||
</Box>
|
||||
</Card>
|
||||
{
|
||||
!inline && newQueryEmbedding && <Scrollable sx={{
|
||||
!inline && querySet.ids.length > 0 && <Scrollable
|
||||
autoscroll={false}
|
||||
sx={{
|
||||
display: "flex",
|
||||
position: "relative",
|
||||
width: "100%",
|
||||
@ -401,48 +488,125 @@ const VectorVisualizer: React.FC<VectorVisualizerProps> = (props: VectorVisualiz
|
||||
displayDataTypes={false}
|
||||
objectSortKeys={true}
|
||||
collapsed={1}
|
||||
value={newQueryEmbedding}
|
||||
value={querySet}
|
||||
style={{
|
||||
padding: "14px 0",
|
||||
fontSize: "0.8rem",
|
||||
overflow: "hidden",
|
||||
width: "100%",
|
||||
minHeight: "max-content",
|
||||
}} />
|
||||
}}>
|
||||
<JsonView.Row
|
||||
as="div"
|
||||
render={(props, { keyName, value, parentValue }) => {
|
||||
return (
|
||||
<div
|
||||
{...props}
|
||||
onClick={() => {
|
||||
if (typeof value === "string") {
|
||||
const id: string = value;
|
||||
/* Not sure why 'distance' isn't being tracked in customdata... */
|
||||
const item = plotData.customdata.find(m => m.id === id)
|
||||
if (item) {
|
||||
onNodeSelected(item);
|
||||
}
|
||||
}
|
||||
|
||||
// console.log("keyName", keyName)
|
||||
// console.log("value", value)
|
||||
// console.log("parentValue", parentValue)
|
||||
}}
|
||||
/>
|
||||
)
|
||||
}}
|
||||
/>
|
||||
</JsonView>
|
||||
</Scrollable>
|
||||
}
|
||||
{
|
||||
!inline && !newQueryEmbedding && <Box sx={{ p: 1 }}>Enter query below to view distances.</Box>
|
||||
!inline && querySet.ids.length === 0 && <Box sx={{ p: 1 }}>Enter query below to view distances.</Box>
|
||||
}
|
||||
</Box>
|
||||
|
||||
{!inline &&
|
||||
<Scrollable sx={{
|
||||
borderLeft: isMobile ? "none" : "1px solid #2E2E2E",
|
||||
display: 'flex',
|
||||
flexDirection: 'column',
|
||||
flexGrow: isMobile ? 1 : 0.5,
|
||||
width: isMobile ? "100%" : "600px",
|
||||
maxWidth: isMobile ? "100%" : "600px",
|
||||
// height: "calc(100vh - 72px - 144px)",
|
||||
mt: 0,
|
||||
p: 0.5,
|
||||
color: tooltip?.color || '#2E2E2E',
|
||||
background: tooltip?.background || '#FFFFFF',
|
||||
whiteSpace: 'pre-line',
|
||||
zIndex: 1000,
|
||||
overflow: 'auto',
|
||||
overflowWrap: 'break-all',
|
||||
wordBreak: 'break-all',
|
||||
}}
|
||||
>
|
||||
<StyledMarkdown sx={{ p: 1, pt: 0 }} content={tooltip?.content || "Select a node in the visualization."} {...{ sessionId, setSnack, submitQuery }} />
|
||||
{<Box sx={{
|
||||
display: "flex", flexDirection: "column",
|
||||
flexGrow: isMobile ? 1 : 0.5,
|
||||
width: isMobile ? "100%" : "600px",
|
||||
maxWidth: isMobile ? "100%" : "600px",
|
||||
}}>
|
||||
{node !== null &&
|
||||
<Card sx={{ display: "flex", flexDirection: "column", m: 0.5, p: 0.5, flexGrow: 1, minHeight: "fit-content" }}>
|
||||
<TableContainer component={Card} sx={{ mb: 1, minHeight: "max-content" }}>
|
||||
<Table size="small" sx={{ tableLayout: 'fixed' }}>
|
||||
<TableBody sx={{ '& td': { verticalAlign: "top" }, '& td:first-of-type': { whiteSpace: "nowrap", width: "5rem" } }}>
|
||||
<TableRow>
|
||||
<TableCell>Type</TableCell>
|
||||
<TableCell>{node.emoji} {node.doc_type}</TableCell>
|
||||
</TableRow>
|
||||
{node.source_file !== undefined && <TableRow>
|
||||
<TableCell>File</TableCell>
|
||||
<TableCell>{node.source_file.replace(/^.*\//, '')}, lines: {node.line_begin}-{node.line_end}</TableCell>
|
||||
</TableRow>}
|
||||
{node.path !== undefined && <TableRow>
|
||||
<TableCell>Location</TableCell>
|
||||
<TableCell>{node.path}</TableCell>
|
||||
</TableRow>}
|
||||
{node.distance !== undefined && <TableRow>
|
||||
<TableCell>Distance</TableCell>
|
||||
<TableCell>{node.distance}</TableCell>
|
||||
</TableRow>}
|
||||
</TableBody>
|
||||
</Table>
|
||||
</TableContainer>
|
||||
{node.content !== "" && node.content !== undefined &&
|
||||
<Box sx={{ display: "flex", fontSize: "0.75rem", flexDirection: "column", border: "1px solid #808080", minHeight: "fit-content", flexGrow: 1 }}>
|
||||
<Box sx={{ display: "flex", background: "#404040", p: 1, color: "white" }}>Vector Embedded Content</Box>
|
||||
<Box sx={{ display: "flex", p: 1, flexGrow: 1 }}>{node.content}</Box>
|
||||
</Box>
|
||||
}
|
||||
</Card>
|
||||
}
|
||||
{node === null &&
|
||||
<Card sx={{ m: 0.5, p: 1, flexGrow: 1 }}>No node selected. Click a point in the scatter-graph.</Card>
|
||||
}
|
||||
{!inline &&
|
||||
<Scrollable
|
||||
autoscroll={false}
|
||||
sx={{
|
||||
display: 'flex',
|
||||
flexDirection: 'column',
|
||||
// height: "calc(100vh - 72px - 144px)",
|
||||
m: 0,
|
||||
p: 0.5,
|
||||
whiteSpace: 'pre-line',
|
||||
zIndex: 1000,
|
||||
overflow: 'auto',
|
||||
overflowWrap: 'break-all',
|
||||
wordBreak: 'break-all',
|
||||
flexGrow: 1
|
||||
}}
|
||||
>
|
||||
{node !== null && node.full_content &&
|
||||
node.full_content.split('\n').map((line, index) => {
|
||||
index += 1 + node.chunk_begin;
|
||||
const bgColor = (index > node.line_begin && index <= node.line_end) ? '#f0f0f0' : 'auto';
|
||||
// console.log(index, node.line_begin, node.line_end, bgColor);
|
||||
return <Box key={index} sx={{ display: "flex", flexDirection: "row", borderBottom: '1px solid #d0d0d0', backgroundColor: bgColor }}>
|
||||
<Box sx={{ fontFamily: 'courier', fontSize: "0.8rem", minWidth: "2rem", pt: "0.2rem", align: "left", verticalAlign: "top" }}>{index}</Box>
|
||||
<pre style={{ margin: 0, padding: 0, border: "none", minHeight: "1rem" }} >{line || " "}</pre>
|
||||
</Box>;
|
||||
})
|
||||
}
|
||||
{node !== null && !node.line_begin && <pre style={{ margin: 0, padding: 0, border: "none" }}>{node.content}</pre>}
|
||||
{node === null && "Select a node in the visualization."}
|
||||
</Scrollable>
|
||||
}
|
||||
</Box >
|
||||
}
|
||||
</Card>
|
||||
{!inline && newQueryEmbedding !== undefined &&
|
||||
{!inline && querySet.query !== undefined && querySet.query !== '' &&
|
||||
<Card sx={{ display: 'flex', flexDirection: 'column', justifyContent: 'center', flexGrow: 0, minHeight: '2.5rem', maxHeight: '2.5rem', height: '2.5rem', alignItems: 'center', mt: 1, pb: 0 }}>
|
||||
Query: {newQueryEmbedding.query}
|
||||
Query: {querySet.query}
|
||||
</Card>
|
||||
}
|
||||
|
||||
@ -471,6 +635,7 @@ const VectorVisualizer: React.FC<VectorVisualizerProps> = (props: VectorVisualiz
|
||||
|
||||
const VectorVisualizerPage: React.FC<VectorVisualizerProps> = (props: VectorVisualizerProps) => {
|
||||
return <Scrollable
|
||||
autoscroll={false}
|
||||
sx={{
|
||||
maxWidth: "1024px",
|
||||
height: "calc(100vh - 72px)",
|
||||
@ -480,7 +645,7 @@ const VectorVisualizerPage: React.FC<VectorVisualizerProps> = (props: VectorVisu
|
||||
</Scrollable>;
|
||||
};
|
||||
|
||||
export type { VectorVisualizerProps, ResultData };
|
||||
export type { VectorVisualizerProps };
|
||||
|
||||
export {
|
||||
VectorVisualizer,
|
||||
|
@ -172,19 +172,31 @@ const useAutoScrollToBottom = (
|
||||
const scrollTo = scrollToRef.current;
|
||||
if (!container) return;
|
||||
|
||||
const handleScroll = () => {
|
||||
const handleScroll = (ev: Event, pause?: number) => {
|
||||
const currentScrollTop = container.scrollTop;
|
||||
isUserScrollingUpRef.current = currentScrollTop < lastScrollTop.current;
|
||||
debug && console.debug(`Scrolling up: ${isUserScrollingUpRef.current}`);
|
||||
/* If the user is scrolling up *or* they used the scroll wheel and didn't scroll,
|
||||
* they may be zooming in a region; pause scrolling */
|
||||
isUserScrollingUpRef.current = (currentScrollTop <= lastScrollTop.current) || pause ? true : false;
|
||||
debug && console.debug(`Scrolling up or paused: ${isUserScrollingUpRef.current} ${pause}`);
|
||||
lastScrollTop.current = currentScrollTop;
|
||||
|
||||
if (scrollTimeout.current) clearTimeout(scrollTimeout.current);
|
||||
scrollTimeout.current = setTimeout(() => {
|
||||
isUserScrollingUpRef.current = false;
|
||||
debug && console.debug(`Scrolling up: ${isUserScrollingUpRef.current}`);
|
||||
}, 500);
|
||||
}, pause ? pause : 500);
|
||||
};
|
||||
|
||||
const pauseScroll = (ev: Event) => {
|
||||
debug && console.log("Pausing for mouse movement");
|
||||
handleScroll(ev, 500);
|
||||
}
|
||||
|
||||
const pauseClick = (ev: Event) => {
|
||||
debug && console.log("Pausing for mouse click");
|
||||
handleScroll(ev, 1000);
|
||||
}
|
||||
|
||||
const handlePaste = () => {
|
||||
// Delay scroll check to ensure DOM updates
|
||||
setTimeout(() => {
|
||||
@ -192,6 +204,9 @@ const useAutoScrollToBottom = (
|
||||
}, 0);
|
||||
};
|
||||
|
||||
window.addEventListener('mousemove', pauseScroll);
|
||||
window.addEventListener('mousedown', pauseClick);
|
||||
|
||||
container.addEventListener('scroll', handleScroll);
|
||||
if (scrollTo) {
|
||||
scrollTo.addEventListener('paste', handlePaste);
|
||||
@ -199,6 +214,8 @@ const useAutoScrollToBottom = (
|
||||
checkAndScrollToBottom();
|
||||
|
||||
return () => {
|
||||
window.removeEventListener('mousedown', pauseClick);
|
||||
window.removeEventListener('mousemove', pauseScroll);
|
||||
container.removeEventListener('scroll', handleScroll);
|
||||
if (scrollTo) {
|
||||
scrollTo.removeEventListener('paste', handlePaste);
|
||||
|
@ -308,6 +308,38 @@ class WebServer:
|
||||
return RedirectResponse(url=f"/{context.id}", status_code=307)
|
||||
# return JSONResponse({"redirect": f"/{context.id}"})
|
||||
|
||||
@self.app.get("/api/umap/entry/{doc_id}/{context_id}")
async def get_umap(doc_id: str, context_id: str, request: Request):
    # Return the prepared content for the UMAP collection entry whose id
    # equals doc_id.
    #   400 - context_id did not resolve to a context
    #   404 - no UMAP collection, no metadatas, or no entry with that id
    #   500 - anything raised, including an uninitialized file watcher
    logger.info(f"{request.method} {request.url.path}")
    try:
        if not self.file_watcher:
            raise Exception("File watcher not initialized")

        if not self.upsert_context(context_id):
            return JSONResponse(
                {"error": f"Invalid context: {context_id}"}, status_code=400
            )

        umap_data = self.file_watcher.umap_collection
        if not umap_data:
            return JSONResponse(
                {"error": "No UMAP collection found"}, status_code=404
            )

        if umap_data.get("metadatas", None):
            # ids and metadatas are read with the same index, so the position
            # of the matching id selects the metadata entry.
            position = next(
                (
                    i
                    for i, entry_id in enumerate(umap_data.get("ids", []))
                    if entry_id == doc_id
                ),
                None,
            )
            if position is not None:
                # Copy before handing the entry to prepare_metadata.
                entry = umap_data.get("metadatas", [])[position].copy()
                return JSONResponse(self.file_watcher.prepare_metadata(entry))

        return JSONResponse(f"Document id {doc_id} not found.", 404)
    except Exception as e:
        logger.error(f"get_umap error: {str(e)}")
        logger.error(traceback.format_exc())
        return JSONResponse({"error": str(e)}, 500)
|
||||
|
||||
@self.app.put("/api/umap/{context_id}")
|
||||
async def put_umap(context_id: str, request: Request):
|
||||
logger.info(f"{request.method} {request.url.path}")
|
||||
@ -324,8 +356,8 @@ class WebServer:
|
||||
data = await request.json()
|
||||
|
||||
dimensions = data.get("dimensions", 2)
|
||||
result = self.file_watcher.umap_collection
|
||||
if not result:
|
||||
collection = self.file_watcher.umap_collection
|
||||
if not collection:
|
||||
return JSONResponse(
|
||||
{"error": "No UMAP collection found"}, status_code=404
|
||||
)
|
||||
@ -340,8 +372,13 @@ class WebServer:
|
||||
return JSONResponse(
|
||||
{"error": "No UMAP embedding found"}, status_code=404
|
||||
)
|
||||
|
||||
result["embeddings"] = umap_embedding.tolist()
|
||||
result = {
|
||||
"ids": collection.get("ids", []),
|
||||
"metadatas": collection.get("metadatas", []),
|
||||
"documents": collection.get("documents", []),
|
||||
"embeddings": umap_embedding.tolist(),
|
||||
"size": self.file_watcher.collection.count()
|
||||
}
|
||||
|
||||
return JSONResponse(result)
|
||||
|
||||
@ -363,12 +400,12 @@ class WebServer:
|
||||
try:
|
||||
data = await request.json()
|
||||
query = data.get("query", "")
|
||||
threshold = data.get("threshold", 0.5)
|
||||
results = data.get("results", 10)
|
||||
threshold = data.get("threshold", defines.default_rag_threshold)
|
||||
results = data.get("results", defines.default_rag_top_k)
|
||||
except:
|
||||
query = ""
|
||||
threshold = 0.5
|
||||
results = 10
|
||||
threshold = defines.default_rag_threshold
|
||||
results = defines.default_rag_top_k
|
||||
if not query:
|
||||
return JSONResponse(
|
||||
{"error": "No query provided for similarity search"},
|
||||
@ -400,14 +437,15 @@ class WebServer:
|
||||
f"UMAP 3D output: {umap_3d}, length: {len(umap_3d)}"
|
||||
) # Debug output
|
||||
|
||||
return JSONResponse(
|
||||
{
|
||||
**chroma_results,
|
||||
"query": query,
|
||||
"umap_embedding_2d": umap_2d,
|
||||
"umap_embedding_3d": umap_3d,
|
||||
}
|
||||
)
|
||||
return JSONResponse({
|
||||
"distances": chroma_results["distances"],
|
||||
"ids": chroma_results["ids"],
|
||||
"metadatas": chroma_results["metadatas"],
|
||||
"query": query,
|
||||
"umap_embedding_2d": umap_2d,
|
||||
"umap_embedding_3d": umap_3d,
|
||||
"size": self.file_watcher.collection.count()
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
|
@ -64,7 +64,7 @@ class Context(BaseModel):
|
||||
return self
|
||||
|
||||
def generate_rag_results(
|
||||
self, message: Message, top_k=10, threshold=0.7
|
||||
self, message: Message, top_k=defines.default_rag_top_k, threshold=defines.default_rag_threshold
|
||||
) -> Generator[Message, None, None]:
|
||||
"""
|
||||
Generate RAG results for the given query.
|
||||
@ -124,6 +124,7 @@ class Context(BaseModel):
|
||||
**chroma_results,
|
||||
"umap_embedding_2d": umap_2d,
|
||||
"umap_embedding_3d": umap_3d,
|
||||
"size": self.file_watcher.collection.count()
|
||||
}
|
||||
)
|
||||
message.response = f"Results from {rag['name']} RAG: {len(chroma_results['documents'])} results."
|
||||
@ -176,7 +177,6 @@ class Context(BaseModel):
|
||||
|
||||
raise ValueError(f"No agent class found for agent_type: {agent_type}")
|
||||
|
||||
@classmethod
|
||||
def add_agent(self, agent: AnyAgent) -> None:
|
||||
"""Add an Agent to the context, ensuring no duplicate agent_type."""
|
||||
if any(s.agent_type == agent.agent_type for s in self.agents):
|
||||
|
@ -1,20 +1,53 @@
|
||||
import os
|
||||
|
||||
ollama_api_url = "http://ollama:11434" # Default Ollama local endpoint
|
||||
|
||||
# model = "deepseek-r1:7b" # Tool calls don't work
|
||||
# model="mistral:7b" # Tool calls don't work
|
||||
# model = "llama3.2"
|
||||
# model = "qwen3:8b" # Requires newer ollama
|
||||
# model = "gemma3:4b" # Requires newer ollama
|
||||
model = os.getenv("MODEL_NAME", "qwen2.5:7b")
|
||||
# model = "llama3.2" # Good results; qwen seems slightly better
|
||||
# model = "mistral:7b" # Tool calls don't work
|
||||
model = "qwen2.5:7b" # Good results
|
||||
# model = "qwen3:8b" # Requires newer ollama
|
||||
model = os.getenv("MODEL_NAME", model)
|
||||
|
||||
# Embedding model for producing vectors to use in RAG
|
||||
embedding_model = os.getenv("EMBEDDING_MODEL_NAME", "mxbai-embed-large")
|
||||
persist_directory = os.getenv("PERSIST_DIR", "/opt/backstory/chromadb")
|
||||
|
||||
# Maximum context size to allow the LLM to use. This starts
|
||||
# smaller and will go up if different agents are requesting larger
|
||||
# contexts. Changing context size requires the LLM to reload, which
|
||||
# can take a few seconds.
|
||||
max_context = 2048 * 8 * 2
|
||||
doc_dir = "/opt/backstory/docs/"
|
||||
|
||||
# Where to store session json files
|
||||
context_dir = "/opt/backstory/sessions"
|
||||
static_content = "/opt/backstory/frontend/deployed"
|
||||
|
||||
# Path to candidate full resume
|
||||
resume_doc = "/opt/backstory/docs/resume/resume.md"
|
||||
# Only used for testing; backstory-prod will not use this
|
||||
|
||||
# Location of frontend container's build output mapped into the container
|
||||
static_content = "/opt/backstory/frontend/deployed"
|
||||
|
||||
logging_level = os.getenv("LOGGING_LEVEL", "INFO").upper()
|
||||
|
||||
# RAG and Vector DB settings
|
||||
## Where to read RAG content
|
||||
|
||||
persist_directory = os.getenv("PERSIST_DIR", "/opt/backstory/chromadb")
|
||||
doc_dir = "/opt/backstory/docs/"
|
||||
chunk_buffer = 5 # Number of lines before and after chunk beyond the portion used in embedding (to return to callers)
|
||||
|
||||
# Maximum number of entries for ChromaDB to find
|
||||
default_rag_top_k = 30
|
||||
|
||||
# Cosine Distance Equivalent Similarity Retrieval Characteristics
|
||||
# 0.2 - 0.3 0.85 - 0.90 Very strict, highly precise results only
|
||||
# 0.3 - 0.5 0.75 - 0.85 Strong relevance, good precision
|
||||
# 0.5 - 0.7 0.65 - 0.75 Balanced precision/recall
|
||||
# 0.7 - 0.9 0.55 - 0.65 Higher recall, more inclusive
|
||||
# 0.9 - 1.2 0.40 - 0.55 Very inclusive, may include tangential content
|
||||
default_rag_threshold = 0.75
|
||||
|
||||
# Only used for testing; backstory-prod does not use this
|
||||
key_path = "/opt/backstory/keys/key.pem"
|
||||
cert_path = "/opt/backstory/keys/cert.pem"
|
||||
logging_level = os.getenv("LOGGING_LEVEL", "INFO").upper()
|
||||
|
@ -4,7 +4,7 @@ from markdown_it.tree import SyntaxTreeNode
|
||||
import traceback
|
||||
import logging
|
||||
import json
|
||||
|
||||
from . import defines
|
||||
|
||||
class Chunk(TypedDict):
|
||||
text: str
|
||||
@ -16,6 +16,7 @@ def clear_chunk(chunk: Chunk):
|
||||
chunk["metadata"] = {
|
||||
"doc_type": "unknown",
|
||||
"source_file": chunk["metadata"]["source_file"],
|
||||
"lines": chunk["metadata"]["lines"],
|
||||
"path": "", # This will be updated during processing
|
||||
"level": 0,
|
||||
}
|
||||
@ -48,7 +49,7 @@ class MarkdownChunker:
|
||||
ast = SyntaxTreeNode(tokens)
|
||||
|
||||
# Extract chunks with metadata
|
||||
chunks = self.extract_chunks(ast, file_path)
|
||||
chunks = self.extract_chunks(ast, file_path, len(content.splitlines()))
|
||||
|
||||
return chunks
|
||||
|
||||
@ -58,7 +59,7 @@ class MarkdownChunker:
|
||||
|
||||
return None
|
||||
|
||||
def extract_chunks(self, ast: SyntaxTreeNode, file_path: str) -> List[Chunk]:
|
||||
def extract_chunks(self, ast: SyntaxTreeNode, file_path: str, total_lines: int) -> List[Chunk]:
|
||||
"""
|
||||
Extract logical chunks from the AST with appropriate metadata.
|
||||
|
||||
@ -77,6 +78,7 @@ class MarkdownChunker:
|
||||
"text": "",
|
||||
"metadata": {
|
||||
"source_file": file_path,
|
||||
"lines": total_lines
|
||||
},
|
||||
}
|
||||
clear_chunk(chunk)
|
||||
@ -112,6 +114,7 @@ class MarkdownChunker:
|
||||
chunks: List[Chunk],
|
||||
chunk: Chunk,
|
||||
level: int,
|
||||
buffer: int = defines.chunk_buffer
|
||||
) -> int:
|
||||
is_list = False
|
||||
# Handle heading nodes
|
||||
@ -138,10 +141,13 @@ class MarkdownChunker:
|
||||
if node.nester_tokens:
|
||||
opening, closing = node.nester_tokens
|
||||
if opening and opening.map:
|
||||
(
|
||||
chunk["metadata"]["line_begin"],
|
||||
chunk["metadata"]["line_end"],
|
||||
) = opening.map
|
||||
( begin, end ) = opening.map
|
||||
metadata = chunk["metadata"]
|
||||
metadata["chunk_begin"] = max(0, begin - buffer)
|
||||
metadata["chunk_end"] = min(metadata["lines"], end + buffer)
|
||||
metadata["line_begin"] = begin
|
||||
metadata["line_end"] = end
|
||||
|
||||
chunks.append(chunk.copy())
|
||||
clear_chunk(chunk)
|
||||
|
||||
@ -180,10 +186,12 @@ class MarkdownChunker:
|
||||
if node.nester_tokens:
|
||||
opening, closing = node.nester_tokens
|
||||
if opening and opening.map:
|
||||
(
|
||||
chunk["metadata"]["line_begin"],
|
||||
chunk["metadata"]["line_end"],
|
||||
) = opening.map
|
||||
( begin, end ) = opening.map
|
||||
metadata = chunk["metadata"]
|
||||
metadata["chunk_begin"] = max(0, begin - buffer)
|
||||
metadata["chunk_end"] = min(metadata["lines"], end + buffer)
|
||||
metadata["line_begin"] = begin
|
||||
metadata["line_end"] = end
|
||||
chunks.append(chunk.copy())
|
||||
clear_chunk(chunk)
|
||||
|
||||
@ -203,10 +211,12 @@ class MarkdownChunker:
|
||||
if node.nester_tokens:
|
||||
opening, closing = node.nester_tokens
|
||||
if opening and opening.map:
|
||||
(
|
||||
chunk["metadata"]["line_begin"],
|
||||
chunk["metadata"]["line_end"],
|
||||
) = opening.map
|
||||
( begin, end ) = opening.map
|
||||
metadata = chunk["metadata"]
|
||||
metadata["chunk_begin"] = max(0, begin - buffer)
|
||||
metadata["chunk_end"] = min(metadata["lines"], end + buffer)
|
||||
metadata["line_begin"] = begin
|
||||
metadata["line_end"] = end
|
||||
chunks.append(chunk.copy())
|
||||
|
||||
return level
|
||||
|
@ -382,16 +382,9 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
|
||||
|
||||
def get_embedding(self, text, normalize=True):
|
||||
"""Generate embeddings using Ollama."""
|
||||
# response = self.embedding_model.encode(text) # Outputs 384-dim vectors
|
||||
|
||||
response = self.llm.embeddings(model=defines.embedding_model, prompt=text)
|
||||
embedding = response["embedding"]
|
||||
|
||||
# response = self.llm.embeddings.create(
|
||||
# model=defines.embedding_model,
|
||||
# input=text,
|
||||
# options={"num_ctx": self.chunk_size * 3} # No need to waste ctx space
|
||||
# )
|
||||
if normalize:
|
||||
normalized = self._normalize_embeddings(embedding)
|
||||
return normalized
|
||||
@ -405,14 +398,13 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
|
||||
metadata = chunk["metadata"]
|
||||
|
||||
# Generate a more unique ID based on content and metadata
|
||||
content_hash = hashlib.md5(text.encode()).hexdigest()
|
||||
path_hash = ""
|
||||
if "path" in metadata:
|
||||
path_hash = hashlib.md5(metadata["source_file"].encode()).hexdigest()[
|
||||
:8
|
||||
]
|
||||
|
||||
chunk_id = f"{path_hash}_{content_hash}_{i}"
|
||||
content_hash = hashlib.md5(text.encode()).hexdigest()[:8]
|
||||
chunk_id = f"{path_hash}_{i}_{content_hash}"
|
||||
|
||||
embedding = self.get_embedding(text)
|
||||
try:
|
||||
@ -427,16 +419,23 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
|
||||
logging.error(traceback.format_exc())
|
||||
logging.error(chunk)
|
||||
|
||||
def read_line_range(self, file_path, start, end, buffer=5) -> list[str]:
|
||||
def prepare_metadata(self, meta: Dict[str, Any], buffer=defines.chunk_buffer)-> str | None:
|
||||
try:
|
||||
with open(file_path, "r") as file:
|
||||
source_file = meta["source_file"]
|
||||
path_parts = source_file.split(os.sep)
|
||||
file_name = path_parts[-1]
|
||||
meta["source_file"] = file_name
|
||||
with open(source_file, "r") as file:
|
||||
lines = file.readlines()
|
||||
start = max(0, start - buffer)
|
||||
end = min(len(lines), end + buffer)
|
||||
return lines[start:end]
|
||||
meta["file_lines"] = len(lines)
|
||||
start = max(0, meta["line_begin"] - buffer)
|
||||
meta["chunk_begin"] = start
|
||||
end = min(meta["lines"], meta["line_end"] + buffer)
|
||||
meta["chunk_end"] = end
|
||||
return "".join(lines[start:end])
|
||||
except:
|
||||
logging.warning(f"Unable to open {file_path}")
|
||||
return []
|
||||
logging.warning(f"Unable to open {meta["source_file"]}")
|
||||
return None
|
||||
|
||||
# Cosine Distance Equivalent Similarity Retrieval Characteristics
|
||||
# 0.2 - 0.3 0.85 - 0.90 Very strict, highly precise results only
|
||||
@ -444,7 +443,7 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
|
||||
# 0.5 - 0.7 0.65 - 0.75 Balanced precision/recall
|
||||
# 0.7 - 0.9 0.55 - 0.65 Higher recall, more inclusive
|
||||
# 0.9 - 1.2 0.40 - 0.55 Very inclusive, may include tangential content
|
||||
def find_similar(self, query, top_k=3, threshold=0.7):
|
||||
def find_similar(self, query, top_k=defines.default_rag_top_k, threshold=defines.default_rag_threshold):
|
||||
"""Find similar documents to the query."""
|
||||
|
||||
# collection is configured with hnsw:space cosine
|
||||
@ -474,13 +473,10 @@ class ChromaDBFileWatcher(FileSystemEventHandler):
|
||||
filtered_distances.append(distance)
|
||||
|
||||
for index, meta in enumerate(filtered_metadatas):
|
||||
source_file = meta["source_file"]
|
||||
del meta["source_file"]
|
||||
lines = self.read_line_range(
|
||||
source_file, meta["line_begin"], meta["line_end"]
|
||||
)
|
||||
if len(lines):
|
||||
filtered_documents[index] = "\n".join(lines)
|
||||
content = self.prepare_metadata(meta)
|
||||
if content is not None:
|
||||
filtered_documents[index] = content
|
||||
|
||||
# Return the filtered results instead of all results
|
||||
return {
|
||||
"query_embedding": query_embedding,
|
||||
|
Loading…
x
Reference in New Issue
Block a user