Caching
parent 5f6971510a
commit 1130077c03

Dockerfile (283 changes)
@@ -1,3 +1,75 @@
#
# Build Python 3.11 for use in later stages
#
FROM ubuntu:oracular AS python-build

SHELL [ "/bin/bash", "-c" ]

# Instructions Dockerfied from:
#
# https://github.com/pytorch/pytorch
#
# and
#
# https://pytorch.org/docs/stable/notes/get_start_xpu.html
# https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-6.html
#
#
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gpg \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

# ipex only supports python 3.11, so use 3.11 instead of latest oracular (3.12)

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        ca-certificates \
        ccache \
        cmake \
        curl \
        git \
        gpg-agent \
        less \
        libbz2-dev \
        libffi-dev \
        libjpeg-dev \
        libpng-dev \
        libreadline-dev \
        libssl-dev \
        libsqlite3-dev \
        llvm \
        nano \
        wget \
        zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

# python3 \
# python3-pip \
# python3-venv \
# python3-dev \

RUN /usr/sbin/update-ccache-symlinks
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
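
# The two ccache lines above route compiler invocations through ccache and
# point its cache at /opt/ccache, so repeated builds of the Python layer below
# can reuse prior compiler output. A sketch of inspecting that cache from the
# built stage (hypothetical tag and host path, not from this commit):
#   docker build --target python-build -t airc-python-build .
#   docker run --rm -v "$(pwd)/ccache:/opt/ccache" airc-python-build ccache --show-stats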

# Build Python in /opt/..., install it locally, then remove the build environment
# collapsed to a single docker layer.
WORKDIR /opt
ENV PYTHON_VERSION=3.11.9

RUN wget -q -O - https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz | tar -xz \
    && cd Python-${PYTHON_VERSION} \
    && ./configure --prefix=/opt/python --enable-optimizations \
    && make -j$(nproc) \
    && make install \
    && cd /opt \
    && rm -rf Python-${PYTHON_VERSION}
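
# --enable-optimizations turns on profile-guided optimization, making this the
# slowest layer in the image; keeping it in its own stage means later stages
# can COPY --from=python-build /opt/python without ever rebuilding it.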

FROM ubuntu:oracular AS ze-monitor
# From https://github.com/jketreno/ze-monitor
RUN apt-get update \

@@ -29,19 +101,75 @@ RUN cmake .. \
    && make \
    && cpack

#
# Build the ipex-llm wheel for use in later stages
#
FROM python-build AS ipex-llm-src

RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2

RUN git clone --branch main --depth 1 https://github.com/intel/ipex-llm.git /opt/ipex-llm \
    && cd /opt/ipex-llm \
    && git fetch --depth 1 origin cb3c4b26ad058c156591816aa37eec4acfcbf765 \
    && git checkout cb3c4b26ad058c156591816aa37eec4acfcbf765

WORKDIR /opt/ipex-llm

RUN python3 -m venv --system-site-packages /opt/ipex-llm/venv
RUN { \
        echo '#!/bin/bash' ; \
        echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
        echo 'source /opt/ipex-llm/venv/bin/activate' ; \
        echo 'bash -c "${@}"' ; \
    } > /opt/ipex-llm/shell ; \
    chmod +x /opt/ipex-llm/shell

SHELL [ "/opt/ipex-llm/shell" ]

RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu

WORKDIR /opt/ipex-llm/python/llm
RUN pip install requests wheel
RUN python setup.py clean --all bdist_wheel --linux

#
# The main airc image:
# * python 3.11
# * pytorch xpu w/ ipex-llm
# * ollama-ipex-llm
# * src/server.py - model server supporting RAG and fine-tuned models
#
# Agents using server:
# * src/web-ui.py - React server (airc.ketrenos.com)
# * src/irc.py - IRC backend (irc.libera.chat #airc-test)
# * src/cli.py - Command line chat
#
# Utilities:
# * src/training-fine-tune.py - Perform fine-tuning on curated documents
FROM ubuntu:oracular AS airc

COPY --from=python-build /opt/python /opt/python

# Get a couple prerequisites
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gpg \
        # python3 \
        # python3-pip \
        # python3-venv \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

# The client frontend is built using React Expo to allow
# easy creation of an Android app as well as a web app
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        nodejs \
        npm \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

# Install Intel graphics runtimes
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \

@@ -58,27 +186,41 @@ RUN apt-get update \
WORKDIR /opt/airc

RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2

# Setup the airc python virtual environment
RUN python3 -m venv --system-site-packages /opt/airc/venv

# Setup the docker pip shell
RUN { \
        echo '#!/bin/bash' ; \
        echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
        echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
        echo 'source /opt/airc/venv/bin/activate' ; \
        echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \
    } > /opt/airc/shell ; \
    chmod +x /opt/airc/shell

# Activate the pip environment on all shell calls
SHELL [ "/opt/airc/shell" ]
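
# With the wrapper installed, each later `RUN pip install ...` in this stage
# is executed as `/opt/airc/shell 'pip install ...'`: python3 is repointed at
# 3.11, the oneAPI environment is sourced when present, the venv is activated,
# and the command then runs inside that environment.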

# From https://pytorch-extension.intel.com/installation?platform=gpu&version=v2.6.10%2Bxpu&os=linux%2Fwsl2&package=pip
RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu
RUN pip install intel-extension-for-pytorch==2.6.10+xpu oneccl_bind_pt==2.6.0+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

# From https://huggingface.co/docs/bitsandbytes/main/en/installation?backend=Intel+CPU+%2B+GPU#multi-backend
RUN pip install "transformers>=4.45.1"
RUN pip install 'https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl'

# Install ollama python module
RUN pip install ollama

# pydle does not work with newer asyncio due to coroutine
# being deprecated. Patch to work.
COPY /src/pydle.patch /opt/pydle.patch

RUN pip install pydle \
    && patch -d /opt/airc/venv/lib/python3*/site-packages/pydle \
        -p1 < /opt/pydle.patch \
    && rm /opt/pydle.patch

@@ -87,9 +229,49 @@ RUN pip install setuptools --upgrade
RUN pip install ollama
RUN pip install feedparser bs4 chromadb
RUN pip install tiktoken
RUN pip install flask flask_cors
RUN pip install peft datasets

COPY --from=ipex-llm-src /opt/ipex-llm/python/llm/dist/*.whl /opt/wheels/
RUN for pkg in /opt/wheels/ipex_llm*.whl; do pip install $pkg; done

# mistral fails with cache_position errors with transformers>4.40 (or at least it fails with the latest)
# as well as MistralSpda* and QwenSpda* things missing (needed when loading models with )
RUN pip install "sentence_transformers<3.4.1"
# "transformers==4.40.0" ""
#RUN pip install sentence_transformers "transformers==4.40.0" "trl<0.12.0"
#RUN pip install transformers==4.45.0 "trl<0.12.0"
# trl.core doesn't have what is needed with the default 'pip install trl' version
#RUN pip install git+https://github.com/huggingface/trl.git@7630f877f91c556d9e5a3baa4b6e2894d90ff84c

# To get xe_linear and other Xe methods
# NOTE: As of 2025-03-10, these are only available for Python 3.11, hence
# why we build python from source
RUN pip3 install 'bigdl-core-xe-all>=2.6.0b'

# NOTE: IPEX includes the oneAPI components... not sure if they still need to be installed separately with a oneAPI env
RUN pip install einops diffusers # Required for IPEX optimize(), which is required to convert from Params4bit

RUN pip install yfinance pytz geopy

SHELL [ "/bin/bash", "-c" ]

# Don't install the full oneapi essentials; just the ones that we seem to need
# RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
#     | gpg --dearmor -o /usr/share/keyrings/oneapi-archive-keyring.gpg \
#     && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
#     | tee /etc/apt/sources.list.d/oneAPI.list \
#     && apt-get update \
#     && DEBIAN_FRONTEND=noninteractive apt-get install -y \
#         intel-oneapi-mkl-sycl-2025.0 \
#         intel-oneapi-dnnl-2025.0 \
#         intel-oneapi-dpcpp-cpp-2025.0 \
#     && apt-get clean \
#     && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
# dpcpp is needed for LoRA backend when
# libze-dev is needed for LoRA/triton backend in order to build stuff
# Unfortunately, that fails with:
# ImportError: /opt/airc/venv/lib/python3.11/site-packages/intel_extension_for_pytorch/lib/libintel-ext-pt-cpu.so: undefined symbol: _ZNK5torch8autograd4Node4nameEv
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libncurses6 \

@@ -108,6 +290,8 @@ RUN { \
    echo 'echo "Container: airc"'; \
    echo 'set -e'; \
    echo 'echo "Setting pip environment to /opt/airc"'; \
    echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
    echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
    echo 'source /opt/airc/venv/bin/activate'; \
    echo ''; \
    echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/airc/)?shell$ ]]; then'; \

@@ -126,6 +310,11 @@ RUN { \
    } > /entrypoint.sh \
    && chmod +x /entrypoint.sh

# From
ENV USE_XETLA=OFF
ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
ENV SYCL_CACHE_PERSISTENT=1

ENTRYPOINT [ "/entrypoint.sh" ]

FROM ubuntu:oracular AS ollama

@@ -185,7 +374,7 @@ RUN { \
SHELL [ "/opt/ollama/shell" ]

# Install ollama python module
RUN pip install ollama

SHELL [ "/bin/bash", "-c" ]

@@ -233,13 +422,14 @@ FROM airc AS jupyter
SHELL [ "/opt/airc/shell" ]

# BEGIN setup Jupyter
RUN pip install \
        jupyterlab \
        dash[jupyterlab] \
    && jupyter lab build --dev-build=False --minimize=False
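# dash[jupyterlab] is expected to pull in Dash's JupyterLab integration;
# `jupyter lab build` then rebuilds the JupyterLab application bundle at image
# build time so it is not rebuilt on first launch.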
# END setup Jupyter

RUN pip install -r /opt/airc/src/requirements.txt

SHELL [ "/bin/bash", "-c" ]

RUN { \

@@ -259,8 +449,8 @@ RUN { \
    echo 'source /opt/airc/venv/bin/activate' ; \
    echo 'if [[ "${1}" == "shell" ]]; then echo "Dropping to shell"; /bin/bash; exit $?; fi' ; \
    echo 'while true; do' ; \
    echo ' echo "Launching jupyter lab"' ; \
    echo ' jupyter lab \' ; \
    echo ' --notebook-dir=/opt/jupyter \' ; \
    echo ' --port 8888 \' ; \
    echo ' --ip 0.0.0.0 \' ; \

@@ -278,4 +468,67 @@ RUN { \
    } > /entrypoint-jupyter.sh \
    && chmod +x /entrypoint-jupyter.sh

ENTRYPOINT [ "/entrypoint-jupyter.sh" ]

FROM ubuntu:oracular AS miniircd

COPY --from=python-build /opt/python /opt/python

# Get a couple prerequisites
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gpg \
        wget \
        nano \
        irssi \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

WORKDIR /opt/miniircd

RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2

# Setup the miniircd python virtual environment
RUN python3 -m venv --system-site-packages /opt/miniircd/venv

# Setup the docker pip shell
RUN { \
        echo '#!/bin/bash' ; \
        echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
        echo 'source /opt/miniircd/venv/bin/activate' ; \
        echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \
    } > /opt/miniircd/shell ; \
    chmod +x /opt/miniircd/shell

# Activate the pip environment on all shell calls
SHELL [ "/opt/miniircd/shell" ]

RUN pip install miniircd

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash'; \
        echo 'echo "Container: miniircd"'; \
        echo 'set -e'; \
        echo 'echo "Setting pip environment to /opt/miniircd"'; \
        echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
        echo 'source /opt/miniircd/venv/bin/activate'; \
        echo ''; \
        echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/miniircd/)?shell$ ]]; then'; \
        echo ' echo "Dropping to shell"'; \
        echo ' shift' ; \
        echo ' echo "Running: ${@}"' ; \
        echo ' if [[ "${1}" != "" ]]; then' ; \
        echo ' exec ${@}'; \
        echo ' else' ; \
        echo ' exec /bin/bash'; \
        echo ' fi' ; \
        echo 'else'; \
        echo ' echo "Launching IRC server..."'; \
        echo ' miniircd --setuid root "${@}"' ; \
        echo 'fi'; \
    } > /entrypoint.sh \
    && chmod +x /entrypoint.sh

ENTRYPOINT [ "/entrypoint.sh" ]
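
# Hypothetical invocations of this stage (not part of the commit):
#   docker run --rm -p 6667:6667 miniircd          launches the IRC server
#   docker run --rm -it miniircd shell             drops to an interactive shell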

@@ -14,9 +14,13 @@ services:
      - ollama
    networks:
      - internal
    ports:
      - 8911:8911
    volumes:
      - ./cache:/root/.cache
      - ./src:/opt/airc/src:rw
      - ./doc:/opt/airc/doc:ro
      - ./results:/opt/airc/results:rw
    cap_add: # used for running ze-monitor within airc container
      - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
      - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)

@@ -36,8 +40,8 @@ services:
      - ONEAPI_DEVICE_SELECTOR=level_zero:0
    devices:
      - /dev/dri:/dev/dri
    ports:
      - 11434:11434 # ollama serve port
    networks:
      - internal
    volumes:

@@ -61,13 +65,41 @@ services:
      - /dev/dri:/dev/dri
    depends_on:
      - ollama
      - miniircd
    ports:
      - 8888:8888 # Jupyter Notebook
      - 60673:60673 # Gradio
    networks:
      - internal
    volumes:
      - ./jupyter:/opt/jupyter:rw
      - ./cache:/root/.cache
    deploy:
      resources:
        limits:
          memory: "0" # No memory limit (Docker treats 0 as unlimited)
        reservations:
          memory: "0" # No reserved memory (optional)
    ulimits:
      memlock: -1 # No limit on the amount of memory the container may lock
    oom_kill_disable: true # Prevents OOM killer from killing the container

  miniircd:
    build:
      context: .
      dockerfile: Dockerfile
      target: miniircd
    image: miniircd
    env_file:
      - .env
    devices:
      - /dev/dri:/dev/dri
    ports:
      - 6667:6667 # IRC
    networks:
      - internal
    volumes:
      - ./cache:/root/.cache

networks:
  internal:

@@ -539,11 +539,11 @@ def create_ui():
            outputs=[chat_history, tool_history]
        )

        timer.tick(check_message_queue, inputs=chatbot, outputs=chatbot).then(
            update_log, # This new function updates the log after chatbot processing
            inputs=chatbot,
            outputs=[chat_history, tool_history]
        )

        clear.click(do_clear, inputs=None, outputs=[chatbot, chat_history, tool_history], queue=False)
src/chunk.py
562
src/chunk.py
@ -1,562 +0,0 @@
|
|||||||
import requests
from typing import List, Dict, Any, Union
import tiktoken
import feedparser
import logging as log
import datetime
from bs4 import BeautifulSoup
import chromadb
import ollama
import re
import numpy as np


def normalize(vec):
    return vec / np.linalg.norm(vec)


OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint
MODEL_NAME = "deepseek-r1:7b"
EMBED_MODEL = "mxbai-embed-large"
PERSIST_DIRECTORY = "/root/.cache/chroma"

client = ollama.Client(host=OLLAMA_API_URL)


def extract_text_from_html_or_xml(content, is_xml=False):
    # Parse the content
    if is_xml:
        soup = BeautifulSoup(content, 'xml') # Use 'xml' parser for XML content
    else:
        soup = BeautifulSoup(content, 'html.parser') # Default to 'html.parser' for HTML content

    # Extract and return just the text
    return soup.get_text()


class Feed():
    def __init__(self, name, url, poll_limit_min=30, max_articles=5):
        self.name = name
        self.url = url
        self.poll_limit_min = datetime.timedelta(minutes=poll_limit_min)
        self.last_poll = None
        self.articles = []
        self.max_articles = max_articles
        self.update()

    def update(self):
        now = datetime.datetime.now()
        if self.last_poll is None or (now - self.last_poll) >= self.poll_limit_min:
            log.info(f"Updating {self.name}")
            feed = feedparser.parse(self.url)
            self.articles = []
            self.last_poll = now

            if len(feed.entries) == 0:
                return

            for i, entry in enumerate(feed.entries[:self.max_articles]):
                content = {}
                content['source'] = self.name
                content['id'] = f"{self.name}{i}"
                title = entry.get("title")
                if title:
                    content['title'] = title
                link = entry.get("link")
                if link:
                    content['link'] = link
                text = entry.get("summary")
                if text:
                    content['text'] = extract_text_from_html_or_xml(text, False)
                else:
                    continue
                published = entry.get("published")
                if published:
                    content['published'] = published

                self.articles.append(content)
        else:
            log.info(f"Not updating {self.name} -- {self.poll_limit_min - (now - self.last_poll)}s remain to refresh.")
        return self.articles


# News RSS Feeds
rss_feeds = [
    Feed(name="IGN.com", url="https://feeds.feedburner.com/ign/games-all"),
    Feed(name="BBC World", url="http://feeds.bbci.co.uk/news/world/rss.xml"),
    Feed(name="Reuters World", url="http://feeds.reuters.com/Reuters/worldNews"),
    Feed(name="Al Jazeera", url="https://www.aljazeera.com/xml/rss/all.xml"),
    Feed(name="CNN World", url="http://rss.cnn.com/rss/edition_world.rss"),
    Feed(name="Time", url="https://time.com/feed/"),
    Feed(name="Euronews", url="https://www.euronews.com/rss"),
    # Feed(name="FeedX", url="https://feedx.net/rss/ap.xml")
]


def get_encoding():
    """Get the tokenizer for counting tokens."""
    try:
        return tiktoken.get_encoding("cl100k_base") # Default encoding used by many embedding models
    except:
        return tiktoken.encoding_for_model(MODEL_NAME)


def count_tokens(text: str) -> int:
    """Count the number of tokens in a text string."""
    encoding = get_encoding()
    return len(encoding.encode(text))


def chunk_text(text: str, max_tokens: int = 512, overlap: int = 50) -> List[str]:
    """
    Split a text into chunks based on token count with overlap between chunks.

    Args:
        text: The text to split into chunks
        max_tokens: Maximum number of tokens per chunk
        overlap: Number of tokens to overlap between chunks

    Returns:
        List of text chunks
    """
    if not text or max_tokens <= 0:
        return []

    encoding = get_encoding()
    tokens = encoding.encode(text)
    chunks = []

    i = 0
    while i < len(tokens):
        # Get the current chunk of tokens
        chunk_end = min(i + max_tokens, len(tokens))
        chunk_tokens = tokens[i:chunk_end]
        chunks.append(encoding.decode(chunk_tokens))

        # Move to the next position with overlap
        if chunk_end == len(tokens):
            break
        i += max_tokens - overlap

    return chunks
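
# Worked example (editor's sketch, not part of the original file): with
# max_tokens=512 and overlap=50 the window advances 462 tokens per step, so a
# 1000-token text yields chunks covering tokens [0, 512), [462, 974), and
# [924, 1000), with 50 tokens shared between consecutive chunks:
#
#   parts = chunk_text("lorem ipsum " * 500, max_tokens=512, overlap=50)
#   assert all(count_tokens(p) <= 512 for p in parts)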


def chunk_document(document: Dict[str, Any],
                   text_key: str = "text",
                   max_tokens: int = 512,
                   overlap: int = 50) -> List[Dict[str, Any]]:
    """
    Chunk a document dictionary into multiple chunks.

    Args:
        document: Document dictionary with metadata and text
        text_key: The key in the document that contains the text to chunk
        max_tokens: Maximum number of tokens per chunk
        overlap: Number of tokens to overlap between chunks

    Returns:
        List of document dictionaries, each with chunked text and preserved metadata
    """
    if text_key not in document:
        raise Exception(f"{text_key} not in document")

    # Extract text and create chunks
    if "title" in document:
        text = f"{document['title']}: {document[text_key]}"
    else:
        text = document[text_key]
    chunks = chunk_text(text, max_tokens, overlap)

    # Create document chunks with preserved metadata
    chunked_docs = []
    for i, chunk in enumerate(chunks):
        # Create a new doc with all original fields
        doc_chunk = document.copy()
        # Replace text with the chunk
        doc_chunk[text_key] = chunk
        # Add chunk metadata
        doc_chunk["chunk_id"] = i
        doc_chunk["chunk_total"] = len(chunks)
        chunked_docs.append(doc_chunk)

    return chunked_docs
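
# Editor's sketch of the metadata flow (not part of the original file): every
# chunk keeps the source document's fields and gains chunk bookkeeping:
#
#   doc = {"id": "BBC World0", "source": "BBC World", "title": "t", "text": "..."}
#   for c in chunk_document(doc, max_tokens=64, overlap=8):
#       print(c["id"], c["chunk_id"], "of", c["chunk_total"])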


def init_chroma_client(persist_directory: str = PERSIST_DIRECTORY):
    """Initialize and return a ChromaDB client."""
    # return chromadb.PersistentClient(path=persist_directory)
    return chromadb.Client()


def create_or_get_collection(client, collection_name: str):
    """Create or get a ChromaDB collection."""
    try:
        return client.get_collection(
            name=collection_name
        )
    except:
        return client.create_collection(
            name=collection_name,
            metadata={"hnsw:space": "cosine"}
        )


def process_documents_to_chroma(
    documents: List[Dict[str, Any]],
    collection_name: str = "document_collection",
    text_key: str = "text",
    max_tokens: int = 512,
    overlap: int = 50,
    model: str = EMBED_MODEL,
    persist_directory: str = PERSIST_DIRECTORY
):
    """
    Process documents, chunk them, compute embeddings, and store in ChromaDB.

    Args:
        documents: List of document dictionaries
        collection_name: Name for the ChromaDB collection
        text_key: The key containing text content
        max_tokens: Maximum tokens per chunk
        overlap: Token overlap between chunks
        model: Ollama model for embeddings
        persist_directory: Directory to store ChromaDB data
    """
    # Initialize ChromaDB client and collection
    db = init_chroma_client(persist_directory)
    collection = create_or_get_collection(db, collection_name)

    # Process each document
    for doc in documents:
        # Chunk the document
        doc_chunks = chunk_document(doc, text_key, max_tokens, overlap)

        # Prepare data for ChromaDB
        ids = []
        texts = []
        metadatas = []
        embeddings = []

        for chunk in doc_chunks:
            # Create a unique ID for the chunk
            chunk_id = f"{chunk['id']}_{chunk['chunk_id']}"

            # Extract text
            text = chunk[text_key]

            # Create metadata (excluding text and embedding to avoid duplication)
            metadata = {k: v for k, v in chunk.items() if k != text_key and k != "embedding"}

            response = client.embed(model=model, input=text)
            embedding = response["embeddings"][0]
            ids.append(chunk_id)
            texts.append(text)
            metadatas.append(metadata)
            embeddings.append(embedding)

        # Add chunks to ChromaDB collection
        collection.add(
            ids=ids,
            documents=texts,
            embeddings=embeddings,
            metadatas=metadatas
        )

    return collection


def query_chroma(
    query_text: str,
    collection_name: str = "document_collection",
    n_results: int = 5,
    model: str = EMBED_MODEL,
    persist_directory: str = PERSIST_DIRECTORY
):
    """
    Query ChromaDB for similar documents.

    Args:
        query_text: The text to search for
        collection_name: Name of the ChromaDB collection
        n_results: Number of results to return
        model: Ollama model for embedding the query
        persist_directory: Directory where ChromaDB data is stored

    Returns:
        Query results from ChromaDB
    """
    # Initialize ChromaDB client and collection
    db = init_chroma_client(persist_directory)
    collection = create_or_get_collection(db, collection_name)

    query_response = client.embed(model=model, input=query_text)
    query_embeddings = query_response["embeddings"]

    # Query the collection
    results = collection.query(
        query_embeddings=query_embeddings,
        n_results=n_results
    )

    return results
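
# Editor's note (not part of the original file): with hnsw:space set to
# "cosine" above, query results carry cosine distances in [0, 2], so a
# similarity score can be recovered as 1 - distance:
#
#   res = query_chroma("intel gpu pytorch", n_results=3)
#   for cid, dist in zip(res["ids"][0], res["distances"][0]):
#       print(cid, "similarity:", 1 - dist)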


def print_top_match(query_results, index=0, documents=None):
    """
    Print detailed information about the top matching document,
    including the full original document content.

    Args:
        query_results: Results from ChromaDB query
        documents: Original documents dictionary to look up full content (optional)
    """
    if not query_results or not query_results["ids"] or len(query_results["ids"][0]) == 0:
        print("No matching documents found.")
        return

    # Get the top result
    top_id = query_results["ids"][0][index]
    top_document_chunk = query_results["documents"][0][index]
    top_metadata = query_results["metadatas"][0][index]
    top_distance = query_results["distances"][0][index]

    print("="*50)
    print("MATCHING DOCUMENT")
    print("="*50)
    print(f"Chunk ID: {top_id}")
    print(f"Similarity Score: {1 - top_distance:.4f}") # Convert distance to similarity

    print("\nCHUNK METADATA:")
    for key, value in top_metadata.items():
        print(f" {key}: {value}")

    print("\nMATCHING CHUNK CONTENT:")
    print(top_document_chunk[:500].strip() + ("..." if len(top_document_chunk) > 500 else ""))

    # Extract the original document ID from the chunk ID
    # Chunk IDs are in format "doc_id_chunk_num"
    original_doc_id = top_id.split('_')[0]


def get_top_match(query_results, index=0, documents=None):
    top_id = query_results["ids"][0][index]
    # Extract the original document ID from the chunk ID
    # Chunk IDs are in format "doc_id_chunk_num"
    original_doc_id = top_id.split('_')[0]

    # Return the full document for further processing if needed
    if documents is not None:
        return next((doc for doc in documents if doc["id"] == original_doc_id), None)

    return None


def show_documents(documents=None):
    if not documents:
        return

    # Print each document
    for i, doc in enumerate(documents):
        print(f"Document {i+1}:")
        print(f" Title: {doc['title']}")
        print(f" Text: {doc['text'][:100]}...")
        print()


def show_headlines(documents=None):
    if not documents:
        return

    # Print each headline
    for doc in documents:
        print(f"{doc['source']}: {doc['title']}")


def show_help():
    print("""help>
    docs           Show RAG docs
    full           Show last full top match
    headlines      Show the RAG headlines
    prompt         Show the last prompt
    response       Show the last response
    scores         Show last RAG scores
    why|think      Show last response's <think>
    context|match  Show RAG match info to last prompt
""")


# Example usage
if __name__ == "__main__":
    documents = []
    for feed in rss_feeds:
        documents.extend(feed.articles)

    show_documents(documents=documents)

    # Process documents and store in ChromaDB
    collection = process_documents_to_chroma(
        documents=documents,
        collection_name="research_papers",
        max_tokens=256,
        overlap=25,
        model=EMBED_MODEL,
        persist_directory="/root/.cache/chroma"
    )

    last_results = None
    last_prompt = None
    last_system = None
    last_response = None
    last_why = None
    last_messages = []
    while True:
        try:
            search_query = input("> ").strip()
        except KeyboardInterrupt as e:
            print("\nExiting.")
            break

        if search_query == "exit" or search_query == "quit":
            print("\nExiting.")
            break

        if search_query == "docs":
            show_documents(documents)
            continue

        if search_query == "prompt":
            if last_prompt:
                print(f"""last prompt>
{"="*10}system{"="*10}
{last_system}
{"="*10}prompt{"="*10}
{last_prompt}""")
            else:
                print("No prompts yet")
            continue

        if search_query == "response":
            if last_response:
                print(f"""last response>
{"="*10}response{"="*10}
{last_response}""")
            else:
                print("No responses yet")
            continue

        if search_query == "" or search_query == "help":
            show_help()
            continue

        if search_query == "headlines":
            show_headlines(documents)
            continue

        if search_query == "match" or search_query == "context":
            if last_results:
                print_top_match(last_results, documents=documents)
            else:
                print("No match to give info on")
            continue

        if search_query == "why" or search_query == "think":
            if last_why:
                print(f"""
why>
{last_why}
""")
            else:
                print("No processed prompts")
            continue

        if search_query == "scores":
            if last_results:
                for i, _ in enumerate(last_results["ids"][0]):
                    print_top_match(last_results, documents=documents, index=i)
            else:
                print("No match to give info on")
            continue

        if search_query == "full":
            if last_results:
                full = get_top_match(last_results, documents=documents)
                if full:
                    print(f"""Context:
Source: {full["source"]}
Title: {full["title"]}
Link: {full["link"]}
Distance: {last_results.get("distances", [[0]])[0][0]}
Full text:
{full["text"]}""")
            else:
                print("No match to give info on")
            continue

        # Query ChromaDB
        results = query_chroma(
            query_text=search_query,
            collection_name="research_papers",
            n_results=10
        )
        last_results = results

        full = get_top_match(results, documents=documents)

        headlines = ""
        for doc in documents:
            headlines += f"{doc['source']}: {doc['title']}\n"

        system = """
You are the assistant. Your name is airc.

Do not ask to help the user further.

Provide short (less than 100 character) responses.

Rules:
* If the user asks for information about the AI model, how it works, or who wrote it, provide information about the author from inside the <author></author> tags.
* If you think the user might be asking about the author, ask a follow up question to clarify.
* If there is news in between the <input></input> tags relevant to the prompt, use that. Always mention the source when information comes from an item. If asked for the link, provide it.
* Respond to the prompt in a single, direct response.
* Do not prefix it with a word like "Answer"

You must follow the rules.

"""

        # * If a user asks for weather information, include in your response "{{weather_query("country", "city", "state")}}" where the description of the weather should go.

        context = f"""<author>
author={[
            {'info': 'James wrote the python application that is driving this RAG model on top of deepseek-r1:7b. You can find it at https://github.com/jketreno/airc'},
            {'info': 'James Ketrenos wrote the program deploying this AI model with RAG.'},
            {'info': 'James Ketrenos is a software engineer with a history in all levels of the computer stack, from the kernel to full-stack web applications. He dabbles in AI/ML and is familiar with pytorch and ollama.'},
            {'info': 'James lives in Portland, Oregon and has three kids. Two are attending Oregon State University and one is attending Willamette University.'}
        ]}
</author>"""

        context += "<input>additional information unrelated to James Ketrenos = ["
        for doc in documents:
            item = {'source': doc["source"], 'article': {'title': doc["title"], 'link': doc["link"], 'text': doc["text"]}}
            context += f"{item}"
        context += """]
</input>
"""
        prompt = f"{context}{search_query}"
        last_prompt = prompt
        last_system = system
        if len(last_messages) != 0:
            message_context = f"{last_messages}"
            prompt = f"{message_context}{prompt}"

        print(f"system len: {len(system)}")
        print(f"prompt len: {len(prompt)}")
        output = client.generate(
            model=MODEL_NAME,
            system=f"{system}{context}",
            prompt=prompt,
            stream=False,
            options={ 'num_ctx': 100000 }
        )
        # Prune off the <think>...</think>
        matches = re.match(r'^<think>(.*?)</think>(.*)$', output['response'], flags=re.DOTALL)
        if matches:
            last_why = matches[1].strip()
            content = matches[2].strip()
        else:
            content = output['response'] # fall back to the raw response
            print(f"[garbled] response>\n{content}")
        print(f"Response>\n{content}")

        last_response = content
        last_messages.extend(({
            'role': 'user',
            'name': 'james',
            'message': search_query
        }, {
            'role': 'assistant',
            'message': content
        }))
        last_messages = last_messages[-10:] # keep only the most recent exchanges