From 8027b5f8e3d63c8ba7520b589cd5d4adc3cd26e2 Mon Sep 17 00:00:00 2001 From: James Ketrenos Date: Fri, 7 Mar 2025 16:21:30 -0800 Subject: [PATCH] Switching to ollama --- .gitignore | 1 + Dockerfile | 377 +++++++++++++++--------------------- docker-compose.yml | 40 ++++ src/airc.py | 189 ++++++++++++------ src/chat.py | 209 ++++++++++++++++++++ src/chunk.py | 468 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 1003 insertions(+), 281 deletions(-) create mode 100644 src/chat.py create mode 100644 src/chunk.py diff --git a/.gitignore b/.gitignore index 4a58eca..68fe6ab 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .env cache/** jupyter/** +ollama/** \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 8f7881b..ac69739 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,74 +1,3 @@ -FROM ubuntu:oracular AS pytorch-build - -SHELL [ "/bin/bash", "-c" ] - -# Instructions Dockerfied from: -# -# https://github.com/pytorch/pytorch -# -# and -# -# https://pytorch.org/docs/stable/notes/get_start_xpu.html -# https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-6.html -# -# -RUN apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y \ - gpg \ - wget \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log} - - -# ipex only supports python 3.11, so use 3.11 instead of latest oracular (3.12) - -RUN apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y \ - build-essential \ - ca-certificates \ - ccache \ - cmake \ - curl \ - git \ - gpg-agent \ - less \ - libbz2-dev \ - libffi-dev \ - libjpeg-dev \ - libpng-dev \ - libreadline-dev \ - libssl-dev \ - libsqlite3-dev \ - llvm \ - nano \ - wget \ - zlib1g-dev \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log} - -# python3 \ -# python3-pip \ -# python3-venv \ -# python3-dev \ - -RUN /usr/sbin/update-ccache-symlinks -RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache - -# Build Python in /opt/..., install it locally, then remove the build environment -# collapsed to a single docker layer. -WORKDIR /opt -ENV PYTHON_VERSION=3.11.9 - -RUN wget -q -O - https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz | tar -xz \ - && cd Python-${PYTHON_VERSION} \ - && ./configure --prefix=/opt/python --enable-optimizations \ - && make -j$(nproc) \ - && make install \ - && cd /opt \ - && rm -rf Python-${PYTHON_VERSION} - -WORKDIR /opt/pytorch - FROM ubuntu:oracular AS ze-monitor # From https://github.com/jketreno/ze-monitor RUN apt-get update \ @@ -100,10 +29,20 @@ RUN cmake .. \ && make \ && cpack -FROM pytorch-build AS pytorch +FROM ubuntu:oracular AS airc -COPY --from=pytorch-build /opt/pytorch /opt/pytorch +# Get a couple prerequisites +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + gpg \ + python3 \ + python3-pip \ + python3-venv \ + wget \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log} +# Install Intel graphics runtimes RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \ && add-apt-repository -y ppa:kobuk-team/intel-graphics \ @@ -117,74 +56,176 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log} -RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2 +WORKDIR /opt/airc -# When cache is enabled SYCL runtime will try to cache and reuse JIT-compiled binaries. 
-ENV SYCL_CACHE_PERSISTENT=1 - -WORKDIR /opt/pytorch +# Setup the ollama python virtual environment +RUN python3 -m venv --system-site-packages /opt/airc/venv +# Setup the docker pip shell RUN { \ echo '#!/bin/bash' ; \ - update-alternatives --set python3 /opt/python/bin/python3.11 ; \ - echo 'source /opt/pytorch/venv/bin/activate' ; \ + echo 'source /opt/airc/venv/bin/activate' ; \ echo 'bash -c "${@}"' ; \ - } > /opt/pytorch/shell ; \ - chmod +x /opt/pytorch/shell + } > /opt/airc/shell ; \ + chmod +x /opt/airc/shell -RUN python3 -m venv --system-site-packages /opt/pytorch/venv +# Activate the pip environment on all shell calls +SHELL [ "/opt/airc/shell" ] -SHELL [ "/opt/pytorch/shell" ] +# Install ollama python module +RUN pip3 install ollama +# pydle does not work with newer asyncio due to coroutine +# being deprecated. Patch to work. +COPY /src/pydle.patch /opt/pydle.patch -RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu -RUN pip3 freeze > /opt/pytorch/requirements.txt +RUN pip3 install pydle \ + && patch -d /opt/airc/venv/lib/python3*/site-packages/pydle \ + -p1 < /opt/pydle.patch \ + && rm /opt/pydle.patch + +RUN pip install setuptools --upgrade +RUN pip install ollama +RUN pip install feedparser bs4 chromadb + +SHELL [ "/bin/bash", "-c" ] + +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + libncurses6 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log} + +COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/ +RUN dpkg -i /opt/ze-monitor-*deb && rm /opt/ze-monitor-*deb + +COPY /src/ /opt/airc/src/ SHELL [ "/bin/bash", "-c" ] RUN { \ - echo '#!/bin/bash' ; \ - echo 'echo "Container: pytorch"' ; \ - echo 'set -e' ; \ - echo 'echo "Setting pip environment to /opt/pytorch"' ; \ - echo 'source /opt/pytorch/venv/bin/activate'; \ - echo 'if [[ "${1}" == "" ]] || [[ "${1}" == "shell" ]]; then' ; \ - echo ' echo "Dropping to shell"' ; \ - echo ' /bin/bash -c "source /opt/pytorch/venv/bin/activate ; /bin/bash"' ; \ - echo 'else' ; \ - echo ' exec "${@}"' ; \ - echo 'fi' ; \ + echo '#!/bin/bash'; \ + echo 'echo "Container: airc"'; \ + echo 'set -e'; \ + echo 'echo "Setting pip environment to /opt/airc"'; \ + echo 'source /opt/airc/venv/bin/activate'; \ + echo ''; \ + echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/airc/)?shell$ ]]; then'; \ + echo ' echo "Dropping to shell"'; \ + echo ' shift' ; \ + echo ' echo "Running: ${@}"' ; \ + echo ' if [[ "${1}" != "" ]]; then' ; \ + echo ' exec ${@}'; \ + echo ' else' ; \ + echo ' exec /bin/bash'; \ + echo ' fi' ; \ + echo 'else'; \ + echo ' echo "Launching AIRC chat server..."'; \ + echo ' python src/airc.py "${@}"' ; \ + echo 'fi'; \ } > /entrypoint.sh \ && chmod +x /entrypoint.sh ENTRYPOINT [ "/entrypoint.sh" ] -FROM pytorch AS ipex-llm-src +FROM ubuntu:oracular AS ollama -# Build ipex-llm from source +# Get a couple prerequisites +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + gpg \ + wget \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log} -RUN git clone --branch main --depth 1 https://github.com/intel/ipex-llm.git /opt/ipex-llm \ - && cd /opt/ipex-llm \ - && git fetch --depth 1 origin cb3c4b26ad058c156591816aa37eec4acfcbf765 \ - && git checkout cb3c4b26ad058c156591816aa37eec4acfcbf765 +# Install Intel graphics runtimes +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \ + && add-apt-repository -y 
ppa:kobuk-team/intel-graphics \ + && apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + libze-intel-gpu1 \ + libze1 \ + intel-ocloc \ + intel-opencl-icd \ + xpu-smi \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log} -WORKDIR /opt/ipex-llm +WORKDIR /opt/ollama -RUN python3 -m venv --system-site-packages /opt/ipex-llm/venv +# Download the nightly ollama release from ipex-llm +RUN wget -qO - https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250226-ubuntu.tgz | \ + tar --strip-components=1 -C . -xzv + +# Install Python from Oracular (ollama works with 3.12) +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + gpg \ + python3 \ + python3-pip \ + python3-venv \ + wget \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log} + +# Setup the ollama python virtual environment +RUN python3 -m venv --system-site-packages /opt/ollama/venv + +# Setup the docker pip shell RUN { \ echo '#!/bin/bash' ; \ update-alternatives --set python3 /opt/python/bin/python3.11 ; \ - echo 'source /opt/ipex-llm/venv/bin/activate' ; \ + echo 'source /opt/ollama/venv/bin/activate' ; \ echo 'bash -c "${@}"' ; \ - } > /opt/ipex-llm/shell ; \ - chmod +x /opt/ipex-llm/shell + } > /opt/ollama/shell ; \ + chmod +x /opt/ollama/shell -SHELL [ "/opt/ipex-llm/shell" ] +# Activate the pip environment on all shell calls +SHELL [ "/opt/ollama/shell" ] -RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu +# Install ollama python module +RUN pip3 install ollama -WORKDIR /opt/ipex-llm/python/llm -RUN pip install requests wheel -RUN python setup.py clean --all bdist_wheel --linux +SHELL [ "/bin/bash", "-c" ] + +RUN { \ + echo '#!/bin/bash'; \ + echo 'echo "Container: ollama"'; \ + echo 'set -e'; \ + echo 'echo "Setting pip environment to /opt/ollama"'; \ + echo 'source /opt/ollama/venv/bin/activate'; \ + echo 'export OLLAMA_NUM_GPU=999'; \ + echo 'export ZES_ENABLE_SYSMAN=1'; \ + echo 'export SYCL_CACHE_PERSISTENT=1'; \ + echo 'export OLLAMA_KEEP_ALIVE=-1'; \ + echo 'export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1'; \ + echo ''; \ + echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama/)?shell$ ]]; then'; \ + echo ' echo "Dropping to shell"'; \ + echo ' exec /bin/bash'; \ + echo 'else'; \ + echo ' echo "Launching Ollama server..."'; \ + echo ' exec ./ollama serve'; \ + echo 'fi'; \ + } > /entrypoint.sh \ + && chmod +x /entrypoint.sh + +RUN { \ + echo '#!/bin/bash'; \ + echo 'echo "Container: ollama"'; \ + echo 'set -e'; \ + echo 'echo "Setting pip environment to /opt/ollama"'; \ + echo 'source /opt/ollama/venv/bin/activate'; \ + echo './ollama pull mxbai-embed-large' ; \ + echo './ollama pull deepseek-r1:7b' ; \ + } > /fetch-models.sh \ + && chmod +x /fetch-models.sh + +ENV PYTHONUNBUFFERED=1 + +VOLUME [" /root/.ollama" ] + +ENTRYPOINT [ "/entrypoint.sh" ] FROM airc AS jupyter @@ -236,112 +277,4 @@ RUN { \ } > /entrypoint-jupyter.sh \ && chmod +x /entrypoint-jupyter.sh -ENTRYPOINT [ "/entrypoint-jupyter.sh" ] - -FROM pytorch AS airc - -RUN python3 -m venv --system-site-packages /opt/airc/venv - -# Don't install the full oneapi essentials; just the ones that we seem to need -RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ - | gpg --dearmor -o /usr/share/keyrings/oneapi-archive-keyring.gpg \ - && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] 
https://apt.repos.intel.com/oneapi all main" \ - | tee /etc/apt/sources.list.d/oneAPI.list \ - && apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y \ - intel-oneapi-mkl-sycl-2025.0 \ - intel-oneapi-dnnl-2025.0 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log} - -RUN { \ - echo '#!/bin/bash' ; \ - echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \ - echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \ - echo 'source /opt/airc/venv/bin/activate' ; \ - echo 'if [[ "$1" == "" ]]; then bash -c; else bash -c "${@}"; fi' ; \ - } > /opt/airc/shell ; \ - chmod +x /opt/airc/shell - -SHELL [ "/opt/airc/shell" ] - -RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu -# Install ipex-llm built in ipex-llm-src -COPY --from=ipex-llm-src /opt/ipex-llm/python/llm/dist/*.whl /opt/wheels/ -RUN for pkg in /opt/wheels/ipex_llm*.whl; do pip install $pkg; done - -COPY src/ /opt/airc/src/ - -# pydle does not work with newer asyncio due to coroutine -# being deprecated. Patch to work. -RUN pip3 install pydle transformers sentencepiece accelerate \ - && patch -d /opt/airc/venv/lib/python3*/site-packages/pydle \ - -p1 < /opt/airc/src/pydle.patch - -# mistral fails with cache_position errors with transformers>4.40 (or at least it fails with the latest) -# as well as MistralSpda* things missing -RUN pip install "sentence_transformers<3.4.1" "transformers==4.40.0" - -# To get xe_linear and other Xe methods -RUN pip3 install 'bigdl-core-xe-all>=2.6.0b' - -# trl.core doesn't have what is needed with the default 'pip install trl' version -RUN pip install git+https://github.com/huggingface/trl.git@7630f877f91c556d9e5a3baa4b6e2894d90ff84c - -# Needed by src/model-server.py -RUN pip install flask - -SHELL [ "/bin/bash", "-c" ] - -RUN { \ - echo '#!/bin/bash' ; \ - echo 'set -e' ; \ - echo 'if [[ ! -e "/root/.cache/hub/token" ]]; then' ; \ - echo ' if [[ "${HF_ACCESS_TOKEN}" == "" ]]; then' ; \ - echo ' echo "Set your HF access token in .env as: HF_ACCESS_TOKEN=" >&2' ; \ - echo ' exit 1' ; \ - echo ' else' ; \ - echo ' if [[ ! -d '/root/.cache/hub' ]]; then mkdir -p /root/.cache/hub; fi' ; \ - echo ' echo "${HF_ACCESS_TOKEN}" > /root/.cache/hub/token' ; \ - echo ' fi' ; \ - echo 'fi' ; \ - echo 'echo "Container: airc"' ; \ - echo 'echo "Setting pip environment to /opt/airc"' ; \ - echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \ - echo 'source /opt/airc/venv/bin/activate'; \ - echo 'if [[ "${1}" == "shell" ]] || [[ "${1}" == "/bin/bash" ]]; then' ; \ - echo ' echo "Dropping to shell"' ; \ - echo ' /bin/bash -c "source /opt/airc/venv/bin/activate ; /bin/bash"' ; \ - echo ' exit $?' ; \ - echo 'else' ; \ - echo ' while true; do' ; \ - echo ' echo "Launching model-server"' ; \ - echo ' python src/model-server.py \' ; \ - echo ' 2>&1 | tee -a "/root/.cache/model-server.log"'; \ - echo ' echo "model-server died ($?). Restarting."' ; \ - echo ' sleep 5' ; \ - echo ' done &' ; \ - echo ' while true; do' ; \ - echo ' echo "Launching airc"' ; \ - echo ' python src/airc.py "${@}" \' ; \ - echo ' 2>&1 | tee -a "/root/.cache/airc.log"' ; \ - echo ' echo "airc died ($?). 
Restarting."' ; \ - echo ' sleep 5' ; \ - echo ' done' ; \ - echo 'fi' ; \ - } > /entrypoint-airc.sh \ - && chmod +x /entrypoint-airc.sh - -COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/ -RUN dpkg -i /opt/ze-monitor-*deb - -WORKDIR /opt/airc - -SHELL [ "/opt/airc/shell" ] - -# Needed by src/model-server.py -RUN pip install faiss-cpu sentence_transformers feedparser bs4 - -SHELL [ "/bin/bash", "-c" ] - -ENTRYPOINT [ "/entrypoint-airc.sh" ] +ENTRYPOINT [ "/entrypoint-jupyter.sh" ] \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 07d304f..4ab5ddd 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,6 +10,10 @@ services: - .env devices: - /dev/dri:/dev/dri + depends_on: + - ollama + networks: + - internal volumes: - ./cache:/root/.cache - ./src:/opt/airc/src:rw @@ -18,6 +22,33 @@ services: - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN) - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check + ollama: + build: + context: . + dockerfile: Dockerfile + target: ollama + image: ollama + restart: "no" + env_file: + - .env + environment: + - OLLAMA_HOST=0.0.0.0 + - ONEAPI_DEVICE_SELECTOR=level_zero:0 + devices: + - /dev/dri:/dev/dri +# ports: +# - 11434:11434 # ollama serve port + networks: + - internal + volumes: + - ./cache:/root/.cache # Cache hub models and neo_compiler_cache + - ./ollama:/root/.ollama # Cache the ollama models + - ./src:/opt/airc/src:rw # Live mount src + cap_add: # used for running ze-monitor within airc container + - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks + - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN) + - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check + jupyter: build: context: . 
@@ -28,8 +59,17 @@ services: - .env devices: - /dev/dri:/dev/dri + depends_on: + - ollama ports: - 8888:8888 # Jupyter Notebook + networks: + - internal volumes: - ./jupyter:/opt/jupyter:rw - ./cache:/root/.cache + +networks: + internal: + driver: bridge + diff --git a/src/airc.py b/src/airc.py index e8c9a05..9a653e8 100644 --- a/src/airc.py +++ b/src/airc.py @@ -1,5 +1,4 @@ import asyncio -import aiohttp import argparse import pydle import logging @@ -9,7 +8,15 @@ import time import datetime import asyncio import json +import ollama from typing import Dict, Any +import ollama +import chromadb +import feedparser +from bs4 import BeautifulSoup + +OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint +MODEL_NAME = "deepseek-r1:7b" def parse_args(): parser = argparse.ArgumentParser(description="AI is Really Cool") @@ -22,50 +29,6 @@ def parse_args(): default='INFO', help='Set the logging level.') return parser.parse_args() -class AsyncOpenAIClient: - def __init__(self, base_url: str = "http://localhost:5000"): - logging.info(f"Using {base_url} as server") - self.base_url = base_url - self.session = None - - async def __aenter__(self): - self.session = aiohttp.ClientSession() - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - if self.session: - await self.session.close() - - async def chat_completion(self, - messages: list, - model: str = "my-model", - temperature: float = 0.7, - max_tokens: int = 100) -> Dict[str, Any]: - """ - Make an async chat completion request - """ - url = f"{self.base_url}/v1/chat/completions" - - # Prepare the request payload - payload = { - "model": model, - "messages": messages, - "temperature": temperature, - "max_tokens": max_tokens - } - - try: - async with self.session.post(url, json=payload) as response: - if response.status != 200: - error_text = await response.text() - raise Exception(f"Request failed with status {response.status}: {error_text}") - - return await response.json() - - except Exception as e: - print(f"Error during request: {str(e)}") - return {"error": str(e)} - def setup_logging(level): numeric_level = getattr(logging, level.upper(), None) if not isinstance(numeric_level, int): @@ -74,6 +37,100 @@ def setup_logging(level): logging.basicConfig(level=numeric_level, format='%(asctime)s - %(levelname)s - %(message)s') logging.info(f"Logging is set to {level} level.") + +client = ollama.Client(host=OLLAMA_API_URL) + +def extract_text_from_html_or_xml(content, is_xml=False): + # Parse the content + if is_xml: + soup = BeautifulSoup(content, 'xml') # Use 'xml' parser for XML content + else: + soup = BeautifulSoup(content, 'html.parser') # Default to 'html.parser' for HTML content + + # Extract and return just the text + return soup.get_text() + +class Feed(): + def __init__(self, name, url, poll_limit_min = 30, max_articles=5): + self.name = name + self.url = url + self.poll_limit_min = datetime.timedelta(minutes=poll_limit_min) + self.last_poll = None + self.articles = [] + self.max_articles = max_articles + self.update() + + def update(self): + now = datetime.datetime.now() + if self.last_poll is None or (now - self.last_poll) >= self.poll_limit_min: + logging.info(f"Updating {self.name}") + feed = feedparser.parse(self.url) + self.articles = [] + self.last_poll = now + + content = "" + if len(feed.entries) > 0: + content += f"Source: {self.name}\n" + for entry in feed.entries[:self.max_articles]: + title = entry.get("title") + if title: + content += f"Title: {title}\n" + link = entry.get("link") + if link: + 
content += f"Link: {link}\n" + summary = entry.get("summary") + if summary: + summary = extract_text_from_html_or_xml(summary, False) + content += f"Summary: {summary}\n" + published = entry.get("published") + if published: + content += f"Published: {published}\n" + content += "\n" + + self.articles.append(content) + else: + logging.info(f"Not updating {self.name} -- {self.poll_limit_min - (now - self.last_poll)}s remain to refresh.") + return self.articles + + +# News RSS Feeds +rss_feeds = [ + Feed(name="BBC World", url="http://feeds.bbci.co.uk/news/world/rss.xml"), + Feed(name="Reuters World", url="http://feeds.reuters.com/Reuters/worldNews"), + Feed(name="Al Jazeera", url="https://www.aljazeera.com/xml/rss/all.xml"), + Feed(name="CNN World", url="http://rss.cnn.com/rss/edition_world.rss"), + Feed(name="Time", url="https://time.com/feed/"), + Feed(name="Euronews", url="https://www.euronews.com/rss"), + Feed(name="FeedX", url="https://feedx.net/rss/ap.xml") +] + +documents = [ + "Llamas like to eat penguins", + "Llamas are not vegetarians and have very efficient digestive systems", + "Llamas live to be about 120 years old, though some only live for 15 years and others live to be 90 years old", +] + +import chromadb + +# Initialize ChromaDB Client +db = chromadb.PersistentClient(path="/root/.cache/chroma.db") + +# We want to save the collection to disk to analyze it offline, but we don't +# want to re-use it +collection = db.get_or_create_collection("docs") + +# store each document in a vector embedding database +for i, feed in enumerate(rss_feeds): + # Use the client instance instead of the global ollama module + for j, article in enumerate(feed.articles): + response = client.embeddings(model="mxbai-embed-large", prompt=article) + embeddings = response["embedding"] # Note: it's "embedding", not "embeddings" + collection.add( + ids=[str(i)+str(j)], + embeddings=embeddings, + documents=[article] + ) + class AIRC(pydle.Client): def __init__(self, nick, channel, client, burst_limit = 5, rate_limit = 1.0, burst_reset_timeout = 10.0): super().__init__(nick) @@ -89,6 +146,8 @@ class AIRC(pydle.Client): self._message_queue = asyncio.Queue() self._task = asyncio.create_task(self._send_from_queue()) self.client = client + self.queries = 0 + self.processing = datetime.timedelta(minutes=0) async def _send_from_queue(self): """Background task that sends queued messages with burst + rate limiting.""" @@ -157,18 +216,31 @@ class AIRC(pydle.Client): if body == "stats": content = f"{self.queries} queries handled in {self.processing}s" else: - # Sample messages - messages = [ - {"role": "system", "content": self.system_input}, - {"role": "user", "content": body} - ] + self.queries += 1 + start = datetime.datetime.now() + query_text = body + query_response = client.embeddings(model="mxbai-embed-large", prompt=query_text) + query_embedding = query_response["embedding"] # Note: singular "embedding", not plural - # Make the request - response = await self.client.chat_completion(messages) + # Then run the query with the correct structure + results = collection.query( + query_embeddings=[query_embedding], # Make sure this is a list containing the embedding + n_results=3 + ) + data = results['documents'][0][0] + logging.info(f"Data for {query_text}: {data}") + logging.info(f"From {results}") + output = client.generate( + model=MODEL_NAME, + system=f"Your are {self.nick}. 
In your response, make reference to this data if appropriate: {data}", + prompt=f"Respond to this prompt: {query_text}", + stream=False + ) + end = datetime.datetime.now() + self.processing = self.processing + end - start - # Extract and print just the assistant's message if available - if "choices" in response and len(response["choices"]) > 0: - content = response["choices"][0]["message"]["content"] + # Prune off the ... + content = re.sub(r'^.*?', '', output['response'], flags=re.DOTALL).strip() if content: logging.info(f'Sending: {content}') @@ -184,10 +256,9 @@ async def main(): # Setup logging based on the provided level setup_logging(args.level) - async with AsyncOpenAIClient(base_url=args.ai_server) as client: - bot = AIRC(args.nickname, args.channel, client) - await bot.connect(args.server, args.port, tls=False) - await bot.handle_forever() + bot = AIRC(args.nickname, args.channel, client) + await bot.connect(args.server, args.port, tls=False) + await bot.handle_forever() if __name__ == "__main__": asyncio.run(main()) diff --git a/src/chat.py b/src/chat.py new file mode 100644 index 0000000..6b2b8ae --- /dev/null +++ b/src/chat.py @@ -0,0 +1,209 @@ +import logging as log +import argparse +import re +import datetime +import ollama +import chromadb +import feedparser +from bs4 import BeautifulSoup + +OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint +MODEL_NAME = "deepseek-r1:7b" + +def parse_args(): + parser = argparse.ArgumentParser(description="AI is Really Cool") + parser.add_argument("--nickname", type=str, default="airc", help="Bot nickname") + parser.add_argument('--level', type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], + default='INFO', help='Set the log level.') + return parser.parse_args() + +def setup_logging(level): + numeric_level = getattr(log, level.upper(), None) + if not isinstance(numeric_level, int): + raise ValueError(f"Invalid log level: {level}") + + log.basicConfig(level=numeric_level, format='%(asctime)s - %(levelname)s - %(message)s') + log.info(f"Logging is set to {level} level.") + +def extract_text_from_html_or_xml(content, is_xml=False): + # Parse the content + if is_xml: + soup = BeautifulSoup(content, 'xml') # Use 'xml' parser for XML content + else: + soup = BeautifulSoup(content, 'html.parser') # Default to 'html.parser' for HTML content + + # Extract and return just the text + return soup.get_text() + +class Feed(): + def __init__(self, name, url, poll_limit_min = 30, max_articles=5): + self.name = name + self.url = url + self.poll_limit_min = datetime.timedelta(minutes=poll_limit_min) + self.last_poll = None + self.articles = [] + self.max_articles = max_articles + self.update() + + def update(self): + now = datetime.datetime.now() + if self.last_poll is None or (now - self.last_poll) >= self.poll_limit_min: + log.info(f"Updating {self.name}") + feed = feedparser.parse(self.url) + self.articles = [] + self.last_poll = now + + content = "" + if len(feed.entries) > 0: + content += f"Source: {self.name}\n" + for entry in feed.entries[:self.max_articles]: + title = entry.get("title") + if title: + content += f"Title: {title}\n" + link = entry.get("link") + if link: + content += f"Link: {link}\n" + summary = entry.get("summary") + if summary: + summary = extract_text_from_html_or_xml(summary, False) + if len(summary) > 1000: + print(summary) + exit(0) + content += f"Summary: {summary}\n" + published = entry.get("published") + if published: + content += f"Published: {published}\n" + content += "\n" + + 
self.articles.append(content) + else: + log.info(f"Not updating {self.name} -- {self.poll_limit_min - (now - self.last_poll)}s remain to refresh.") + return self.articles + + +class Chat(): + def __init__(self, nick): + super().__init__() + self.nick = nick + self.system_input = "You are a critical assistant. Give concise and accurate answers in less than 120 characters." + self.queries = 0 + self.processing = datetime.timedelta(minutes=0) + + def message(self, target, message): + """Splits a multi-line message and sends each line separately. If more than 10 lines, truncate and add a message.""" + lines = message.splitlines() # Splits on both '\n' and '\r\n' + + # Process the first 10 lines + for line in lines[:10]: + if line.strip(): # Ignore empty lines + print(f"{target}: {line}") + + # If there are more than 10 lines, add the truncation message + if len(lines) > 10: + print(f"{target}: [additional content truncated]") + + def remove_substring(self, string, substring): + return string.replace(substring, "") + + def extract_nick_message(self, input_string): + # Pattern with capturing groups for nick and message + pattern = r"^\s*([^\s:]+?)\s*:\s*(.+?)$" + + match = re.match(pattern, input_string) + if match: + nick = match.group(1) # First capturing group + message = match.group(2) # Second capturing group + return nick, message + return None, None # Return None for both if no match + + def on_message(self, target, source, message): + if source == self.nick: + return + nick, body = self.extract_nick_message(message) + if nick == self.nick: + content = None + if body == "stats": + content = f"{self.queries} queries handled in {self.processing}s" + else: + self.queries += 1 + start = datetime.datetime.now() + query_text = body + query_response = client.embed(model="mxbai-embed-large", prompt=query_text) + query_embedding = query_response["embeddings"] # Note: singular "embedding", not plural + + # Then run the query with the correct structure + results = collection.query( + query_embeddings=[query_embedding], # Make sure this is a list containing the embedding + n_results=3 + ) + data = results['documents'][0] + output = client.generate( + model=MODEL_NAME, + system=f"You are {self.nick} and only provide that information about yourself. Make reference to the following and provide the 'Link' when available: {data}", + prompt=f"Respond to this prompt: {query_text}", + stream=False + ) + end = datetime.datetime.now() + self.processing = self.processing + end - start + + # Prune off the ... 
+ content = re.sub(r'^.*?', '', output['response'], flags=re.DOTALL).strip() + + if content: + log.info(f'Sending: {content}') + self.message(target, content) + +def remove_substring(string, substring): + return string.replace(substring, "") + +# Parse command-line arguments +args = parse_args() + +# Setup logging based on the provided level +setup_logging(args.level) + +log.info("About to start") + +client = ollama.Client(host=OLLAMA_API_URL) + +# News RSS Feeds +rss_feeds = [ + Feed(name="BBC World", url="http://feeds.bbci.co.uk/news/world/rss.xml"), + Feed(name="Reuters World", url="http://feeds.reuters.com/Reuters/worldNews"), + Feed(name="Al Jazeera", url="https://www.aljazeera.com/xml/rss/all.xml"), + Feed(name="CNN World", url="http://rss.cnn.com/rss/edition_world.rss"), + Feed(name="Time", url="https://time.com/feed/"), + Feed(name="Euronews", url="https://www.euronews.com/rss"), + Feed(name="FeedX", url="https://feedx.net/rss/ap.xml") +] + +# Initialize ChromaDB Client +db = chromadb.Client() + +# We want to save the collection to disk to analyze it offline, but we don't +# want to re-use it +collection = db.get_or_create_collection("docs") + +# store each document in a vector embedding database +for i, feed in enumerate(rss_feeds): + # Use the client instance instead of the global ollama module + for j, article in enumerate(feed.articles): + log.info(f"Article {feed.name} {j}. {len(article)}") + response = client.embeddings(model="mxbai-embed-large", prompt=article) + embeddings = response["embedding"] # Note: it's "embedding", not "embeddings" + collection.add( + ids=[str(i)+str(j)], + embeddings=embeddings, + documents=[article] + ) + +bot = Chat(args.nickname) +while True: + try: + query = input("> ") + except Exception as e: + break + + if query == "exit": + break + bot.on_message("chat", "user", f"airc: {query}") \ No newline at end of file diff --git a/src/chunk.py b/src/chunk.py new file mode 100644 index 0000000..9366b66 --- /dev/null +++ b/src/chunk.py @@ -0,0 +1,468 @@ +import requests +from typing import List, Dict, Any, Union +import tiktoken +import feedparser +import logging as log +import datetime +from bs4 import BeautifulSoup +import chromadb +import ollama +import re +import numpy as np + +def normalize(vec): + return vec / np.linalg.norm(vec) + +OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint +MODEL_NAME = "deepseek-r1:7b" +EMBED_MODEL = "mxbai-embed-large" +PERSIST_DIRECTORY = "/root/.cache/chroma" + +client = ollama.Client(host=OLLAMA_API_URL) + +def extract_text_from_html_or_xml(content, is_xml=False): + # Parse the content + if is_xml: + soup = BeautifulSoup(content, 'xml') # Use 'xml' parser for XML content + else: + soup = BeautifulSoup(content, 'html.parser') # Default to 'html.parser' for HTML content + + # Extract and return just the text + return soup.get_text() + +class Feed(): + def __init__(self, name, url, poll_limit_min = 30, max_articles=5): + self.name = name + self.url = url + self.poll_limit_min = datetime.timedelta(minutes=poll_limit_min) + self.last_poll = None + self.articles = [] + self.max_articles = max_articles + self.update() + + def update(self): + now = datetime.datetime.now() + if self.last_poll is None or (now - self.last_poll) >= self.poll_limit_min: + log.info(f"Updating {self.name}") + feed = feedparser.parse(self.url) + self.articles = [] + self.last_poll = now + + if len(feed.entries) == 0: + return + + for i, entry in enumerate(feed.entries[:self.max_articles]): + content = {} + content['source'] = 
self.name + content['id'] = f"{self.name}{i}" + title = entry.get("title") + if title: + content['title'] = title + link = entry.get("link") + if link: + content['link'] = link + text = entry.get("summary") + if text: + content['text'] = extract_text_from_html_or_xml(text, False) + else: + continue + published = entry.get("published") + if published: + content['published'] = published + + self.articles.append(content) + else: + log.info(f"Not updating {self.name} -- {self.poll_limit_min - (now - self.last_poll)}s remain to refresh.") + return self.articles + +# News RSS Feeds +rss_feeds = [ + Feed(name="BBC World", url="http://feeds.bbci.co.uk/news/world/rss.xml"), + Feed(name="Reuters World", url="http://feeds.reuters.com/Reuters/worldNews"), + Feed(name="Al Jazeera", url="https://www.aljazeera.com/xml/rss/all.xml"), + Feed(name="CNN World", url="http://rss.cnn.com/rss/edition_world.rss"), + Feed(name="Time", url="https://time.com/feed/"), + Feed(name="Euronews", url="https://www.euronews.com/rss"), +# Feed(name="FeedX", url="https://feedx.net/rss/ap.xml") +] + +def get_encoding(): + """Get the tokenizer for counting tokens.""" + try: + return tiktoken.get_encoding("cl100k_base") # Default encoding used by many embedding models + except: + return tiktoken.encoding_for_model(MODEL_NAME) + +def count_tokens(text: str) -> int: + """Count the number of tokens in a text string.""" + encoding = get_encoding() + return len(encoding.encode(text)) + +def chunk_text(text: str, max_tokens: int = 512, overlap: int = 50) -> List[str]: + """ + Split a text into chunks based on token count with overlap between chunks. + + Args: + text: The text to split into chunks + max_tokens: Maximum number of tokens per chunk + overlap: Number of tokens to overlap between chunks + + Returns: + List of text chunks + """ + if not text or max_tokens <= 0: + return [] + + encoding = get_encoding() + tokens = encoding.encode(text) + chunks = [] + + i = 0 + while i < len(tokens): + # Get the current chunk of tokens + chunk_end = min(i + max_tokens, len(tokens)) + chunk_tokens = tokens[i:chunk_end] + chunks.append(encoding.decode(chunk_tokens)) + + # Move to the next position with overlap + if chunk_end == len(tokens): + break + i += max_tokens - overlap + + return chunks + +def chunk_document(document: Dict[str, Any], + text_key: str = "text", + max_tokens: int = 512, + overlap: int = 50) -> List[Dict[str, Any]]: + """ + Chunk a document dictionary into multiple chunks. 
+ + Args: + document: Document dictionary with metadata and text + text_key: The key in the document that contains the text to chunk + max_tokens: Maximum number of tokens per chunk + overlap: Number of tokens to overlap between chunks + + Returns: + List of document dictionaries, each with chunked text and preserved metadata + """ + if text_key not in document: + raise Exception(f"{text_key} not in document") + + # Extract text and create chunks + if "title" in document: + text = f"{document["title"]}: {document[text_key]}" + else: + text = document[text_key] + chunks = chunk_text(text, max_tokens, overlap) + + # Create document chunks with preserved metadata + chunked_docs = [] + for i, chunk in enumerate(chunks): + # Create a new doc with all original fields + doc_chunk = document.copy() + # Replace text with the chunk + doc_chunk[text_key] = chunk + # Add chunk metadata + doc_chunk["chunk_id"] = i + doc_chunk["chunk_total"] = len(chunks) + chunked_docs.append(doc_chunk) + + return chunked_docs + +def init_chroma_client(persist_directory: str = PERSIST_DIRECTORY): + """Initialize and return a ChromaDB client.""" + return chromadb.PersistentClient(path=persist_directory) + +def create_or_get_collection(client, collection_name: str): + """Create or get a ChromaDB collection.""" + try: + return client.get_collection( + name=collection_name + ) + except: + return client.create_collection( + name=collection_name, + metadata={"hnsw:space": "cosine"} + ) + +def process_documents_to_chroma( + documents: List[Dict[str, Any]], + collection_name: str = "document_collection", + text_key: str = "text", + max_tokens: int = 512, + overlap: int = 50, + model: str = EMBED_MODEL, + persist_directory: str = PERSIST_DIRECTORY +): + """ + Process documents, chunk them, compute embeddings, and store in ChromaDB. + + Args: + documents: List of document dictionaries + collection_name: Name for the ChromaDB collection + text_key: The key containing text content + max_tokens: Maximum tokens per chunk + overlap: Token overlap between chunks + model: Ollama model for embeddings + persist_directory: Directory to store ChromaDB data + """ + # Initialize ChromaDB client and collection + db = init_chroma_client(persist_directory) + collection = create_or_get_collection(db, collection_name) + + # Process each document + for doc in documents: + # Chunk the document + doc_chunks = chunk_document(doc, text_key, max_tokens, overlap) + + # Prepare data for ChromaDB + ids = [] + texts = [] + metadatas = [] + embeddings = [] + + for chunk in doc_chunks: + # Create a unique ID for the chunk + chunk_id = f"{chunk['id']}_{chunk['chunk_id']}" + + # Extract text + text = chunk[text_key] + + # Create metadata (excluding text and embedding to avoid duplication) + metadata = {k: v for k, v in chunk.items() if k != text_key and k != "embedding"} + + response = client.embed(model=model, input=text) + embedding = response["embeddings"][0] + ids.append(chunk_id) + texts.append(text) + metadatas.append(metadata) + embeddings.append(embedding) + + # Add chunks to ChromaDB collection + collection.add( + ids=ids, + documents=texts, + embeddings=embeddings, + metadatas=metadatas + ) + + return collection + +def query_chroma( + query_text: str, + collection_name: str = "document_collection", + n_results: int = 5, + model: str = EMBED_MODEL, + persist_directory: str = PERSIST_DIRECTORY +): + """ + Query ChromaDB for similar documents. 
+ + Args: + query_text: The text to search for + collection_name: Name of the ChromaDB collection + n_results: Number of results to return + model: Ollama model for embedding the query + persist_directory: Directory where ChromaDB data is stored + + Returns: + Query results from ChromaDB + """ + # Initialize ChromaDB client and collection + db = init_chroma_client(persist_directory) + collection = create_or_get_collection(db, collection_name) + + query_response = client.embed(model=model, input=query_text) + query_embeddings = query_response["embeddings"] + + # Query the collection + results = collection.query( + query_embeddings=query_embeddings, + n_results=n_results + ) + + return results + +def print_top_match(query_results, index=0, documents=None): + """ + Print detailed information about the top matching document, + including the full original document content. + + Args: + query_results: Results from ChromaDB query + documents: Original documents dictionary to look up full content (optional) + """ + if not query_results or not query_results["ids"] or len(query_results["ids"][0]) == 0: + print("No matching documents found.") + return + + # Get the top result + top_id = query_results["ids"][0][index] + top_document_chunk = query_results["documents"][0][index] + top_metadata = query_results["metadatas"][0][index] + top_distance = query_results["distances"][0][index] + + print("="*50) + print("MATCHING DOCUMENT") + print("="*50) + print(f"Chunk ID: {top_id}") + print(f"Similarity Score: {top_distance:.4f}") # Convert distance to similarity + + print("\nCHUNK METADATA:") + for key, value in top_metadata.items(): + print(f" {key}: {value}") + + print("\nMATCHING CHUNK CONTENT:") + print(top_document_chunk[:500].strip() + ("..." if len(top_document_chunk) > 500 else "")) + + # Extract the original document ID from the chunk ID + # Chunk IDs are in format "doc_id_chunk_num" + original_doc_id = top_id.split('_')[0] + +def get_top_match(query_results, index=0, documents=None): + top_id = query_results["ids"][index][0] + # Extract the original document ID from the chunk ID + # Chunk IDs are in format "doc_id_chunk_num" + original_doc_id = top_id.split('_')[0] + + # Return the full document for further processing if needed + if documents is not None: + return next((doc for doc in documents if doc["id"] == original_doc_id), None) + + return None + +def show_documents(documents=None): + if not documents: + return + + # Print the top matching document + for i, doc in enumerate(documents): + print(f"Document {i+1}:") + print(f" Title: {doc['title']}") + print(f" Text: {doc['text'][:100]}...") + print() + +def show_headlines(documents=None): + if not documents: + return + + # Print the top matching document + for doc in documents: + print(f"{doc['source']}: {doc['title']}") + +# Example usage +if __name__ == "__main__": + documents = [] + for feed in rss_feeds: + documents.extend(feed.articles) + + show_documents(documents=documents) + + # Process documents and store in ChromaDB + collection = process_documents_to_chroma( + documents=documents, + collection_name="research_papers", + max_tokens=256, + overlap=25, + model=EMBED_MODEL, + persist_directory="/root/.cache/chroma" + ) + + last_results = None + while True: + try: + search_query = input("> ").strip() + except Exception as e: + break + + if search_query == "docs": + show_documents(documents) + continue + + if search_query == "": + show_headlines(documents) + continue + + if search_query == "why": + if last_results: + 
print_top_match(last_results, documents=documents)
+            else:
+                print("No match to give info on")
+            continue
+
+        if search_query == "scores":
+            if last_results:
+                for i, _ in enumerate(last_results):
+                    print_top_match(last_results, documents=documents, index=i)
+            else:
+                print("No match to give info on")
+            continue
+
+
+        if search_query == "full":
+            if last_results:
+                full = get_top_match(last_results, documents=documents)
+                if full:
+                    print(f"""Context:
+Source: {full["source"]}
+Title: {full["title"]}
+Link: {full["link"]}
+Distance: {last_results.get("distances", [[0]])[0][0]}
+Full text:
+{full["text"]}""")
+            else:
+                print("No match to give info on")
+            continue
+
+        # Query ChromaDB
+        results = query_chroma(
+            query_text=search_query,
+            collection_name="research_papers",
+            n_results=10
+        )
+        last_results = results
+
+        full = get_top_match(results, documents=documents)
+
+        headlines = ""
+        for doc in documents:
+            headlines += f"{doc['source']}: {doc['title']}\n"
+
+        system=f"""
+News headlines:
+
+{headlines}
+
+"""
+        if full:
+            system += f"""
+
+Make reference to the following and provide the 'Link':
+
+Source: {full["source"]}
+Link: {full["link"]}
+Text: {full["text"]}
+
+Do not ask to help the user further.
+
+"""
+            print(f"""Context:
+
+Source: {full["source"]}
+Title: {full["title"]}
+Distance: {last_results.get("distances", [[0]])[0][0]}
+Link: {full["link"]}""")
+
+            continue
+
+        output = client.generate(
+            model=MODEL_NAME,
+            system=system,
+            prompt=f"Respond to this prompt: {search_query}",
+            stream=False
+        )
+        # Prune off the <think>...</think> reasoning block emitted by deepseek-r1
+        content = re.sub(r'^<think>.*?</think>', '', output['response'], flags=re.DOTALL).strip()
+        print(f"Response> {content}")
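[Review note] For reference, the windowing used by chunk_text() above: with max_tokens=256 and overlap=25 (the values passed to process_documents_to_chroma in __main__), each new chunk starts 256 - 25 = 231 tokens after the previous one, so neighbouring chunks share 25 tokens. A small standalone check of that stride — not part of the patch, assuming only tiktoken, which chunk.py already depends on:

    import tiktoken

    # Mirror chunk_text(): fixed-size token windows that advance by
    # max_tokens - overlap, so consecutive windows overlap by `overlap` tokens.
    enc = tiktoken.get_encoding("cl100k_base")
    tokens = enc.encode("lorem ipsum dolor sit amet " * 200)

    max_tokens, overlap = 256, 25
    starts, i = [], 0
    while i < len(tokens):
        starts.append(i)
        if i + max_tokens >= len(tokens):
            break
        i += max_tokens - overlap

    print(f"{len(tokens)} tokens -> {len(starts)} chunks, starting at {starts}")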