James Ketr 2025-03-18 13:08:48 -07:00
parent 5f6971510a
commit 1130077c03
4 changed files with 307 additions and 584 deletions

View File

@@ -1,3 +1,75 @@
#
# Build Python 3.11 for use in later stages
#
FROM ubuntu:oracular AS python-build
SHELL [ "/bin/bash", "-c" ]
# Instructions adapted into this Dockerfile from:
#
# https://github.com/pytorch/pytorch
#
# and
#
# https://pytorch.org/docs/stable/notes/get_start_xpu.html
# https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-6.html
#
#
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
gpg \
wget \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# ipex only supports python 3.11, so use 3.11 instead of latest oracular (3.12)
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
build-essential \
ca-certificates \
ccache \
cmake \
curl \
git \
gpg-agent \
less \
libbz2-dev \
libffi-dev \
libjpeg-dev \
libpng-dev \
libreadline-dev \
libssl-dev \
libsqlite3-dev \
llvm \
nano \
wget \
zlib1g-dev \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
RUN /usr/sbin/update-ccache-symlinks
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
# Build Python in /opt/..., install it locally, then remove the build
# environment, all collapsed into a single docker layer.
WORKDIR /opt
ENV PYTHON_VERSION=3.11.9
RUN wget -q -O - https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz | tar -xz \
&& cd Python-${PYTHON_VERSION} \
&& ./configure --prefix=/opt/python --enable-optimizations \
&& make -j$(nproc) \
&& make install \
&& cd /opt \
&& rm -rf Python-${PYTHON_VERSION}
FROM ubuntu:oracular AS ze-monitor
# From https://github.com/jketreno/ze-monitor
RUN apt-get update \
@@ -29,19 +101,75 @@ RUN cmake ..
&& make \
&& cpack
#
# Build the ipex-llm wheel for use in later stages
#
FROM python-build AS ipex-llm-src
RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2
RUN git clone --branch main --depth 1 https://github.com/intel/ipex-llm.git /opt/ipex-llm \
&& cd /opt/ipex-llm \
&& git fetch --depth 1 origin cb3c4b26ad058c156591816aa37eec4acfcbf765 \
&& git checkout cb3c4b26ad058c156591816aa37eec4acfcbf765
WORKDIR /opt/ipex-llm
RUN python3 -m venv --system-site-packages /opt/ipex-llm/venv
RUN { \
echo '#!/bin/bash' ; \
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
echo 'source /opt/ipex-llm/venv/bin/activate' ; \
echo 'bash -c "${@}"' ; \
} > /opt/ipex-llm/shell ; \
chmod +x /opt/ipex-llm/shell
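# With SHELL pointing at this wrapper, every subsequent RUN in this stage
# executes inside the ipex-llm venv with python3.11 selected, so the pip
# installs below land in the venv rather than the system site-packages.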
SHELL [ "/opt/ipex-llm/shell" ]
RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu
WORKDIR /opt/ipex-llm/python/llm
RUN pip install requests wheel
RUN python setup.py clean --all bdist_wheel --linux
#
# The main airc image:
# * python 3.11
# * pytorch xpu w/ ipex-llm
# * ollama-ipex-llm
# * src/server.py - model server supporting RAG and fine-tuned models
#
# Agents using server:
# * src/web-ui.py - React server (airc.ketrenos.com)
# * src/irc.py - IRC backend (irc.libera.chat #airc-test)
# * src/cli.py - Command line chat
#
# Utilities:
# * src/training-fine-tune.py - Perform fine-tuning on curated documents
FROM ubuntu:oracular AS airc
COPY --from=python-build /opt/python /opt/python
# Get a couple prerequisites
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
gpg \
python3 \
python3-pip \
python3-venv \
wget \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# The client frontend is built using React Expo to allow
# easy creation of an Android app as well as a web app
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
nodejs \
npm \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Install Intel graphics runtimes
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
@@ -58,27 +186,41 @@ RUN apt-get update \
WORKDIR /opt/airc
RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2
# Set up the airc python virtual environment
RUN python3 -m venv --system-site-packages /opt/airc/venv
# Set up the docker pip shell
RUN { \
echo '#!/bin/bash' ; \
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
echo 'source /opt/airc/venv/bin/activate' ; \
echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \
} > /opt/airc/shell ; \
chmod +x /opt/airc/shell
# Activate the pip environment on all shell calls
SHELL [ "/opt/airc/shell" ]
# From https://pytorch-extension.intel.com/installation?platform=gpu&version=v2.6.10%2Bxpu&os=linux%2Fwsl2&package=pip
RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu
RUN pip install intel-extension-for-pytorch==2.6.10+xpu oneccl_bind_pt==2.6.0+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
# From https://huggingface.co/docs/bitsandbytes/main/en/installation?backend=Intel+CPU+%2B+GPU#multi-backend
RUN pip install "transformers>=4.45.1"
RUN pip install 'https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl'
# Install ollama python module
RUN pip install ollama
# pydle does not work with newer asyncio releases, which deprecated and then
# removed generator-based coroutines. Patch it to work.
COPY /src/pydle.patch /opt/pydle.patch
RUN pip install pydle \
&& patch -d /opt/airc/venv/lib/python3*/site-packages/pydle \
-p1 < /opt/pydle.patch \
&& rm /opt/pydle.patch
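# The patch contents aren't shown in this diff. As a hypothetical sketch of
# the incompatibility it works around: Python 3.11 removed asyncio.coroutine,
# so generator-based coroutines such as
#     @asyncio.coroutine
#     def on_message(self, target, source, message): yield from ...
# have to be rewritten as native coroutines:
#     async def on_message(self, target, source, message): await ...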
@@ -87,9 +229,49 @@ RUN pip install setuptools --upgrade
RUN pip install ollama
RUN pip install feedparser bs4 chromadb
RUN pip install tiktoken
RUN pip install flask flask_cors
RUN pip install peft datasets
COPY --from=ipex-llm-src /opt/ipex-llm/python/llm/dist/*.whl /opt/wheels/
RUN for pkg in /opt/wheels/ipex_llm*.whl; do pip install $pkg; done
# mistral fails with cache_position errors with transformers>4.40 (or at least it fails with the latest),
# and the MistralSpda* and QwenSpda* classes are missing (needed when loading models with )
RUN pip install "sentence_transformers<3.4.1"
# "transformers==4.40.0" ""
#RUN pip install sentence_transformers "transformers==4.40.0" "trl<0.12.0"
#RUN pip install transformers==4.45.0 "trl<0.12.0"
# trl.core doesn't have what is needed with the default 'pip install trl' version
#RUN pip install git+https://github.com/huggingface/trl.git@7630f877f91c556d9e5a3baa4b6e2894d90ff84c
# To get xe_linear and other Xe methods
# NOTE: As of 2025-03-10, these are only available for Python 3.11, which is
# why we build python from source
RUN pip install 'bigdl-core-xe-all>=2.6.0b'
# NOTE: IPEX includes the oneAPI components... not sure if they still need to be installed separately with a oneAPI env
RUN pip install einops diffusers # Required for IPEX optimize(), which is required to convert from Params4bit
RUN pip install yfinance pytz geopy
SHELL [ "/bin/bash", "-c" ]
# Don't install the full oneapi essentials; just the ones that we seem to need
# RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
# | gpg --dearmor -o /usr/share/keyrings/oneapi-archive-keyring.gpg \
# && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
# | tee /etc/apt/sources.list.d/oneAPI.list \
# && apt-get update \
# && DEBIAN_FRONTEND=noninteractive apt-get install -y \
# intel-oneapi-mkl-sycl-2025.0 \
# intel-oneapi-dnnl-2025.0 \
# intel-oneapi-dpcpp-cpp-2025.0 \
# && apt-get clean \
# && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
# dpcpp is needed for LoRA backend when
# libze-dev is needed for LoRA/triton backend in order to build stuff
# Unfortunately, that fails with:
# ImportError: /opt/airc/venv/lib/python3.11/site-packages/intel_extension_for_pytorch/lib/libintel-ext-pt-cpu.so: undefined symbol: _ZNK5torch8autograd4Node4nameEv
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
libncurses6 \
@@ -108,6 +290,8 @@ RUN { \
echo 'echo "Container: airc"'; \
echo 'set -e'; \
echo 'echo "Setting pip environment to /opt/airc"'; \
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
echo 'source /opt/airc/venv/bin/activate'; \
echo ''; \
echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/airc/)?shell$ ]]; then'; \
@@ -126,6 +310,11 @@ RUN { \
} > /entrypoint.sh \
&& chmod +x /entrypoint.sh
# From
ENV USE_XETLA=OFF
ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
ENV SYCL_CACHE_PERSISTENT=1
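# The USE_XETLA / SYCL_* values above match the Intel GPU runtime settings
# recommended in the ipex-llm documentation.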
ENTRYPOINT [ "/entrypoint.sh" ]
FROM ubuntu:oracular AS ollama
@@ -185,7 +374,7 @@ RUN { \
SHELL [ "/opt/ollama/shell" ]
# Install ollama python module
RUN pip install ollama
SHELL [ "/bin/bash", "-c" ]
@@ -233,13 +422,14 @@ FROM airc AS jupyter
SHELL [ "/opt/airc/shell" ]
# BEGIN setup Jupyter
RUN pip install jupyter \
jupyterlab==4.3.0a0 \
jupyterhub==5.0.0 \
notebook==7.3.0a0 \
"jupyter-server-proxy>=4.1.2"
RUN pip install \
jupyterlab \
dash[jupyterlab] \
&& jupyter lab build --dev-build=False --minimize=False
# END setup Jupyter
RUN pip install -r /opt/airc/src/requirements.txt
SHELL [ "/bin/bash", "-c" ]
RUN { \
@@ -259,8 +449,8 @@ RUN { \
echo 'source /opt/airc/venv/bin/activate' ; \
echo 'if [[ "${1}" == "shell" ]]; then echo "Dropping to shell"; /bin/bash; exit $?; fi' ; \
echo 'while true; do' ; \
echo ' echo "Launching jupyter notebook"' ; \
echo ' jupyter notebook \' ; \
echo ' echo "Launching jupyter lab"' ; \
echo ' jupyter lab \' ; \
echo ' --notebook-dir=/opt/jupyter \' ; \
echo ' --port 8888 \' ; \
echo ' --ip 0.0.0.0 \' ; \
@@ -278,4 +468,67 @@ RUN { \
} > /entrypoint-jupyter.sh \
&& chmod +x /entrypoint-jupyter.sh
ENTRYPOINT [ "/entrypoint-jupyter.sh" ]
ENTRYPOINT [ "/entrypoint-jupyter.sh" ]
FROM ubuntu:oracular AS miniircd
COPY --from=python-build /opt/python /opt/python
# Get a couple prerequisites
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
gpg \
wget \
nano \
irssi \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /opt/miniircd
RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2
# Set up the miniircd python virtual environment
RUN python3 -m venv --system-site-packages /opt/miniircd/venv
# Set up the docker pip shell
RUN { \
echo '#!/bin/bash' ; \
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
echo 'source /opt/miniircd/venv/bin/activate' ; \
echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \
} > /opt/miniircd/shell ; \
chmod +x /opt/miniircd/shell
# Activate the pip environment on all shell calls
SHELL [ "/opt/miniircd/shell" ]
RUN pip install miniircd
SHELL [ "/bin/bash", "-c" ]
RUN { \
echo '#!/bin/bash'; \
echo 'echo "Container: miniircd"'; \
echo 'set -e'; \
echo 'echo "Setting pip environment to /opt/miniircd"'; \
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
echo 'source /opt/miniircd/venv/bin/activate'; \
echo ''; \
echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/miniircd/)?shell$ ]]; then'; \
echo ' echo "Dropping to shell"'; \
echo ' shift' ; \
echo ' echo "Running: ${@}"' ; \
echo ' if [[ "${1}" != "" ]]; then' ; \
echo ' exec ${@}'; \
echo ' else' ; \
echo ' exec /bin/bash'; \
echo ' fi' ; \
echo 'else'; \
echo ' echo "Launching IRC server..."'; \
echo ' miniircd --setuid root "${@}"' ; \
echo 'fi'; \
} > /entrypoint.sh \
&& chmod +x /entrypoint.sh
ENTRYPOINT [ "/entrypoint.sh" ]

View File

@@ -14,9 +14,13 @@ services:
- ollama
networks:
- internal
ports:
- 8911:8911
volumes:
- ./cache:/root/.cache
- ./src:/opt/airc/src:rw
- ./doc:/opt/airc/doc:ro
- ./results:/opt/airc/results:rw
cap_add: # used for running ze-monitor within airc container
- CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
- CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
@@ -36,8 +40,8 @@ services:
- ONEAPI_DEVICE_SELECTOR=level_zero:0
devices:
- /dev/dri:/dev/dri
ports:
- 11434:11434 # ollama serve port
networks:
- internal
volumes:
@@ -61,13 +65,41 @@ services:
- /dev/dri:/dev/dri
depends_on:
- ollama
- miniircd
ports:
- 8888:8888 # Jupyter Notebook
- 60673:60673 # Gradio
networks:
- internal
volumes:
- ./jupyter:/opt/jupyter:rw
- ./cache:/root/.cache
deploy:
resources:
limits:
memory: "0" # No memory limit (Docker treats 0 as unlimited)
reservations:
memory: "0" # No reserved memory (optional)
ulimits:
memlock: -1 # Prevents memory from being locked
oom_kill_disable: true # Prevents OOM killer from killing the container
miniircd:
build:
context: .
dockerfile: Dockerfile
target: miniircd
image: miniircd
env_file:
- .env
devices:
- /dev/dri:/dev/dri
ports:
- 6667:6667 # IRC
networks:
- internal
volumes:
- ./cache:/root/.cache
networks:
internal:

View File

@@ -539,11 +539,11 @@ def create_ui():
outputs=[chat_history, tool_history]
)
timer.tick(check_message_queue, inputs=chatbot, outputs=chatbot).then(
update_log, # This new function updates the log after chatbot processing
inputs=chatbot,
outputs=[chat_history, tool_history]
)
clear.click(do_clear, inputs=None, outputs=[chatbot, chat_history, tool_history], queue=False)

View File

@@ -1,562 +0,0 @@
import requests
from typing import List, Dict, Any, Union
import tiktoken
import feedparser
import logging as log
import datetime
from bs4 import BeautifulSoup
import chromadb
import ollama
import re
import numpy as np
def normalize(vec):
return vec / np.linalg.norm(vec)
OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint
MODEL_NAME = "deepseek-r1:7b"
EMBED_MODEL = "mxbai-embed-large"
PERSIST_DIRECTORY = "/root/.cache/chroma"
client = ollama.Client(host=OLLAMA_API_URL)
def extract_text_from_html_or_xml(content, is_xml=False):
# Parse the content
if is_xml:
soup = BeautifulSoup(content, 'xml') # Use 'xml' parser for XML content
else:
soup = BeautifulSoup(content, 'html.parser') # Default to 'html.parser' for HTML content
# Extract and return just the text
return soup.get_text()
class Feed():
def __init__(self, name, url, poll_limit_min = 30, max_articles=5):
self.name = name
self.url = url
self.poll_limit_min = datetime.timedelta(minutes=poll_limit_min)
self.last_poll = None
self.articles = []
self.max_articles = max_articles
self.update()
def update(self):
now = datetime.datetime.now()
if self.last_poll is None or (now - self.last_poll) >= self.poll_limit_min:
log.info(f"Updating {self.name}")
feed = feedparser.parse(self.url)
self.articles = []
self.last_poll = now
if len(feed.entries) == 0:
return
for i, entry in enumerate(feed.entries[:self.max_articles]):
content = {}
content['source'] = self.name
content['id'] = f"{self.name}{i}"
title = entry.get("title")
if title:
content['title'] = title
link = entry.get("link")
if link:
content['link'] = link
text = entry.get("summary")
if text:
content['text'] = extract_text_from_html_or_xml(text, False)
else:
continue
published = entry.get("published")
if published:
content['published'] = published
self.articles.append(content)
else:
log.info(f"Not updating {self.name} -- {self.poll_limit_min - (now - self.last_poll)}s remain to refresh.")
return self.articles
# News RSS Feeds
rss_feeds = [
Feed(name="IGN.com", url="https://feeds.feedburner.com/ign/games-all"),
Feed(name="BBC World", url="http://feeds.bbci.co.uk/news/world/rss.xml"),
Feed(name="Reuters World", url="http://feeds.reuters.com/Reuters/worldNews"),
Feed(name="Al Jazeera", url="https://www.aljazeera.com/xml/rss/all.xml"),
Feed(name="CNN World", url="http://rss.cnn.com/rss/edition_world.rss"),
Feed(name="Time", url="https://time.com/feed/"),
Feed(name="Euronews", url="https://www.euronews.com/rss"),
# Feed(name="FeedX", url="https://feedx.net/rss/ap.xml")
]
def get_encoding():
"""Get the tokenizer for counting tokens."""
try:
return tiktoken.get_encoding("cl100k_base") # Default encoding used by many embedding models
    except Exception:
return tiktoken.encoding_for_model(MODEL_NAME)
def count_tokens(text: str) -> int:
"""Count the number of tokens in a text string."""
encoding = get_encoding()
return len(encoding.encode(text))
def chunk_text(text: str, max_tokens: int = 512, overlap: int = 50) -> List[str]:
"""
Split a text into chunks based on token count with overlap between chunks.
Args:
text: The text to split into chunks
max_tokens: Maximum number of tokens per chunk
overlap: Number of tokens to overlap between chunks
Returns:
List of text chunks
"""
if not text or max_tokens <= 0:
return []
encoding = get_encoding()
tokens = encoding.encode(text)
chunks = []
i = 0
while i < len(tokens):
# Get the current chunk of tokens
chunk_end = min(i + max_tokens, len(tokens))
chunk_tokens = tokens[i:chunk_end]
chunks.append(encoding.decode(chunk_tokens))
# Move to the next position with overlap
if chunk_end == len(tokens):
break
i += max_tokens - overlap
return chunks
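# Illustrative usage of chunk_text (numbers are arbitrary), e.g. from a REPL:
#   long_text = " ".join(f"word{i}" for i in range(500))
#   chunks = chunk_text(long_text, max_tokens=64, overlap=8)
#   print(len(chunks), "chunks;", count_tokens(chunks[0]), "tokens in the first")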
def chunk_document(document: Dict[str, Any],
text_key: str = "text",
max_tokens: int = 512,
overlap: int = 50) -> List[Dict[str, Any]]:
"""
Chunk a document dictionary into multiple chunks.
Args:
document: Document dictionary with metadata and text
text_key: The key in the document that contains the text to chunk
max_tokens: Maximum number of tokens per chunk
overlap: Number of tokens to overlap between chunks
Returns:
List of document dictionaries, each with chunked text and preserved metadata
"""
if text_key not in document:
raise Exception(f"{text_key} not in document")
# Extract text and create chunks
if "title" in document:
text = f"{document["title"]}: {document[text_key]}"
else:
text = document[text_key]
chunks = chunk_text(text, max_tokens, overlap)
# Create document chunks with preserved metadata
chunked_docs = []
for i, chunk in enumerate(chunks):
# Create a new doc with all original fields
doc_chunk = document.copy()
# Replace text with the chunk
doc_chunk[text_key] = chunk
# Add chunk metadata
doc_chunk["chunk_id"] = i
doc_chunk["chunk_total"] = len(chunks)
chunked_docs.append(doc_chunk)
return chunked_docs
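# Illustrative usage of chunk_document: every chunk keeps the original metadata
# plus chunk_id/chunk_total bookkeeping (document values here are made up):
#   doc = {"id": "demo0", "source": "Demo", "title": "Example", "text": "..."}
#   for piece in chunk_document(doc, max_tokens=128, overlap=16):
#       print(piece["id"], piece["chunk_id"], "of", piece["chunk_total"])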
def init_chroma_client(persist_directory: str = PERSIST_DIRECTORY):
    """Initialize and return a ChromaDB client."""
    # NOTE: currently returns an in-memory client; persist_directory is unused.
    # return chromadb.PersistentClient(path=persist_directory)
    return chromadb.Client()
def create_or_get_collection(client, collection_name: str):
"""Create or get a ChromaDB collection."""
try:
return client.get_collection(
name=collection_name
)
    except Exception:
return client.create_collection(
name=collection_name,
metadata={"hnsw:space": "cosine"}
)
def process_documents_to_chroma(
documents: List[Dict[str, Any]],
collection_name: str = "document_collection",
text_key: str = "text",
max_tokens: int = 512,
overlap: int = 50,
model: str = EMBED_MODEL,
persist_directory: str = PERSIST_DIRECTORY
):
"""
Process documents, chunk them, compute embeddings, and store in ChromaDB.
Args:
documents: List of document dictionaries
collection_name: Name for the ChromaDB collection
text_key: The key containing text content
max_tokens: Maximum tokens per chunk
overlap: Token overlap between chunks
model: Ollama model for embeddings
persist_directory: Directory to store ChromaDB data
"""
# Initialize ChromaDB client and collection
db = init_chroma_client(persist_directory)
collection = create_or_get_collection(db, collection_name)
# Process each document
for doc in documents:
# Chunk the document
doc_chunks = chunk_document(doc, text_key, max_tokens, overlap)
# Prepare data for ChromaDB
ids = []
texts = []
metadatas = []
embeddings = []
for chunk in doc_chunks:
# Create a unique ID for the chunk
chunk_id = f"{chunk['id']}_{chunk['chunk_id']}"
# Extract text
text = chunk[text_key]
# Create metadata (excluding text and embedding to avoid duplication)
metadata = {k: v for k, v in chunk.items() if k != text_key and k != "embedding"}
response = client.embed(model=model, input=text)
embedding = response["embeddings"][0]
ids.append(chunk_id)
texts.append(text)
metadatas.append(metadata)
embeddings.append(embedding)
# Add chunks to ChromaDB collection
collection.add(
ids=ids,
documents=texts,
embeddings=embeddings,
metadatas=metadatas
)
return collection
def query_chroma(
query_text: str,
collection_name: str = "document_collection",
n_results: int = 5,
model: str = EMBED_MODEL,
persist_directory: str = PERSIST_DIRECTORY
):
"""
Query ChromaDB for similar documents.
Args:
query_text: The text to search for
collection_name: Name of the ChromaDB collection
n_results: Number of results to return
model: Ollama model for embedding the query
persist_directory: Directory where ChromaDB data is stored
Returns:
Query results from ChromaDB
"""
# Initialize ChromaDB client and collection
db = init_chroma_client(persist_directory)
collection = create_or_get_collection(db, collection_name)
query_response = client.embed(model=model, input=query_text)
query_embeddings = query_response["embeddings"]
# Query the collection
results = collection.query(
query_embeddings=query_embeddings,
n_results=n_results
)
return results
def print_top_match(query_results, index=0, documents=None):
"""
Print detailed information about the top matching document,
including the full original document content.
Args:
query_results: Results from ChromaDB query
documents: Original documents dictionary to look up full content (optional)
"""
if not query_results or not query_results["ids"] or len(query_results["ids"][0]) == 0:
print("No matching documents found.")
return
# Get the top result
top_id = query_results["ids"][0][index]
top_document_chunk = query_results["documents"][0][index]
top_metadata = query_results["metadatas"][0][index]
top_distance = query_results["distances"][0][index]
print("="*50)
print("MATCHING DOCUMENT")
print("="*50)
print(f"Chunk ID: {top_id}")
print(f"Similarity Score: {top_distance:.4f}") # Convert distance to similarity
print("\nCHUNK METADATA:")
for key, value in top_metadata.items():
print(f" {key}: {value}")
print("\nMATCHING CHUNK CONTENT:")
print(top_document_chunk[:500].strip() + ("..." if len(top_document_chunk) > 500 else ""))
# Extract the original document ID from the chunk ID
# Chunk IDs are in format "doc_id_chunk_num"
original_doc_id = top_id.split('_')[0]
def get_top_match(query_results, index=0, documents=None):
top_id = query_results["ids"][index][0]
# Extract the original document ID from the chunk ID
# Chunk IDs are in format "doc_id_chunk_num"
original_doc_id = top_id.split('_')[0]
# Return the full document for further processing if needed
if documents is not None:
return next((doc for doc in documents if doc["id"] == original_doc_id), None)
return None
def show_documents(documents=None):
if not documents:
return
# Print the top matching document
for i, doc in enumerate(documents):
print(f"Document {i+1}:")
print(f" Title: {doc['title']}")
print(f" Text: {doc['text'][:100]}...")
print()
def show_headlines(documents=None):
if not documents:
return
# Print the top matching document
for doc in documents:
print(f"{doc['source']}: {doc['title']}")
def show_help():
print("""help>
docs Show RAG docs
full Show last full top match
headlines Show the RAG headlines
prompt Show the last prompt
response Show the last response
scores Show last RAG scores
why|think Show last response's <think>
context|match Show RAG match info to last prompt
""")
# Example usage
if __name__ == "__main__":
documents = []
for feed in rss_feeds:
documents.extend(feed.articles)
show_documents(documents=documents)
# Process documents and store in ChromaDB
collection = process_documents_to_chroma(
documents=documents,
collection_name="research_papers",
max_tokens=256,
overlap=25,
model=EMBED_MODEL,
persist_directory="/root/.cache/chroma"
)
last_results = None
last_prompt = None
last_system = None
last_response = None
last_why = None
last_messages = []
while True:
try:
search_query = input("> ").strip()
except KeyboardInterrupt as e:
print("\nExiting.")
break
if search_query == "exit" or search_query == "quit":
print("\nExiting.")
break
if search_query == "docs":
show_documents(documents)
continue
if search_query == "prompt":
if last_prompt:
print(f"""last prompt>
{"="*10}system{"="*10}
{last_system}
{"="*10}prompt{"="*10}
{last_prompt}""")
else:
print(f"No prompts yet")
continue
if search_query == "response":
if last_response:
print(f"""last response>
{"="*10}response{"="*10}
{last_response}""")
else:
print(f"No responses yet")
continue
if search_query == "" or search_query == "help":
show_help()
continue
if search_query == "headlines":
show_headlines(documents)
continue
if search_query == "match" or search_query == "context":
if last_results:
print_top_match(last_results, documents=documents)
else:
print("No match to give info on")
continue
if search_query == "why" or search_query == "think":
if last_why:
print(f"""
why>
{last_why}
""")
else:
print("No processed prompts")
continue
if search_query == "scores":
if last_results:
                for i in range(len(last_results["ids"][0])):
print_top_match(last_results, documents=documents, index=i)
else:
print("No match to give info on")
continue
if search_query == "full":
if last_results:
full = get_top_match(last_results, documents=documents)
if full:
print(f"""Context:
Source: {full["source"]}
Title: {full["title"]}
Link: {full["link"]}
Distance: {last_results.get("distances", [[0]])[0][0]}
Full text:
{full["text"]}""")
else:
print("No match to give info on")
continue
# Query ChromaDB
results = query_chroma(
query_text=search_query,
collection_name="research_papers",
n_results=10
)
last_results = results
full = get_top_match(results, documents=documents)
headlines = ""
for doc in documents:
headlines += f"{doc['source']}: {doc['title']}\n"
system="""
You are the assistant. Your name is airc.
Do not ask to help the user further.
Provide short (less than 100 character) responses.
Rules:
* If the user asks for information about the AI model, how, or who wrote it, provide information about the author from inside the <author></author> tags.
* If you think the user might be asking about the author, ask a follow up question to clarify.
* If there is news in between the <input></input> tags relevant to the prompt, use that. Always mention the source when information comes from an item. If asked for the link, provide it.
* Respond to the prompt in a single, direct response.
* Do not prefix it with a word like "Answer"
You must follow the rules.
"""
# * If a user asks for weather information, include in your response "{{weather_query("country", "city", "state")}}" where the description of the weather should go.
context = f"""<author>
author={[
{'info': 'James wrote the python application that is driving this RAG model on top of deepseek-r1:7b. You can find it at https://github.com/jketreno/airc'},
{'info': 'James Ketrenos wrote the program deploying this AI model with RAG.'},
{'info': 'James Ketrenos is a software engineer with a history in all levels of the computer stack, from the kernel to full-stack web applications. He dabbles in AI/ML and is familiar with pytorch and ollama.'},
{'info': 'James lives in Portland, Oregon and has three kids. Two are attending Oregon State University and one is attending Willamette University.'}
]}
</author>"""
context += "<input>additional information unrelated to James Ketrenos = ["
for doc in documents:
item = {'source':doc["source"],'article':{'title':doc["title"],'link':doc["link"],'text':doc["text"]}}
context += f"{item}"
context += """]
</input>
"""
prompt = f"{context}{search_query}"
last_prompt = prompt
last_system = system
if len(last_messages) != 0:
message_context = f"{last_messages}"
prompt = f"{message_context}{prompt}"
print(f"system len: {len(system)}")
print(f"prompt len: {len(prompt)}")
output = client.generate(
model=MODEL_NAME,
system=f"{system}{context}",
prompt=prompt,
stream=False,
options={ 'num_ctx': 100000 }
)
# Prune off the <think>...</think>
matches = re.match(r'^<think>(.*?)</think>(.*)$', output['response'], flags=re.DOTALL)
if matches:
last_why = matches[1].strip()
content = matches[2].strip()
        else:
            content = output['response'].strip()
            print(f"[garbled] response>\n{output['response']}")
print(f"Response>\n{content}")
last_response = content
last_messages.extend(({
'role': 'user',
'name': 'james',
'message': search_query
}, {
'role': 'assistant',
'message': content
}))
        last_messages = last_messages[-10:]  # keep only the most recent exchanges