diff --git a/Dockerfile b/Dockerfile
index d47c63d..974a2d8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,3 +1,75 @@
+#
+# Build Python 3.11 for use in later stages
+#
+FROM ubuntu:oracular AS python-build
+
+SHELL [ "/bin/bash", "-c" ]
+
+# Instructions adapted for this Dockerfile from:
+#
+# https://github.com/pytorch/pytorch
+#
+# and
+#
+# https://pytorch.org/docs/stable/notes/get_start_xpu.html
+# https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-6.html
+#
+#
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    gpg \
+    wget \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
+
+
+# ipex only supports Python 3.11, so use 3.11 instead of the latest oracular default (3.12)
+
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    build-essential \
+    ca-certificates \
+    ccache \
+    cmake \
+    curl \
+    git \
+    gpg-agent \
+    less \
+    libbz2-dev \
+    libffi-dev \
+    libjpeg-dev \
+    libpng-dev \
+    libreadline-dev \
+    libssl-dev \
+    libsqlite3-dev \
+    llvm \
+    nano \
+    wget \
+    zlib1g-dev \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
+
+# python3 \
+# python3-pip \
+# python3-venv \
+# python3-dev \
+
+RUN /usr/sbin/update-ccache-symlinks
+RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
+
+# Build Python in /opt/..., install it locally, then remove the build tree,
+# all collapsed into a single Docker layer.
+WORKDIR /opt
+ENV PYTHON_VERSION=3.11.9
+
+RUN wget -q -O - https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz | tar -xz \
+    && cd Python-${PYTHON_VERSION} \
+    && ./configure --prefix=/opt/python --enable-optimizations \
+    && make -j$(nproc) \
+    && make install \
+    && cd /opt \
+    && rm -rf Python-${PYTHON_VERSION}
+
 FROM ubuntu:oracular AS ze-monitor
 # From https://github.com/jketreno/ze-monitor
 RUN apt-get update \
@@ -29,19 +101,75 @@ RUN cmake ..
 \
    && make \
    && cpack
 
+#
+# Build the ipex-llm wheel for use in later stages
+#
+FROM python-build AS ipex-llm-src
+
+RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2
+
+RUN git clone --branch main --depth 1 https://github.com/intel/ipex-llm.git /opt/ipex-llm \
+    && cd /opt/ipex-llm \
+    && git fetch --depth 1 origin cb3c4b26ad058c156591816aa37eec4acfcbf765 \
+    && git checkout cb3c4b26ad058c156591816aa37eec4acfcbf765
+
+WORKDIR /opt/ipex-llm
+
+RUN python3 -m venv --system-site-packages /opt/ipex-llm/venv
+RUN { \
+    echo '#!/bin/bash' ; \
+    echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
+    echo 'source /opt/ipex-llm/venv/bin/activate' ; \
+    echo 'bash -c "${@}"' ; \
+    } > /opt/ipex-llm/shell ; \
+    chmod +x /opt/ipex-llm/shell
+
+SHELL [ "/opt/ipex-llm/shell" ]
+
+RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu
+
+WORKDIR /opt/ipex-llm/python/llm
+RUN pip install requests wheel
+RUN python setup.py clean --all bdist_wheel --linux
+
+#
+# The main airc image:
+# * python 3.11
+# * pytorch xpu w/ ipex-llm
+# * ollama-ipex-llm
+# * src/server.py - model server supporting RAG and fine-tuned models
+#
+# Agents using server:
+# * src/web-ui.py - React server (airc.ketrenos.com)
+# * src/irc.py - IRC backend (irc.libera.chat #airc-test)
+# * src/cli.py - Command line chat
+#
+# Utilities:
+# * src/training-fine-tune.py - Perform fine-tuning on curated documents
 FROM ubuntu:oracular AS airc
+COPY --from=python-build /opt/python /opt/python
+
 # Get a couple prerequisites
 RUN apt-get update \
     && DEBIAN_FRONTEND=noninteractive apt-get install -y \
     gpg \
-    python3 \
-    python3-pip \
-    python3-venv \
+    # python3 \
+    # python3-pip \
+    # python3-venv \
     wget \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
 
+# The client frontend is built using React Expo to allow
+# easy creation of an Android app as well as a web app
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    nodejs \
+    npm \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
+
 # Install Intel graphics runtimes
 RUN apt-get update \
     && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
@@ -58,27 +186,41 @@ RUN apt-get update \
 WORKDIR /opt/airc
 
+RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2
+
 # Setup the ollama python virtual environment
 RUN python3 -m venv --system-site-packages /opt/airc/venv
 
 # Setup the docker pip shell
 RUN { \
     echo '#!/bin/bash' ; \
+    echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
+    echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
     echo 'source /opt/airc/venv/bin/activate' ; \
-    echo 'bash -c "${@}"' ; \
+    echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \
     } > /opt/airc/shell ; \
     chmod +x /opt/airc/shell
 
 # Activate the pip environment on all shell calls
 SHELL [ "/opt/airc/shell" ]
+
+# From https://pytorch-extension.intel.com/installation?platform=gpu&version=v2.6.10%2Bxpu&os=linux%2Fwsl2&package=pip
+RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu
+RUN pip install intel-extension-for-pytorch==2.6.10+xpu oneccl_bind_pt==2.6.0+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+
+# From https://huggingface.co/docs/bitsandbytes/main/en/installation?backend=Intel+CPU+%2B+GPU#multi-backend
+RUN pip install "transformers>=4.45.1"
+RUN pip install 'https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl'
+
 # Install ollama python module
-RUN pip3 install ollama
+RUN pip install ollama
+
 # pydle does not work with newer asyncio due to coroutine
 # being deprecated. Patch to work.
 COPY /src/pydle.patch /opt/pydle.patch
-RUN pip3 install pydle \
+RUN pip install pydle \
     && patch -d /opt/airc/venv/lib/python3*/site-packages/pydle \
     -p1 < /opt/pydle.patch \
     && rm /opt/pydle.patch
@@ -87,9 +229,49 @@ RUN pip install setuptools --upgrade
 RUN pip install ollama
 RUN pip install feedparser bs4 chromadb
 RUN pip install tiktoken
+RUN pip install flask flask_cors
+RUN pip install peft datasets
+
+COPY --from=ipex-llm-src /opt/ipex-llm/python/llm/dist/*.whl /opt/wheels/
+RUN for pkg in /opt/wheels/ipex_llm*.whl; do pip install $pkg; done
+
+# mistral fails with cache_position errors with transformers>4.40 (or at least it fails with the latest)
+# as well as MistralSpda* and QwenSpda* things missing (needed when loading models with )
+RUN pip install "sentence_transformers<3.4.1"
+# "transformers==4.40.0" ""
+#RUN pip install sentence_transformers "transformers==4.40.0" "trl<0.12.0"
+#RUN pip install transformers==4.45.0 "trl<0.12.0"
+# trl.core doesn't have what is needed with the default 'pip install trl' version
+#RUN pip install git+https://github.com/huggingface/trl.git@7630f877f91c556d9e5a3baa4b6e2894d90ff84c
+
+# To get xe_linear and other Xe methods
+# NOTE: As of 2025-03-10, these are only available for Python 3.11, hence
+# why we build python from source
+RUN pip3 install 'bigdl-core-xe-all>=2.6.0b'
+
+# NOTE: IPEX includes the oneAPI components... not sure if they still need to be installed separately with a oneAPI env
+RUN pip install einops diffusers # Required for IPEX optimize(), which is required to convert from Params4bit
+
+RUN pip install yfinance pytz geopy
 
 SHELL [ "/bin/bash", "-c" ]
 
+# Don't install the full oneapi essentials; just the ones that we seem to need
+# RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
+#     | gpg --dearmor -o /usr/share/keyrings/oneapi-archive-keyring.gpg \
+#     && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
+#     | tee /etc/apt/sources.list.d/oneAPI.list \
+#     && apt-get update \
+#     && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+#     intel-oneapi-mkl-sycl-2025.0 \
+#     intel-oneapi-dnnl-2025.0 \
+#     intel-oneapi-dpcpp-cpp-2025.0 \
+#     && apt-get clean \
+#     && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
+# dpcpp is needed for LoRA backend when
+# libze-dev is needed for LoRA/triton backend in order to build stuff
+# Unfortunately, that fails with:
+# ImportError: /opt/airc/venv/lib/python3.11/site-packages/intel_extension_for_pytorch/lib/libintel-ext-pt-cpu.so: undefined symbol: _ZNK5torch8autograd4Node4nameEv
 RUN apt-get update \
     && DEBIAN_FRONTEND=noninteractive apt-get install -y \
     libncurses6 \
@@ -108,6 +290,8 @@ RUN { \
     echo 'echo "Container: airc"'; \
     echo 'set -e'; \
     echo 'echo "Setting pip environment to /opt/airc"'; \
+    echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
+    echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
     echo 'source /opt/airc/venv/bin/activate'; \
     echo ''; \
     echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/airc/)?shell$ ]]; then'; \
@@ -126,6 +310,11 @@ RUN { \
     } > /entrypoint.sh \
     && chmod +x /entrypoint.sh
 
+# From
+ENV USE_XETLA=OFF
+ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+ENV SYCL_CACHE_PERSISTENT=1
+
 ENTRYPOINT [ "/entrypoint.sh" ]
 
 FROM ubuntu:oracular AS ollama
@@ -185,7 +374,7 @@ RUN { \
 SHELL [ "/opt/ollama/shell" ]
 
 # Install ollama python module
-RUN pip3 install ollama
+RUN pip install ollama
 
 SHELL [ "/bin/bash", "-c" ]
@@ -233,13 +422,14 @@ FROM airc AS jupyter
 SHELL [ "/opt/airc/shell" ]
 
 # BEGIN setup Jupyter
-RUN pip install jupyter \
-    jupyterlab==4.3.0a0 \
-    jupyterhub==5.0.0 \
-    notebook==7.3.0a0 \
-    "jupyter-server-proxy>=4.1.2"
+RUN pip install \
+    jupyterlab \
+    dash[jupyterlab] \
+    && jupyter lab build --dev-build=False --minimize=False
 # END setup Jupyter
 
+RUN pip install -r /opt/airc/src/requirements.txt
+
 SHELL [ "/bin/bash", "-c" ]
 
 RUN { \
@@ -259,8 +449,8 @@ RUN { \
     echo 'source /opt/airc/venv/bin/activate' ; \
     echo 'if [[ "${1}" == "shell" ]]; then echo "Dropping to shell"; /bin/bash; exit $?; fi' ; \
     echo 'while true; do' ; \
-    echo ' echo "Launching jupyter notebook"' ; \
-    echo ' jupyter notebook \' ; \
+    echo ' echo "Launching jupyter lab"' ; \
+    echo ' jupyter lab \' ; \
     echo ' --notebook-dir=/opt/jupyter \' ; \
     echo ' --port 8888 \' ; \
     echo ' --ip 0.0.0.0 \' ; \
@@ -278,4 +468,67 @@ RUN { \
     } > /entrypoint-jupyter.sh \
     && chmod +x /entrypoint-jupyter.sh
 
-ENTRYPOINT [ "/entrypoint-jupyter.sh" ]
\ No newline at end of file
+ENTRYPOINT [ "/entrypoint-jupyter.sh" ]
+
+FROM ubuntu:oracular AS miniircd
+
+COPY --from=python-build /opt/python /opt/python
+
+# Get a couple prerequisites
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    gpg \
+    wget \
+    nano \
+    irssi \
+    && apt-get clean \
+    && rm -rf \
+       /var/lib/apt/lists/{apt,dpkg,cache,log}
+
+WORKDIR /opt/miniircd
+
+RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2
+
+# Setup the miniircd python virtual environment
+RUN python3 -m venv --system-site-packages /opt/miniircd/venv
+
+# Setup the docker pip shell
+RUN { \
+    echo '#!/bin/bash' ; \
+    echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
+    echo 'source /opt/miniircd/venv/bin/activate' ; \
+    echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \
+    } > /opt/miniircd/shell ; \
+    chmod +x /opt/miniircd/shell
+
+# Activate the pip environment on all shell calls
+SHELL [ "/opt/miniircd/shell" ]
+
+RUN pip install miniircd
+
+SHELL [ "/bin/bash", "-c" ]
+
+RUN { \
+    echo '#!/bin/bash'; \
+    echo 'echo "Container: miniircd"'; \
+    echo 'set -e'; \
+    echo 'echo "Setting pip environment to /opt/miniircd"'; \
+    echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
+    echo 'source /opt/miniircd/venv/bin/activate'; \
+    echo ''; \
+    echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/miniircd/)?shell$ ]]; then'; \
+    echo ' echo "Dropping to shell"'; \
+    echo ' shift' ; \
+    echo ' echo "Running: ${@}"' ; \
+    echo ' if [[ "${1}" != "" ]]; then' ; \
+    echo ' exec ${@}'; \
+    echo ' else' ; \
+    echo ' exec /bin/bash'; \
+    echo ' fi' ; \
+    echo 'else'; \
+    echo ' echo "Launching IRC server..."'; \
+    echo ' miniircd --setuid root "${@}"' ; \
+    echo 'fi'; \
+    } > /entrypoint.sh \
+    && chmod +x /entrypoint.sh
+
+ENTRYPOINT [ "/entrypoint.sh" ]
diff --git a/docker-compose.yml b/docker-compose.yml
index 4ab5ddd..67b44cd 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -14,9 +14,13 @@ services:
       - ollama
     networks:
       - internal
+    ports:
+      - 8911:8911
     volumes:
       - ./cache:/root/.cache
       - ./src:/opt/airc/src:rw
+      - ./doc:/opt/airc/doc:ro
+      - ./results:/opt/airc/results:rw
     cap_add: # used for running ze-monitor within airc container
       - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
       - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
@@ -36,8 +40,8 @@ services:
       - ONEAPI_DEVICE_SELECTOR=level_zero:0
     devices:
       - /dev/dri:/dev/dri
-#    ports:
-#      - 11434:11434 # ollama serve port
+    ports:
+      - 11434:11434 # ollama serve port
     networks:
       - internal
     volumes:
@@ -61,13 +65,41 @@ services:
       - /dev/dri:/dev/dri
     depends_on:
       - ollama
+      - miniircd
     ports:
       - 8888:8888 # Jupyter Notebook
+      - 60673:60673 # Gradio
     networks:
       - internal
     volumes:
       - ./jupyter:/opt/jupyter:rw
       - ./cache:/root/.cache
+    deploy:
+      resources:
+        limits:
+          memory: "0" # No memory limit (Docker treats 0 as unlimited)
+        reservations:
+          memory: "0" # No reserved memory (optional)
+    ulimits:
+      memlock: -1 # No memlock limit (allow unlimited locked memory)
+    oom_kill_disable: true # Prevents OOM killer from killing the container
+
+  miniircd:
+    build:
+      context: .
+      dockerfile: Dockerfile
+      target: miniircd
+    image: miniircd
+    env_file:
+      - .env
+    devices:
+      - /dev/dri:/dev/dri
+    ports:
+      - 6667:6667 # IRC
+    networks:
+      - internal
+    volumes:
+      - ./cache:/root/.cache
 
 networks:
   internal:
diff --git a/jupyter/stock.py b/jupyter/stock.py
index 1a22c8b..6d119e3 100644
--- a/jupyter/stock.py
+++ b/jupyter/stock.py
@@ -539,11 +539,11 @@ def create_ui():
             outputs=[chat_history, tool_history]
         )
-    # timer.tick(check_message_queue, inputs=chatbot, outputs=chatbot).then(
-    #     update_log, # This new function updates the log after chatbot processing
-    #     inputs=chatbot,
-    #     outputs=[chat_history, tool_history]
-    # )
+    timer.tick(check_message_queue, inputs=chatbot, outputs=chatbot).then(
+        update_log, # This new function updates the log after chatbot processing
+        inputs=chatbot,
+        outputs=[chat_history, tool_history]
+    )
 
     clear.click(do_clear, inputs=None, outputs=[chatbot, chat_history, tool_history], queue=False)
diff --git a/src/chunk.py b/src/chunk.py
deleted file mode 100644
index b853403..0000000
--- a/src/chunk.py
+++ /dev/null
@@ -1,562 +0,0 @@
-import requests
-from typing import List, Dict, Any, Union
-import tiktoken
-import feedparser
-import logging as log
-import datetime
-from bs4 import BeautifulSoup
-import chromadb
-import ollama
-import re
-import numpy as np
-
-def normalize(vec):
-    return vec / np.linalg.norm(vec)
-
-OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint
-MODEL_NAME = "deepseek-r1:7b"
-EMBED_MODEL = "mxbai-embed-large"
-PERSIST_DIRECTORY = "/root/.cache/chroma"
-
-client = ollama.Client(host=OLLAMA_API_URL)
-
-def extract_text_from_html_or_xml(content, is_xml=False):
-    # Parse the content
-    if is_xml:
-        soup = BeautifulSoup(content, 'xml') # Use 'xml' parser for XML content
-    else:
-        soup = BeautifulSoup(content, 'html.parser') # Default to 'html.parser' for HTML content
-
-    # Extract and return just the text
-    return soup.get_text()
-
-class Feed():
-    def __init__(self, name, url, poll_limit_min = 30, max_articles=5):
-        self.name = name
-        self.url = url
-        self.poll_limit_min = datetime.timedelta(minutes=poll_limit_min)
-        self.last_poll = None
-        self.articles = []
-        self.max_articles = max_articles
-        self.update()
-
-    def update(self):
-        now = datetime.datetime.now()
-        if self.last_poll is None or (now - self.last_poll) >= self.poll_limit_min:
-            log.info(f"Updating {self.name}")
-            feed = feedparser.parse(self.url)
-            self.articles = []
-            self.last_poll = now
-
-            if len(feed.entries) == 0:
-                return
-
-            for i, entry in enumerate(feed.entries[:self.max_articles]):
-                content = {}
-                content['source'] = self.name
-                content['id'] = f"{self.name}{i}"
-                title = entry.get("title")
-                if title:
-                    content['title'] = title
-                link = entry.get("link")
-                if link:
-                    content['link'] = link
-                text = entry.get("summary")
-                if text:
-                    content['text'] = extract_text_from_html_or_xml(text, False)
-                else:
-                    continue
-                published = entry.get("published")
-                if published:
-                    content['published'] = published
-
-                self.articles.append(content)
-        else:
-            log.info(f"Not updating {self.name} -- {self.poll_limit_min - (now - self.last_poll)}s remain to refresh.")
-        return self.articles
-
-# News RSS Feeds
-rss_feeds = [
-    Feed(name="IGN.com", url="https://feeds.feedburner.com/ign/games-all"),
-    Feed(name="BBC World", url="http://feeds.bbci.co.uk/news/world/rss.xml"),
-    Feed(name="Reuters World", url="http://feeds.reuters.com/Reuters/worldNews"),
-    Feed(name="Al Jazeera", url="https://www.aljazeera.com/xml/rss/all.xml"),
-    Feed(name="CNN World",
url="http://rss.cnn.com/rss/edition_world.rss"), - Feed(name="Time", url="https://time.com/feed/"), - Feed(name="Euronews", url="https://www.euronews.com/rss"), -# Feed(name="FeedX", url="https://feedx.net/rss/ap.xml") -] - -def get_encoding(): - """Get the tokenizer for counting tokens.""" - try: - return tiktoken.get_encoding("cl100k_base") # Default encoding used by many embedding models - except: - return tiktoken.encoding_for_model(MODEL_NAME) - -def count_tokens(text: str) -> int: - """Count the number of tokens in a text string.""" - encoding = get_encoding() - return len(encoding.encode(text)) - -def chunk_text(text: str, max_tokens: int = 512, overlap: int = 50) -> List[str]: - """ - Split a text into chunks based on token count with overlap between chunks. - - Args: - text: The text to split into chunks - max_tokens: Maximum number of tokens per chunk - overlap: Number of tokens to overlap between chunks - - Returns: - List of text chunks - """ - if not text or max_tokens <= 0: - return [] - - encoding = get_encoding() - tokens = encoding.encode(text) - chunks = [] - - i = 0 - while i < len(tokens): - # Get the current chunk of tokens - chunk_end = min(i + max_tokens, len(tokens)) - chunk_tokens = tokens[i:chunk_end] - chunks.append(encoding.decode(chunk_tokens)) - - # Move to the next position with overlap - if chunk_end == len(tokens): - break - i += max_tokens - overlap - - return chunks - -def chunk_document(document: Dict[str, Any], - text_key: str = "text", - max_tokens: int = 512, - overlap: int = 50) -> List[Dict[str, Any]]: - """ - Chunk a document dictionary into multiple chunks. - - Args: - document: Document dictionary with metadata and text - text_key: The key in the document that contains the text to chunk - max_tokens: Maximum number of tokens per chunk - overlap: Number of tokens to overlap between chunks - - Returns: - List of document dictionaries, each with chunked text and preserved metadata - """ - if text_key not in document: - raise Exception(f"{text_key} not in document") - - # Extract text and create chunks - if "title" in document: - text = f"{document["title"]}: {document[text_key]}" - else: - text = document[text_key] - chunks = chunk_text(text, max_tokens, overlap) - - # Create document chunks with preserved metadata - chunked_docs = [] - for i, chunk in enumerate(chunks): - # Create a new doc with all original fields - doc_chunk = document.copy() - # Replace text with the chunk - doc_chunk[text_key] = chunk - # Add chunk metadata - doc_chunk["chunk_id"] = i - doc_chunk["chunk_total"] = len(chunks) - chunked_docs.append(doc_chunk) - - return chunked_docs - -def init_chroma_client(persist_directory: str = PERSIST_DIRECTORY): - """Initialize and return a ChromaDB client.""" -# return chromadb.PersistentClient(path=persist_directory) - return chromadb.Client() - -def create_or_get_collection(client, collection_name: str): - """Create or get a ChromaDB collection.""" - try: - return client.get_collection( - name=collection_name - ) - except: - return client.create_collection( - name=collection_name, - metadata={"hnsw:space": "cosine"} - ) - -def process_documents_to_chroma( - documents: List[Dict[str, Any]], - collection_name: str = "document_collection", - text_key: str = "text", - max_tokens: int = 512, - overlap: int = 50, - model: str = EMBED_MODEL, - persist_directory: str = PERSIST_DIRECTORY -): - """ - Process documents, chunk them, compute embeddings, and store in ChromaDB. 
- - Args: - documents: List of document dictionaries - collection_name: Name for the ChromaDB collection - text_key: The key containing text content - max_tokens: Maximum tokens per chunk - overlap: Token overlap between chunks - model: Ollama model for embeddings - persist_directory: Directory to store ChromaDB data - """ - # Initialize ChromaDB client and collection - db = init_chroma_client(persist_directory) - collection = create_or_get_collection(db, collection_name) - - # Process each document - for doc in documents: - # Chunk the document - doc_chunks = chunk_document(doc, text_key, max_tokens, overlap) - - # Prepare data for ChromaDB - ids = [] - texts = [] - metadatas = [] - embeddings = [] - - for chunk in doc_chunks: - # Create a unique ID for the chunk - chunk_id = f"{chunk['id']}_{chunk['chunk_id']}" - - # Extract text - text = chunk[text_key] - - # Create metadata (excluding text and embedding to avoid duplication) - metadata = {k: v for k, v in chunk.items() if k != text_key and k != "embedding"} - - response = client.embed(model=model, input=text) - embedding = response["embeddings"][0] - ids.append(chunk_id) - texts.append(text) - metadatas.append(metadata) - embeddings.append(embedding) - - # Add chunks to ChromaDB collection - collection.add( - ids=ids, - documents=texts, - embeddings=embeddings, - metadatas=metadatas - ) - - return collection - -def query_chroma( - query_text: str, - collection_name: str = "document_collection", - n_results: int = 5, - model: str = EMBED_MODEL, - persist_directory: str = PERSIST_DIRECTORY -): - """ - Query ChromaDB for similar documents. - - Args: - query_text: The text to search for - collection_name: Name of the ChromaDB collection - n_results: Number of results to return - model: Ollama model for embedding the query - persist_directory: Directory where ChromaDB data is stored - - Returns: - Query results from ChromaDB - """ - # Initialize ChromaDB client and collection - db = init_chroma_client(persist_directory) - collection = create_or_get_collection(db, collection_name) - - query_response = client.embed(model=model, input=query_text) - query_embeddings = query_response["embeddings"] - - # Query the collection - results = collection.query( - query_embeddings=query_embeddings, - n_results=n_results - ) - - return results - -def print_top_match(query_results, index=0, documents=None): - """ - Print detailed information about the top matching document, - including the full original document content. - - Args: - query_results: Results from ChromaDB query - documents: Original documents dictionary to look up full content (optional) - """ - if not query_results or not query_results["ids"] or len(query_results["ids"][0]) == 0: - print("No matching documents found.") - return - - # Get the top result - top_id = query_results["ids"][0][index] - top_document_chunk = query_results["documents"][0][index] - top_metadata = query_results["metadatas"][0][index] - top_distance = query_results["distances"][0][index] - - print("="*50) - print("MATCHING DOCUMENT") - print("="*50) - print(f"Chunk ID: {top_id}") - print(f"Similarity Score: {top_distance:.4f}") # Convert distance to similarity - - print("\nCHUNK METADATA:") - for key, value in top_metadata.items(): - print(f" {key}: {value}") - - print("\nMATCHING CHUNK CONTENT:") - print(top_document_chunk[:500].strip() + ("..." 
if len(top_document_chunk) > 500 else "")) - - # Extract the original document ID from the chunk ID - # Chunk IDs are in format "doc_id_chunk_num" - original_doc_id = top_id.split('_')[0] - -def get_top_match(query_results, index=0, documents=None): - top_id = query_results["ids"][index][0] - # Extract the original document ID from the chunk ID - # Chunk IDs are in format "doc_id_chunk_num" - original_doc_id = top_id.split('_')[0] - - # Return the full document for further processing if needed - if documents is not None: - return next((doc for doc in documents if doc["id"] == original_doc_id), None) - - return None - -def show_documents(documents=None): - if not documents: - return - - # Print the top matching document - for i, doc in enumerate(documents): - print(f"Document {i+1}:") - print(f" Title: {doc['title']}") - print(f" Text: {doc['text'][:100]}...") - print() - -def show_headlines(documents=None): - if not documents: - return - - # Print the top matching document - for doc in documents: - print(f"{doc['source']}: {doc['title']}") - -def show_help(): - print("""help> -docs Show RAG docs -full Show last full top match -headlines Show the RAG headlines -prompt Show the last prompt -response Show the last response -scores Show last RAG scores -why|think Show last response's -context|match Show RAG match info to last prompt -""") - - -# Example usage -if __name__ == "__main__": - documents = [] - for feed in rss_feeds: - documents.extend(feed.articles) - - show_documents(documents=documents) - - # Process documents and store in ChromaDB - collection = process_documents_to_chroma( - documents=documents, - collection_name="research_papers", - max_tokens=256, - overlap=25, - model=EMBED_MODEL, - persist_directory="/root/.cache/chroma" - ) - - last_results = None - last_prompt = None - last_system = None - last_response = None - last_why = None - last_messages = [] - while True: - try: - search_query = input("> ").strip() - except KeyboardInterrupt as e: - print("\nExiting.") - break - - if search_query == "exit" or search_query == "quit": - print("\nExiting.") - break - - if search_query == "docs": - show_documents(documents) - continue - - if search_query == "prompt": - if last_prompt: - print(f"""last prompt> -{"="*10}system{"="*10} -{last_system} -{"="*10}prompt{"="*10} -{last_prompt}""") - else: - print(f"No prompts yet") - continue - - if search_query == "response": - if last_response: - print(f"""last response> -{"="*10}response{"="*10} -{last_response}""") - else: - print(f"No responses yet") - continue - - if search_query == "" or search_query == "help": - show_help() - continue - - if search_query == "headlines": - show_headlines(documents) - continue - - if search_query == "match" or search_query == "context": - if last_results: - print_top_match(last_results, documents=documents) - else: - print("No match to give info on") - continue - - if search_query == "why" or search_query == "think": - if last_why: - print(f""" -why> -{last_why} -""") - else: - print("No processed prompts") - continue - - if search_query == "scores": - if last_results: - for i, _ in enumerate(last_results): - print_top_match(last_results, documents=documents, index=i) - else: - print("No match to give info on") - continue - - if search_query == "full": - if last_results: - full = get_top_match(last_results, documents=documents) - if full: - print(f"""Context: -Source: {full["source"]} -Title: {full["title"]} -Link: {full["link"]} -Distance: {last_results.get("distances", [[0]])[0][0]} -Full text: 
-{full["text"]}""") - else: - print("No match to give info on") - continue - - # Query ChromaDB - results = query_chroma( - query_text=search_query, - collection_name="research_papers", - n_results=10 - ) - last_results = results - - full = get_top_match(results, documents=documents) - - headlines = "" - for doc in documents: - headlines += f"{doc['source']}: {doc['title']}\n" - - system=""" -You are the assistant. Your name is airc. - -Do not ask to help the user further. - -Provide short (less than 100 character) responses. - -Rules: -* If the user asks for information about the AI model, how, or who wrote it, provide information about the author from inside the tags. -* If you think the user might be asking about the author, ask a follow up question to clarify. -* If there is news in between the tags relevant to the prompt, use that. Always mention the source when information comes from an item. If asked for the link, provide it. -* Respond to the prompt in a single, direct response. -* Do not prefix it with a word like "Answer" - -You must follow the rules. - -""" - -# * If a user asks for weather information, include in your response "{{weather_query("country", "city", "state")}}" where the description of the weather should go. - - context = f""" -author={[ -{'info': 'James wrote the python application that is driving this RAG model on top of deepseek-r1:7b. You can find it at https://github.com/jketreno/airc'}, -{'info': 'James Ketrenos wrote the program deploying this AI model with RAG.'}, -{'info': 'James Ketrenos is a software engineer with a history in all levels of the computer stack, from the kernel to full-stack web applications. He dabbles in AI/ML and is familiar with pytorch and ollama.'}, -{'info': 'James lives in Portland, Oregon and has three kids. Two are attending Oregon State University and one is attending Williamette University.'} -]} -""" - - - context += "additional information unrelated to James Ketrenos = [" - for doc in documents: - item = {'source':doc["source"],'article':{'title':doc["title"],'link':doc["link"],'text':doc["text"]}} - context += f"{item}" - context += """] - -""" - prompt = f"{context}{search_query}" - last_prompt = prompt - last_system = system - if len(last_messages) != 0: - message_context = f"{last_messages}" - prompt = f"{message_context}{prompt}" - - print(f"system len: {len(system)}") - print(f"prompt len: {len(prompt)}") - output = client.generate( - model=MODEL_NAME, - system=f"{system}{context}", - prompt=prompt, - stream=False, - options={ 'num_ctx': 100000 } - ) - # Prune off the ... - matches = re.match(r'^(.*?)(.*)$', output['response'], flags=re.DOTALL) - if matches: - last_why = matches[1].strip() - content = matches[2].strip() - else: - print(f"[garbled] response>\n{output['response']}") - print(f"Response>\n{content}") - - last_response = content - last_messages.extend(({ - 'role': 'user', - 'name': 'james', - 'message': search_query - }, { - 'role': 'assistant', - 'message': content - })) - last_messages = last_messages[:10]