diff --git a/Dockerfile b/Dockerfile
index 8421d6f..87beb80 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -120,7 +120,7 @@ RUN { \
     echo '#!/bin/bash' ; \
     echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
     echo 'source /opt/ipex-llm/venv/bin/activate' ; \
-    echo 'bash -c "${@}"' ; \
+    echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \
     } > /opt/ipex-llm/shell ; \
     chmod +x /opt/ipex-llm/shell
 
@@ -214,7 +214,7 @@ RUN pip install "transformers>=4.45.1"
 RUN pip install 'https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl'
 
 # Install ollama python module
-RUN pip install ollama
+RUN pip install ollama langchain-ollama
 
 # pydle does not work with newer asyncio due to coroutine
 # being deprecated. Patch to work.
@@ -226,7 +226,7 @@ RUN pip install pydle \
     && rm /opt/pydle.patch
 
 RUN pip install setuptools --upgrade
-RUN pip install ollama
+RUN pip install ollama langchain-ollama
 RUN pip install feedparser bs4 chromadb
 RUN pip install tiktoken
 RUN pip install flask flask_cors flask_sock
@@ -281,6 +281,7 @@ RUN apt-get update \
 
 COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
 RUN dpkg -i /opt/ze-monitor-*deb && rm /opt/ze-monitor-*deb
+RUN usermod -aG ze-monitor root
 
 COPY /src/ /opt/airc/src/
 
@@ -345,7 +346,9 @@ RUN apt-get update \
 WORKDIR /opt/ollama
 
 # Download the nightly ollama release from ipex-llm
-RUN wget -qO - https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250226-ubuntu.tgz | \
+#ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250226-ubuntu.tgz
+ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-ipex-llm-2.2.0b20250313-ubuntu.tgz
+RUN wget -qO - ${OLLAMA_VERSION} | \
     tar --strip-components=1 -C . -xzv
 
 # Install Python from Oracular (ollama works with 3.12)
@@ -367,7 +370,7 @@ RUN { \
     echo '#!/bin/bash' ; \
     update-alternatives --set python3 /opt/python/bin/python3.11 ; \
     echo 'source /opt/ollama/venv/bin/activate' ; \
-    echo 'bash -c "${@}"' ; \
+    echo 'if [[ "${1}" != "" ]]; then bash -c ${*}; else bash; fi' ; \
     } > /opt/ollama/shell ; \
     chmod +x /opt/ollama/shell
 
@@ -375,7 +378,7 @@ RUN { \
 SHELL [ "/opt/ollama/shell" ]
 
 # Install ollama python module
-RUN pip install ollama
+RUN pip install ollama langchain-ollama
 
 SHELL [ "/bin/bash", "-c" ]
 
@@ -393,10 +396,11 @@ RUN { \
     echo ''; \
     echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama/)?shell$ ]]; then'; \
     echo ' echo "Dropping to shell"'; \
-    echo ' exec /bin/bash'; \
+    echo ' shift'; \
+    echo ' if [[ "${1}" != "" ]]; then cmd="/opt/ollama/shell ${@}"; echo "Running: ${cmd}"; exec ${cmd}; else /opt/ollama/shell; fi'; \
    echo 'else'; \
     echo ' echo "Launching Ollama server..."'; \
-    echo ' exec ./ollama serve'; \
+    echo ' exec ollama serve'; \
     echo 'fi'; \
     } > /entrypoint.sh \
     && chmod +x /entrypoint.sh
 
@@ -407,8 +411,11 @@ RUN { \
     echo 'set -e'; \
     echo 'echo "Setting pip environment to /opt/ollama"'; \
     echo 'source /opt/ollama/venv/bin/activate'; \
-    echo './ollama pull mxbai-embed-large' ; \
-    echo './ollama pull deepseek-r1:7b' ; \
+    echo 'ollama pull qwen2.5:7b' ; \
+    echo 'ollama pull llama3.2' ; \
+    echo 'ollama pull mxbai-embed-large' ; \
+    echo 'ollama pull deepseek-r1:7b' ; \
+    echo 'ollama pull mistral:7b' ; \
     } > /fetch-models.sh \
     && chmod +x /fetch-models.sh
 
@@ -416,6 +423,8 @@ ENV PYTHONUNBUFFERED=1
 
 VOLUME [" /root/.ollama" ]
 
+ENV PATH=/opt/ollama:${PATH}
+
 ENTRYPOINT [ "/entrypoint.sh" ]
 
 FROM airc AS jupyter
@@ -455,7 +464,6 @@ RUN { \
     echo ' --notebook-dir=/opt/jupyter \' ; \
     echo ' --port 8888 \' ; \
     echo ' --ip 0.0.0.0 \' ; \
-    echo ' --no-browser \' ; \
     echo ' --allow-root \' ; \
     echo ' --ServerApp.token= \' ; \
     echo ' --ServerApp.password= \' ; \
@@ -469,6 +477,8 @@ RUN { \
     } > /entrypoint-jupyter.sh \
     && chmod +x /entrypoint-jupyter.sh
 
+# echo ' --no-browser \' ; \
+
 ENTRYPOINT [ "/entrypoint-jupyter.sh" ]
 
 FROM ubuntu:oracular AS miniircd
diff --git a/README.md b/README.md
index 0ce6c56..7a9c7a5 100644
--- a/README.md
+++ b/README.md
@@ -2,9 +2,9 @@
 
 AI is Really Cool
 
-This project provides a simple IRC chat client. It runs the neuralchat model, enhanced with a little bit of RAG to fetch news RSS feeds.
+This project provides an AI chat client. It runs the neuralchat model, enhanced with a little bit of RAG to fetch news RSS feeds.
 
-Internally, it is built using PyTorch 2.6 and the Intel IPEX/LLM.
+Internally, it is built using PyTorch 2.6, Intel IPEX/LLM, and Python 3.11 (several pip packages were not yet available for Python 3.12, which ships with Ubuntu Oracular 24.10, the release these containers are based on).
 
 NOTE: If running on an Intel Arc A series graphics processor, fp64 is not supported and may need to either be emulated or have the model quantized. It has been a while since I've had an A series GPU to test on, so if you run into problems please file an [issue](https://github.com/jketreno/airc/issues)--I have some routines I can put in, but don't have a way to test them.
 
@@ -31,16 +31,56 @@ cd airc
 docker compose build
 ```
 
+## Containers
+
+This project provides the following containers:
+
+| Container | Purpose                                                    |
+|:----------|:-----------------------------------------------------------|
+| airc      | Base container with GPU packages installed and configured  |
+| jupyter   | airc + Jupyter notebook for running Jupyter sessions       |
+| miniircd  | Tiny deployment of an IRC server for testing IRC agents    |
+| ollama    | Installation of Intel's pre-built Ollama (ipex-llm)        |
+
+While developing airc, Hugging Face is sometimes used directly, with models loaded via PyTorch. At other times, especially during rapid development, the Ollama deployment is used. This combination makes it easy to exercise the local GPUs through either the Hugging Face/PyTorch code or the local Ollama server.
+
+To see which models are easily deployable with Ollama, see the [Ollama Model List](https://ollama.com/search).
+
+Prior to using a new model, you need to download it:
+
+```bash
+MODEL=qwen2.5:7b
+docker compose exec -it ollama ollama pull ${MODEL}
+```
+
+To download many common models for testing against, you can use the `fetch-models.sh` script, which will download:
+
+* qwen2.5:7b
+* llama3.2
+* mxbai-embed-large
+* deepseek-r1:7b
+* mistral:7b
+
+```bash
+docker compose exec -it ollama /fetch-models.sh
+```
+
+The persisted volume mount can grow quite large with models, GPU kernel caches, etc. During the development of this project, the `./cache` directory has grown to consume ~250G of disk space.
+
 ## Running
 
-In order to download the models, you need to have a Hugging Face token. See https://huggingface.co/settings/tokens for information on obtaining a token.
+In order to download Hugging Face models, you need to have a Hugging Face token. See https://huggingface.co/settings/tokens for information on obtaining a token.
 
 Edit .env to add the following:
 
 ```.env
 HF_ACCESS_TOKEN=
+HF_HOME=/root/.cache
 ```
 
+HF_HOME is set so that, when running in the containers, it points to a
+volume-mounted directory, allowing downloaded models to persist across
+container restarts.
+
 NOTE: Models downloaded by most examples will be placed in the ./cache directory, which is bind mounted to the container.
 
 ### AIRC
diff --git a/docker-compose.yml b/docker-compose.yml
index dddbf7b..c289ed9 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -21,7 +21,7 @@ services:
       - ./src:/opt/airc/src:rw
       - ./doc:/opt/airc/doc:ro
       - ./results:/opt/airc/results:rw
-    cap_add: # used for running ze-monitor within airc container
+    cap_add: # used for running ze-monitor within container
       - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
       - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
       - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
@@ -47,7 +47,7 @@ services:
     volumes:
       - ./cache:/root/.cache # Cache hub models and neo_compiler_cache
       - ./ollama:/root/.ollama # Cache the ollama models
-    cap_add: # used for running ze-monitor within airc container
+    cap_add: # used for running ze-monitor within container
       - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
      - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
       - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
@@ -84,7 +84,11 @@ services:
         memory: "0" # No reserved memory (optional)
     ulimits:
       memlock: -1 # Prevents memory from being locked
-    oom_kill_disable: true # Prevents OOM killer from killing the container
+    #oom_kill_disable: true # Prevents OOM killer from killing the container
+    cap_add: # used for running ze-monitor within container
+      - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
+      - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
+      - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
 
   miniircd:
     build:
diff --git a/src/ketr-chat/src/App.css b/src/ketr-chat/src/App.css
index a0c3057..0259b62 100644
--- a/src/ketr-chat/src/App.css
+++ b/src/ketr-chat/src/App.css
@@ -10,6 +10,32 @@ div {
   flex-direction: column;
 }
 
+.SystemInfo {
+  display: flex;
+  flex-direction: column;
+  gap: 5px;
+  padding: 5px;
+  flex-grow: 1;
+}
+
+.SystemInfoItem {
+  display: flex; /* Grid for individual items */
+  flex-direction: row;
+  flex-grow: 1;
+}
+
+.SystemInfoItem > div:first-child {
+  display: flex;
+  justify-self: end; /* Align the first column content to the right */
+  width: 10rem;
+}
+
+.SystemInfoItem > div:last-child {
+  display: flex;
+  flex-grow: 1;
+  justify-self: end; /* Let the second column fill the remaining width */
+}
+
 .ChatBox {
   display: flex;
   flex-direction: column;
diff --git a/src/ketr-chat/src/App.tsx b/src/ketr-chat/src/App.tsx
index 5e908f5..8f1aa23 100644
--- a/src/ketr-chat/src/App.tsx
+++ b/src/ketr-chat/src/App.tsx
@@ -1,4 +1,4 @@
-import React, { useState, useEffect, useRef, useCallback } from 'react';
+import React, { useState, useEffect, useRef, useCallback, ReactElement } from 'react';
 import FormGroup from '@mui/material/FormGroup';
 import FormControlLabel from '@mui/material/FormControlLabel';
 import { useTheme } from '@mui/material';
@@ -70,6 +70,7 @@ interface ControlsParams {
     tools: Tool[],
     rags: Tool[],
     systemPrompt: string,
+    systemInfo: SystemInfo,
     toggleTool: (tool: Tool) => void,
     toggleRag: (tool: Tool) => void,
     setRags: (rags: Tool[]) => void,
@@ -77,7 +78,50 @@ interface ControlsParams {
     reset: (types: ("rags" | "tools" | "history" | "system-prompt")[], message: string) => Promise
 };
 
-const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemPrompt, reset }: ControlsParams) => {
+type SystemInfo = {
+    "Installed RAM (GB)": string,
+    "Graphics Cards": string[],
+    "CPU": string
+};
+
+const SystemInfoComponent: React.FC<{ systemInfo: SystemInfo }> = ({ systemInfo }) => {
+    const [systemElements, setSystemElements] = useState<ReactElement[]>([]);
+
+    const convertToSymbols = (text: string) => {
+        return text
+            .replace(/\(R\)/g, '®')   // Replace (R) with the ® symbol
+            .replace(/\(C\)/g, '©')   // Replace (C) with the © symbol
+            .replace(/\(TM\)/g, '™'); // Replace (TM) with the ™ symbol
+    };
+
+    useEffect(() => {
+        const elements = Object.entries(systemInfo).flatMap(([k, v]) => {
+            // If v is an array, repeat for each card
+            if (Array.isArray(v)) {
+                return v.map((card, index) => (
+                    <div className="SystemInfoItem" key={`${k}-${index}`}>
+                        <div>{convertToSymbols(k)} {index}</div>
+                        <div>{convertToSymbols(card)}</div>
+                    </div>
+                ));
+            }
+
+            // If it's not an array, handle normally
+            return (
+                <div className="SystemInfoItem" key={k}>
+                    <div>{convertToSymbols(k)}</div>
+                    <div>{convertToSymbols(String(v))}</div>
+                </div>
+            );
+        });
+
+        setSystemElements(elements);
+    }, [systemInfo]);
+
+    return <div className="SystemInfo">{systemElements}</div>;
+};
+
+const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemPrompt, reset, systemInfo }: ControlsParams) => {
     const [editSystemPrompt, setEditSystemPrompt] = useState(systemPrompt);
 
     useEffect(() => {
@@ -110,9 +154,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
-                    }
-                >
+                }>
                     System Prompt
@@ -135,9 +177,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
-                    }
-                >
+                }>
                     Tools
@@ -157,9 +197,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
-                    }
-                >
+                }>
                     RAG
@@ -178,6 +216,17 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
                 }
+            <Accordion>
+                <AccordionSummary expandIcon={<ExpandMoreIcon />}>
+                    System Information
+                </AccordionSummary>
+                <AccordionDetails>
+                    <div>
+                        The server is running on the following hardware:
+                    </div>
+                    <SystemInfoComponent systemInfo={systemInfo} />
+                </AccordionDetails>
+            </Accordion>
 
         );
@@ -199,6 +248,7 @@ const App = () => {
     const [rags, setRags] = useState([]);
     const [systemPrompt, setSystemPrompt] = useState("");
     const [serverSystemPrompt, setServerSystemPrompt] = useState("");
+    const [systemInfo, setSystemInfo] = useState<SystemInfo | undefined>(undefined);
 
     // Scroll to bottom of conversation when conversation updates
     useEffect(() => {
@@ -214,6 +264,27 @@ const App = () => {
         setSnackOpen(true);
     }, []);
 
+    // Get the system information
+    useEffect(() => {
+        if (systemInfo !== undefined || sessionId === undefined) {
+            return;
+        }
+        fetch(getConnectionBase(loc) + `/api/system-info/${sessionId}`, {
+            method: 'GET',
+            headers: {
+                'Content-Type': 'application/json',
+            },
+        })
+            .then(response => response.json())
+            .then(data => {
+                setSystemInfo(data);
+            })
+            .catch(error => {
+                console.error('Error obtaining system information:', error);
+                setSnack("Unable to obtain system information.", "error");
+            });
+    }, [systemInfo, setSystemInfo, loc, setSnack, sessionId])
+
     // Set the initial chat history to "loading" or the welcome message if loaded.
     useEffect(() => {
         if (sessionId === undefined) {
@@ -468,7 +539,7 @@ const App = () => {
 
     const drawer = (
         <>
-            {sessionId !== undefined && }
+            {sessionId !== undefined && systemInfo !== undefined && }
         </>
     );
@@ -735,7 +806,8 @@ const App = () => {
                                     )}
                                     {message.role === 'assistant' ? (
-                                        
+                                        
+                                        {/* */}
                                     ) : (
                                         {formattedContent}
diff --git a/src/server.py b/src/server.py
index c285336..d1cd6f1 100644
--- a/src/server.py
+++ b/src/server.py
@@ -3,7 +3,6 @@
 # Standard library modules (no try-except needed)
 import argparse
 import asyncio
-import anyio
 import json
 import logging
 import os
@@ -15,6 +14,8 @@ import textwrap
 import threading
 import uuid
 import random
+import subprocess
+import re
 
 def try_import(module_name, pip_name=None):
     try:
@@ -26,7 +27,6 @@ def try_import(module_name, pip_name=None):
 # Third-party modules with import checks
 try_import('gradio')
 try_import('ollama')
-try_import('openai')
 try_import('pytz')
 try_import('requests')
 try_import('yfinance', 'yfinance')
@@ -35,13 +35,13 @@ try_import('geopy', 'geopy')
 try_import('hyphen', 'PyHyphen')
 try_import('bs4', 'beautifulsoup4')
 try_import('nltk')
+try_import('fastapi')
 
 import nltk
 from dotenv import load_dotenv
 from geopy.geocoders import Nominatim
 import gradio as gr
 import ollama
-import openai
 import pytz
 import requests
 import yfinance as yf
@@ -50,6 +50,7 @@ from bs4 import BeautifulSoup
 from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
 from fastapi.responses import JSONResponse, StreamingResponse, FileResponse, RedirectResponse
 from fastapi.middleware.cors import CORSMiddleware
+from utils import rag
 
 from tools import (
     get_weather_by_location,
@@ -63,11 +64,59 @@ rags = [
     { "name": "LKML", "enabled": False, "description": "Full associative data for entire LKML mailing list archive." },
 ]
 
+
+def get_installed_ram():
+    try:
+        with open('/proc/meminfo', 'r') as f:
+            meminfo = f.read()
+            match = re.search(r'MemTotal:\s+(\d+)', meminfo)
+            if match:
+                return f"{round(int(match.group(1)) / 1024**2, 2)}GB" # Convert KB to GB
+    except Exception as e:
+        return f"Error retrieving RAM: {e}"
+
+def get_graphics_cards():
+    gpus = []
+    try:
+        # Run the ze-monitor utility
+        result = subprocess.run(['ze-monitor'], capture_output=True, text=True, check=True)
+
+        # Clean up the output (remove leading/trailing whitespace and newlines)
+        output = result.stdout.strip()
+        for line in output.splitlines():
+            # Updated regex to handle GPU names containing parentheses
+            match = re.match(r'^[^(]*\((.*)\)', line)
+            if match:
+                gpus.append(match.group(1))
+
+        return gpus
+    except Exception as e:
+        return f"Error retrieving GPU info: {e}"
+
+def get_cpu_info():
+    try:
+        with open('/proc/cpuinfo', 'r') as f:
+            cpuinfo = f.read()
+            model_match = re.search(r'model name\s+:\s+(.+)', cpuinfo)
+            cores_match = re.findall(r'processor\s+:\s+\d+', cpuinfo)
+            if model_match and cores_match:
+                return f"{model_match.group(1)} with {len(cores_match)} cores"
+    except Exception as e:
+        return f"Error retrieving CPU info: {e}"
+
+def system_info():
+    return {
+        "Installed RAM": get_installed_ram(),
+        "Graphics Card": get_graphics_cards(),
+        "CPU": get_cpu_info()
+    }
+
 # %%
 # Defaults
 OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint
 #MODEL_NAME = "deepseek-r1:7b"
-MODEL_NAME = "llama3.2"
+#MODEL_NAME = "llama3.2"
+MODEL_NAME = "qwen2.5:7b"
 LOG_LEVEL="debug"
 USE_TLS=False
 WEB_HOST="0.0.0.0"
@@ -419,7 +468,11 @@ class WebServer:
             context = self.upsert_context(context_id)
             system_prompt = context["system"][0]["content"];
             return JSONResponse({ "system-prompt": system_prompt })
-
+
+        @self.app.get('/api/system-info/{context_id}')
+        async def get_system_info(context_id: str):
+            return JSONResponse(system_info())
+
         @self.app.post('/api/chat/{context_id}')
         async def chat_endpoint(context_id: str, request: Request):
             context = self.upsert_context(context_id)
@@ -662,5 +715,4 @@ def main():
 
     logging.info(f"Starting web server at http://{args.web_host}:{args.web_port}")
     web_server.run(host=args.web_host, port=args.web_port, use_reloader=False)
 
-# Run the main function using anyio
 main()
diff --git a/src/utils/__init__.py b/src/utils/__init__.py
index f98d9d9..28953e0 100644
--- a/src/utils/__init__.py
+++ b/src/utils/__init__.py
@@ -2,9 +2,7 @@
 from . import defines
 
 # Import rest as `utils.*` accessible
-from .chunk import *
-from .rss import *
-from .chroma import *
+from .rag import *
 
 # Expose only public names (avoid importing hidden/internal names)
 __all__ = [name for name in dir() if not name.startswith("_")]
diff --git a/src/utils/defines.py b/src/utils/defines.py
index d7ae53d..7ba8219 100644
--- a/src/utils/defines.py
+++ b/src/utils/defines.py
@@ -1,4 +1,4 @@
 ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
-model="deepseek-r1:7b"
+model="qwen2.5:7b"
 encoding_model="mxbai-embed-large"
 persist_directory="./chromadb"
\ No newline at end of file
diff --git a/src/utils/rag.py b/src/utils/rag.py
new file mode 100644
index 0000000..8fb45ee
--- /dev/null
+++ b/src/utils/rag.py
@@ -0,0 +1 @@
+rag = "exists"
\ No newline at end of file
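
For reference, a minimal sketch (not part of the patch) of exercising the new `/api/system-info/{context_id}` endpoint added in `src/server.py`. The host and port (`localhost:5000`) are assumptions, since the server's port is not visible in this diff, and the context id is arbitrary because the endpoint as written returns `system_info()` without consulting it:

```python
# Minimal sketch: query the system-info endpoint added by this patch.
# Assumption: the web server is reachable at localhost:5000.
import json
import urllib.request
import uuid

context_id = str(uuid.uuid4())  # hypothetical context id; the endpoint ignores it
url = f"http://localhost:5000/api/system-info/{context_id}"

with urllib.request.urlopen(url) as response:
    info = json.loads(response.read().decode("utf-8"))

# Keys mirror system_info() in src/server.py:
# "Installed RAM" (string), "Graphics Card" (list of GPU names), "CPU" (string)
for key, value in info.items():
    print(f"{key}: {value}")
```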