Docs, trademarks, and system info output

James Ketr 2025-04-01 13:59:28 -07:00
parent f5ce84a310
commit 973b442642
9 changed files with 243 additions and 40 deletions

View File

@ -120,7 +120,7 @@ RUN { \
echo '#!/bin/bash' ; \
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
echo 'source /opt/ipex-llm/venv/bin/activate' ; \
echo 'bash -c "${@}"' ; \
echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \
} > /opt/ipex-llm/shell ; \
chmod +x /opt/ipex-llm/shell
@ -214,7 +214,7 @@ RUN pip install "transformers>=4.45.1"
RUN pip install 'https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl'
# Install ollama python module
RUN pip install ollama
RUN pip install ollama langchain-ollama
# pydle does not work with newer asyncio due to coroutine
# being deprecated. Patch to work.
@ -226,7 +226,7 @@ RUN pip install pydle \
&& rm /opt/pydle.patch
RUN pip install setuptools --upgrade
RUN pip install ollama
RUN pip install ollama langchain-ollama
RUN pip install feedparser bs4 chromadb
RUN pip install tiktoken
RUN pip install flask flask_cors flask_sock
@ -281,6 +281,7 @@ RUN apt-get update \
COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
RUN dpkg -i /opt/ze-monitor-*deb && rm /opt/ze-monitor-*deb
RUN usermod -aG ze-monitor root
COPY /src/ /opt/airc/src/
@ -345,7 +346,9 @@ RUN apt-get update \
WORKDIR /opt/ollama
# Download the nightly ollama release from ipex-llm
RUN wget -qO - https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250226-ubuntu.tgz | \
#ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250226-ubuntu.tgz
ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-ipex-llm-2.2.0b20250313-ubuntu.tgz
RUN wget -qO - ${OLLAMA_VERSION} | \
tar --strip-components=1 -C . -xzv
# Install Python from Oracular (ollama works with 3.12)
@ -367,7 +370,7 @@ RUN { \
echo '#!/bin/bash' ; \
update-alternatives --set python3 /opt/python/bin/python3.11 ; \
echo 'source /opt/ollama/venv/bin/activate' ; \
echo 'bash -c "${@}"' ; \
echo 'if [[ "${1}" != "" ]]; then bash -c ${*}; else bash; fi' ; \
} > /opt/ollama/shell ; \
chmod +x /opt/ollama/shell
@ -375,7 +378,7 @@ RUN { \
SHELL [ "/opt/ollama/shell" ]
# Install ollama python module
RUN pip install ollama
RUN pip install ollama langchain-ollama
SHELL [ "/bin/bash", "-c" ]
@ -393,10 +396,11 @@ RUN { \
echo ''; \
echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama/)?shell$ ]]; then'; \
echo ' echo "Dropping to shell"'; \
echo ' exec /bin/bash'; \
echo ' shift'; \
echo ' if [[ "${1}" != "" ]]; then cmd="/opt/ollama/shell ${@}"; echo "Running: ${cmd}"; exec ${cmd}; else /opt/ollama/shell; fi'; \
echo 'else'; \
echo ' echo "Launching Ollama server..."'; \
echo ' exec ./ollama serve'; \
echo ' exec ollama serve'; \
echo 'fi'; \
} > /entrypoint.sh \
&& chmod +x /entrypoint.sh
@ -407,8 +411,11 @@ RUN { \
echo 'set -e'; \
echo 'echo "Setting pip environment to /opt/ollama"'; \
echo 'source /opt/ollama/venv/bin/activate'; \
echo './ollama pull mxbai-embed-large' ; \
echo './ollama pull deepseek-r1:7b' ; \
echo 'ollama pull qwen2.5:7b' ; \
echo 'ollama pull llama3.2' ; \
echo 'ollama pull mxbai-embed-large' ; \
echo 'ollama pull deepseek-r1:7b' ; \
echo 'ollama pull mistral:7b' ; \
} > /fetch-models.sh \
&& chmod +x /fetch-models.sh
@ -416,6 +423,8 @@ ENV PYTHONUNBUFFERED=1
VOLUME [" /root/.ollama" ]
ENV PATH=/opt/ollama:${PATH}
ENTRYPOINT [ "/entrypoint.sh" ]
FROM airc AS jupyter
@ -455,7 +464,6 @@ RUN { \
echo ' --notebook-dir=/opt/jupyter \' ; \
echo ' --port 8888 \' ; \
echo ' --ip 0.0.0.0 \' ; \
echo ' --no-browser \' ; \
echo ' --allow-root \' ; \
echo ' --ServerApp.token= \' ; \
echo ' --ServerApp.password= \' ; \
@ -469,6 +477,8 @@ RUN { \
} > /entrypoint-jupyter.sh \
&& chmod +x /entrypoint-jupyter.sh
# echo ' --no-browser \' ; \
ENTRYPOINT [ "/entrypoint-jupyter.sh" ]
FROM ubuntu:oracular AS miniircd

View File

@ -2,9 +2,9 @@
AI is Really Cool
This project provides a simple IRC chat client. It runs the neuralchat model, enhanced with a little bit of RAG to fetch news RSS feeds.
This project provides an AI chat client. It runs the neuralchat model, enhanced with a little bit of RAG to fetch news RSS feeds.
Internally, it is built using PyTorch 2.6 and the Intel IPEX/LLM.
Internally, it is built using PyTorch 2.6, Intel IPEX/LLM, and Python 3.11 (several pip packages were not yet available for Python 3.12, which ships with Ubuntu Oracular 24.10, the release these containers are based on).
NOTE: If running on an Intel Arc A-series graphics processor, fp64 is not supported; it may need to be emulated, or the model may need to be quantized. It has been a while since I've had an A-series GPU to test on, so if you run into problems, please file an [issue](https://github.com/jketreno/airc/issues)--I have some routines I can put in, but I don't have a way to test them.
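If you do end up quantizing, a rough sketch of low-bit loading with IPEX-LLM's transformers wrapper is shown below; the model id, `load_in_4bit`, and the `xpu` device are illustrative assumptions, not settings taken from this repo:

```python
# Hypothetical sketch: quantized (4-bit) loading with IPEX-LLM, per the A-series note above.
import torch
from transformers import AutoTokenizer
from ipex_llm.transformers import AutoModelForCausalLM

model_id = "Intel/neural-chat-7b-v3-3"  # illustrative; use whatever model you actually run
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    load_in_4bit=True,       # quantize weights instead of relying on fp64 support
    trust_remote_code=True,
).to("xpu")                  # Intel GPU device exposed by IPEX

inputs = tokenizer("Hello!", return_tensors="pt").to("xpu")
with torch.inference_mode():
    out = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```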
@ -31,16 +31,56 @@ cd airc
docker compose build
```
## Containers
This project provides the following containers:
| Container | Purpose |
|:----------|:---------------------------------------------------------------|
| airc | Base container with GPU packages installed and configured |
| jupyter | airc + Jupyter notebook for running Jupyter sessions |
| miniircd | Tiny deployment of an IRC server for testing IRC agents |
| ollama | Installation of Intel's pre-built Ollama (from the ipex-llm project) |
While developing airc, Hugging Face is sometimes used directly, with models loaded via PyTorch; at other times, especially during rapid development, the ollama deployment is used. This combination makes it easy to use the locally available GPUs from either the local ollama server or the Hugging Face/PyTorch code.
To see which models are easily deployable with Ollama, see the [Ollama Model List](https://ollama.com/search).
Prior to using a new model, you need to download it:
```bash
MODEL=qwen2.5:7b
docker compose exec -it ollama ollama pull ${MODEL}
```
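Once a model is pulled, it can also be driven from Python using the `ollama` client installed in the containers. A minimal sketch, assuming the compose service name `ollama` on the default port (the model name is just an example):

```python
# Sketch: chat with the compose `ollama` service from inside another container.
import ollama

client = ollama.Client(host="http://ollama:11434")
response = client.chat(
    model="qwen2.5:7b",  # any model that has already been pulled
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response["message"]["content"])
```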
To download many common models for testing against, you can use the `fetch-models.sh` script, which downloads:
* qwen2.5:7b
* llama3.2
* mxbai-embed-large
* deepseek-r1:7b
* mistral:7b
```bash
docker compose exec -it ollama /fetch-models.sh
```
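The containers also install `langchain-ollama`, so the same models can be used through LangChain. A rough sketch under the same endpoint assumption as above (the model name is illustrative):

```python
# Sketch: drive a pulled model through langchain-ollama instead of the raw client.
from langchain_ollama import ChatOllama

llm = ChatOllama(model="llama3.2", base_url="http://ollama:11434")
print(llm.invoke("Summarize what RAG is in one sentence.").content)
```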
The persisted volume mounts can grow quite large with models, GPU kernel caches, etc. During the development of this project, the `./cache` directory has grown to consume ~250 GB of disk space.
## Running
In order to download the models, you need to have a Hugging Face token. See https://huggingface.co/settings/tokens for information on obtaining a token.
In order to download Hugging Face models, you need to have a Hugging Face token. See https://huggingface.co/settings/tokens for information on obtaining a token.
Edit .env to add the following:
```.env
HF_ACCESS_TOKEN=<access token from huggingface>
HF_HOME=/root/.cache
```
HF_HOME is set so that, when running in the containers, it points to a volume-mounted directory, allowing downloaded models to persist between runs.
NOTE: Models downloaded by most examples will be placed in the ./cache directory, which is bind-mounted into the container.
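As a rough sketch of how these variables are typically consumed when pulling a Hugging Face model directly (the variable names match the `.env` entries above; the model id and helper calls are illustrative, not code from this repo):

```python
# Sketch: authenticate with the token from .env; HF_HOME decides where downloads are cached.
import os
from dotenv import load_dotenv
from huggingface_hub import login
from transformers import AutoTokenizer

load_dotenv()                               # reads HF_ACCESS_TOKEN / HF_HOME from .env
login(token=os.environ["HF_ACCESS_TOKEN"])  # required for gated or private models
# Downloads land under HF_HOME (/root/.cache in the container, bind-mounted to ./cache)
tokenizer = AutoTokenizer.from_pretrained("Intel/neural-chat-7b-v3-3")
```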
### AIRC

View File

@ -21,7 +21,7 @@ services:
- ./src:/opt/airc/src:rw
- ./doc:/opt/airc/doc:ro
- ./results:/opt/airc/results:rw
cap_add: # used for running ze-monitor within airc container
cap_add: # used for running ze-monitor within container
- CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
- CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
- CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
@ -47,7 +47,7 @@ services:
volumes:
- ./cache:/root/.cache # Cache hub models and neo_compiler_cache
- ./ollama:/root/.ollama # Cache the ollama models
cap_add: # used for running ze-monitor within airc container
cap_add: # used for running ze-monitor within container
- CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
- CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
- CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
@ -84,7 +84,11 @@ services:
memory: "0" # No reserved memory (optional)
ulimits:
memlock: -1 # Prevents memory from being locked
oom_kill_disable: true # Prevents OOM killer from killing the container
#oom_kill_disable: true # Prevents OOM killer from killing the container
cap_add: # used for running ze-monitor within container
- CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
- CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
- CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
miniircd:
build:

View File

@ -10,6 +10,32 @@ div {
flex-direction: column;
}
.SystemInfo {
display: flex;
flex-direction: column;
gap: 5px;
padding: 5px;
flex-grow: 1;
}
.SystemInfoItem {
display: flex; /* Flex row for individual items */
flex-direction: row;
flex-grow: 1;
}
.SystemInfoItem > div:first-child {
display: flex;
justify-self: end; /* Align the first column content to the right */
width: 10rem;
}
.SystemInfoItem > div:last-child {
display: flex;
flex-grow: 1;
justify-self: end; /* Align the second column content to the right */
}
.ChatBox {
display: flex;
flex-direction: column;

View File

@ -1,4 +1,4 @@
import React, { useState, useEffect, useRef, useCallback } from 'react';
import React, { useState, useEffect, useRef, useCallback, ReactElement } from 'react';
import FormGroup from '@mui/material/FormGroup';
import FormControlLabel from '@mui/material/FormControlLabel';
import { useTheme } from '@mui/material';
@ -70,6 +70,7 @@ interface ControlsParams {
tools: Tool[],
rags: Tool[],
systemPrompt: string,
systemInfo: SystemInfo,
toggleTool: (tool: Tool) => void,
toggleRag: (tool: Tool) => void,
setRags: (rags: Tool[]) => void,
@ -77,7 +78,50 @@ interface ControlsParams {
reset: (types: ("rags" | "tools" | "history" | "system-prompt")[], message: string) => Promise<void>
};
const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemPrompt, reset }: ControlsParams) => {
type SystemInfo = {
"Installed RAM (GB)": string,
"Graphics Cards": string[],
"CPU": string
};
const SystemInfoComponent: React.FC<{ systemInfo: SystemInfo }> = ({ systemInfo }) => {
const [systemElements, setSystemElements] = useState<ReactElement[]>([]);
const convertToSymbols = (text: string) => {
return text
.replace(/\(R\)/g, '®') // Replace (R) with the ® symbol
.replace(/\(C\)/g, '©') // Replace (C) with the © symbol
.replace(/\(TM\)/g, '™'); // Replace (TM) with the ™ symbol
};
useEffect(() => {
const elements = Object.entries(systemInfo).flatMap(([k, v]) => {
// If v is an array, repeat for each card
if (Array.isArray(v)) {
return v.map((card, index) => (
<div key={index} className="SystemInfoItem">
<div>{convertToSymbols(k)} {index}</div>
<div>{convertToSymbols(card)}</div>
</div>
));
}
// If it's not an array, handle normally
return (
<div key={k} className="SystemInfoItem">
<div>{convertToSymbols(k)}</div>
<div>{convertToSymbols(String(v))}</div>
</div>
);
});
setSystemElements(elements);
}, [systemInfo]);
return <div className="SystemInfo">{systemElements}</div>;
};
const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemPrompt, reset, systemInfo }: ControlsParams) => {
const [editSystemPrompt, setEditSystemPrompt] = useState<string>(systemPrompt);
useEffect(() => {
@ -110,9 +154,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
</Typography>
<Accordion>
<AccordionSummary
expandIcon={<ExpandMoreIcon />}
>
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Typography component="span">System Prompt</Typography>
</AccordionSummary>
<AccordionActions style={{ flexDirection: "column" }}>
@ -135,9 +177,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
</AccordionActions>
</Accordion>
<Accordion>
<AccordionSummary
expandIcon={<ExpandMoreIcon />}
>
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Typography component="span">Tools</Typography>
</AccordionSummary>
<AccordionDetails>
@ -157,9 +197,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
</AccordionActions>
</Accordion>
<Accordion>
<AccordionSummary
expandIcon={<ExpandMoreIcon />}
>
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Typography component="span">RAG</Typography>
</AccordionSummary>
<AccordionDetails>
@ -178,6 +216,17 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
}</FormGroup>
</AccordionActions>
</Accordion>
<Accordion>
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Typography component="span">System Information</Typography>
</AccordionSummary>
<AccordionDetails>
The server is running on the following hardware:
</AccordionDetails>
<AccordionActions>
<SystemInfoComponent systemInfo={systemInfo} />
</AccordionActions>
</Accordion>
<Button onClick={() => { reset(["history"], "History cleared."); }}>Clear Chat History</Button>
<Button onClick={() => { reset(["rags", "tools", "system-prompt"], "Default settings restored.") }}>Reset to defaults</Button>
</div>);
@ -199,6 +248,7 @@ const App = () => {
const [rags, setRags] = useState<Tool[]>([]);
const [systemPrompt, setSystemPrompt] = useState<string>("");
const [serverSystemPrompt, setServerSystemPrompt] = useState<string>("");
const [systemInfo, setSystemInfo] = useState<SystemInfo | undefined>(undefined);
// Scroll to bottom of conversation when conversation updates
useEffect(() => {
@ -214,6 +264,27 @@ const App = () => {
setSnackOpen(true);
}, []);
// Get the system information
useEffect(() => {
if (systemInfo !== undefined || sessionId === undefined) {
return;
}
fetch(getConnectionBase(loc) + `/api/system-info/${sessionId}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
},
})
.then(response => response.json())
.then(data => {
setSystemInfo(data);
})
.catch(error => {
console.error('Error obtaining system information:', error);
setSnack("Unable to obtain system information.", "error");
});
}, [systemInfo, setSystemInfo, loc, setSnack, sessionId])
// Set the initial chat history to "loading" or the welcome message if loaded.
useEffect(() => {
if (sessionId === undefined) {
@ -468,7 +539,7 @@ const App = () => {
const drawer = (
<>
{sessionId !== undefined && <Controls {...{ tools, rags, reset, systemPrompt, toggleTool, toggleRag, setRags, setSystemPrompt }} />}
{sessionId !== undefined && systemInfo !== undefined && <Controls {...{ tools, rags, reset, systemPrompt, toggleTool, toggleRag, setRags, setSystemPrompt, systemInfo }} />}
</>
);
@ -735,7 +806,8 @@ const App = () => {
)}
{message.role === 'assistant' ? (
<div className="markdown-content">
<Markdown remarkPlugins={[remarkMath]} rehypePlugins={[rehypeKatex]} children={formattedContent} />
<Markdown children={formattedContent} />
{/* <Markdown remarkPlugins={[remarkMath]} rehypePlugins={[rehypeKatex]} children={formattedContent} /> */}
</div>
) : (
<div>{formattedContent}</div>

View File

@ -3,7 +3,6 @@
# Standard library modules (no try-except needed)
import argparse
import asyncio
import anyio
import json
import logging
import os
@ -15,6 +14,8 @@ import textwrap
import threading
import uuid
import random
import subprocess
import re
def try_import(module_name, pip_name=None):
try:
@ -26,7 +27,6 @@ def try_import(module_name, pip_name=None):
# Third-party modules with import checks
try_import('gradio')
try_import('ollama')
try_import('openai')
try_import('pytz')
try_import('requests')
try_import('yfinance', 'yfinance')
@ -35,13 +35,13 @@ try_import('geopy', 'geopy')
try_import('hyphen', 'PyHyphen')
try_import('bs4', 'beautifulsoup4')
try_import('nltk')
try_import('fastapi')
import nltk
from dotenv import load_dotenv
from geopy.geocoders import Nominatim
import gradio as gr
import ollama
import openai
import pytz
import requests
import yfinance as yf
@ -50,6 +50,7 @@ from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
from fastapi.responses import JSONResponse, StreamingResponse, FileResponse, RedirectResponse
from fastapi.middleware.cors import CORSMiddleware
from utils import rag
from tools import (
get_weather_by_location,
@ -63,11 +64,59 @@ rags = [
{ "name": "LKML", "enabled": False, "description": "Full associative data for entire LKML mailing list archive." },
]
def get_installed_ram():
try:
with open('/proc/meminfo', 'r') as f:
meminfo = f.read()
match = re.search(r'MemTotal:\s+(\d+)', meminfo)
if match:
return f"{round(int(match.group(1)) / 1024**2, 2)}GB" # Convert KB to GB
except Exception as e:
return f"Error retrieving RAM: {e}"
def get_graphics_cards():
gpus = []
try:
# Run the ze-monitor utility
result = subprocess.run(['ze-monitor'], capture_output=True, text=True, check=True)
# Clean up the output (remove leading/trailing whitespace and newlines)
output = result.stdout.strip()
for line in output.splitlines():
# Updated regex to handle GPU names containing parentheses
match = re.match(r'^[^(]*\((.*)\)', line)
if match:
gpus.append(match.group(1))
return gpus
except Exception as e:
return f"Error retrieving GPU info: {e}"
def get_cpu_info():
try:
with open('/proc/cpuinfo', 'r') as f:
cpuinfo = f.read()
model_match = re.search(r'model name\s+:\s+(.+)', cpuinfo)
cores_match = re.findall(r'processor\s+:\s+\d+', cpuinfo)
if model_match and cores_match:
return f"{model_match.group(1)} with {len(cores_match)} cores"
except Exception as e:
return f"Error retrieving CPU info: {e}"
def system_info():
return {
"Installed RAM": get_installed_ram(),
"Graphics Card": get_graphics_cards(),
"CPU": get_cpu_info()
}
# %%
# Defaults
OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint
#MODEL_NAME = "deepseek-r1:7b"
MODEL_NAME = "llama3.2"
#MODEL_NAME = "llama3.2"
MODEL_NAME = "qwen2.5:7b"
LOG_LEVEL="debug"
USE_TLS=False
WEB_HOST="0.0.0.0"
@ -419,7 +468,11 @@ class WebServer:
context = self.upsert_context(context_id)
system_prompt = context["system"][0]["content"];
return JSONResponse({ "system-prompt": system_prompt })
@self.app.get('/api/system-info/{context_id}')
async def get_system_info(context_id: str):
return JSONResponse(system_info())
@self.app.post('/api/chat/{context_id}')
async def chat_endpoint(context_id: str, request: Request):
context = self.upsert_context(context_id)
@ -662,5 +715,4 @@ def main():
logging.info(f"Starting web server at http://{args.web_host}:{args.web_port}")
web_server.run(host=args.web_host, port=args.web_port, use_reloader=False)
# Run the main function using anyio
main()
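For reference, the new `/api/system-info/{context_id}` endpoint added above can be exercised with a plain HTTP GET; a sketch follows (host and port are assumptions, not values from this commit):

```python
# Sketch: query the new system-info endpoint; adjust host/port to your deployment.
import requests

context_id = "00000000-0000-0000-0000-000000000000"  # any existing session/context id
resp = requests.get(f"http://localhost:8080/api/system-info/{context_id}", timeout=10)
resp.raise_for_status()
print(resp.json())  # {"Installed RAM": ..., "Graphics Card": [...], "CPU": ...}
```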

View File

@ -2,9 +2,7 @@
from . import defines
# Import rest as `utils.*` accessible
from .chunk import *
from .rss import *
from .chroma import *
from .rag import *
# Expose only public names (avoid importing hidden/internal names)
__all__ = [name for name in dir() if not name.startswith("_")]

View File

@ -1,4 +1,4 @@
ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
model="deepseek-r1:7b"
model="qwen2.5:7b"
encoding_model="mxbai-embed-large"
persist_directory="./chromadb"

src/utils/rag.py Normal file
View File

@ -0,0 +1 @@
rag = "exists"