Docs, trademarks, and system info output

James Ketr 2025-04-01 13:59:28 -07:00
parent f5ce84a310
commit 973b442642
9 changed files with 243 additions and 40 deletions


@@ -120,7 +120,7 @@ RUN { \
     echo '#!/bin/bash' ; \
     echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
     echo 'source /opt/ipex-llm/venv/bin/activate' ; \
-    echo 'bash -c "${@}"' ; \
+    echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \
     } > /opt/ipex-llm/shell ; \
     chmod +x /opt/ipex-llm/shell
@@ -214,7 +214,7 @@ RUN pip install "transformers>=4.45.1"
 RUN pip install 'https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl'
 # Install ollama python module
-RUN pip install ollama
+RUN pip install ollama langchain-ollama
 # pydle does not work with newer asyncio due to coroutine
 # being deprecated. Patch to work.
@@ -226,7 +226,7 @@ RUN pip install pydle \
     && rm /opt/pydle.patch
 RUN pip install setuptools --upgrade
-RUN pip install ollama
+RUN pip install ollama langchain-ollama
 RUN pip install feedparser bs4 chromadb
 RUN pip install tiktoken
 RUN pip install flask flask_cors flask_sock
@@ -281,6 +281,7 @@ RUN apt-get update \
 COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
 RUN dpkg -i /opt/ze-monitor-*deb && rm /opt/ze-monitor-*deb
+RUN usermod -aG ze-monitor root
 COPY /src/ /opt/airc/src/
@@ -345,7 +346,9 @@ RUN apt-get update \
 WORKDIR /opt/ollama
 # Download the nightly ollama release from ipex-llm
-RUN wget -qO - https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250226-ubuntu.tgz | \
+#ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250226-ubuntu.tgz
+ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-ipex-llm-2.2.0b20250313-ubuntu.tgz
+RUN wget -qO - ${OLLAMA_VERSION} | \
     tar --strip-components=1 -C . -xzv
 # Install Python from Oracular (ollama works with 3.12)
@@ -367,7 +370,7 @@ RUN { \
     echo '#!/bin/bash' ; \
     update-alternatives --set python3 /opt/python/bin/python3.11 ; \
     echo 'source /opt/ollama/venv/bin/activate' ; \
-    echo 'bash -c "${@}"' ; \
+    echo 'if [[ "${1}" != "" ]]; then bash -c ${*}; else bash; fi' ; \
     } > /opt/ollama/shell ; \
     chmod +x /opt/ollama/shell
@@ -375,7 +378,7 @@ RUN { \
 SHELL [ "/opt/ollama/shell" ]
 # Install ollama python module
-RUN pip install ollama
+RUN pip install ollama langchain-ollama
 SHELL [ "/bin/bash", "-c" ]
@@ -393,10 +396,11 @@ RUN { \
     echo ''; \
     echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama/)?shell$ ]]; then'; \
     echo ' echo "Dropping to shell"'; \
-    echo ' exec /bin/bash'; \
+    echo ' shift'; \
+    echo ' if [[ "${1}" != "" ]]; then cmd="/opt/ollama/shell ${@}"; echo "Running: ${cmd}"; exec ${cmd}; else /opt/ollama/shell; fi'; \
     echo 'else'; \
     echo ' echo "Launching Ollama server..."'; \
-    echo ' exec ./ollama serve'; \
+    echo ' exec ollama serve'; \
     echo 'fi'; \
     } > /entrypoint.sh \
     && chmod +x /entrypoint.sh
@@ -407,8 +411,11 @@ RUN { \
     echo 'set -e'; \
     echo 'echo "Setting pip environment to /opt/ollama"'; \
     echo 'source /opt/ollama/venv/bin/activate'; \
-    echo './ollama pull mxbai-embed-large' ; \
-    echo './ollama pull deepseek-r1:7b' ; \
+    echo 'ollama pull qwen2.5:7b' ; \
+    echo 'ollama pull llama3.2' ; \
+    echo 'ollama pull mxbai-embed-large' ; \
+    echo 'ollama pull deepseek-r1:7b' ; \
+    echo 'ollama pull mistral:7b' ; \
     } > /fetch-models.sh \
     && chmod +x /fetch-models.sh
@@ -416,6 +423,8 @@ ENV PYTHONUNBUFFERED=1
 VOLUME [" /root/.ollama" ]
+ENV PATH=/opt/ollama:${PATH}
 ENTRYPOINT [ "/entrypoint.sh" ]
 
 FROM airc AS jupyter
@@ -455,7 +464,6 @@ RUN { \
     echo ' --notebook-dir=/opt/jupyter \' ; \
     echo ' --port 8888 \' ; \
     echo ' --ip 0.0.0.0 \' ; \
-    echo ' --no-browser \' ; \
     echo ' --allow-root \' ; \
     echo ' --ServerApp.token= \' ; \
     echo ' --ServerApp.password= \' ; \
@@ -469,6 +477,8 @@ RUN { \
     } > /entrypoint-jupyter.sh \
     && chmod +x /entrypoint-jupyter.sh
+# echo ' --no-browser \' ; \
 
 ENTRYPOINT [ "/entrypoint-jupyter.sh" ]
 
 FROM ubuntu:oracular AS miniircd


@@ -2,9 +2,9 @@
 AI is Really Cool
 
-This project provides a simple IRC chat client. It runs the neuralchat model, enhanced with a little bit of RAG to fetch news RSS feeds.
+This project provides an AI chat client. It runs the neuralchat model, enhanced with a little bit of RAG to fetch news RSS feeds.
 
-Internally, it is built using PyTorch 2.6 and the Intel IPEX/LLM.
+Internally, it is built using PyTorch 2.6, Intel IPEX/LLM, and Python 3.11 (several pip packages were not yet available for the Python 3.12 shipped with Ubuntu Oracular 24.10, which these containers are based on).
 
 NOTE: If running on an Intel Arc A series graphics processor, fp64 is not supported and may need to either be emulated or have the model quantized. It has been a while since I've had an A series GPU to test on, so if you run into problems please file an [issue](https://github.com/jketreno/airc/issues)--I have some routines I can put in, but don't have a way to test them.
@@ -31,16 +31,56 @@ cd airc
 docker compose build
 ```
+
+## Containers
+
+This project provides the following containers:
+
+| Container | Purpose                                                         |
+|:----------|:----------------------------------------------------------------|
+| airc      | Base container with GPU packages installed and configured        |
+| jupyter   | airc + Jupyter notebook for running Jupyter sessions             |
+| miniircd  | Tiny deployment of an IRC server for testing IRC agents          |
+| ollama    | Installation of Intel's pre-built Ollama.cpp                     |
+
+While developing airc, Hugging Face is sometimes used directly, with models loaded via PyTorch. At other times, especially during rapid development, the ollama deployment is used. This combination lets you exercise the local GPU either through the ollama service or through the Hugging Face code paths.
+
+To see which models are easily deployable with Ollama, see the [Ollama Model List](https://ollama.com/search).
+
+Prior to using a new model, you need to download it:
+
+```bash
+MODEL=qwen2.5:7b
+docker compose exec -it ollama ollama pull ${MODEL}
+```
+
+To download many common models for testing against, you can use the `fetch-models.sh` script, which will download:
+
+* qwen2.5:7b
+* llama3.2
+* mxbai-embed-large
+* deepseek-r1:7b
+* mistral:7b
+
+```bash
+docker compose exec -it ollama /fetch-models.sh
+```
+
+The persisted volume mount can grow quite large with models, GPU kernel caching, etc. During the development of this project, the `./cache` directory has grown to consume ~250G of disk space.
+
 ## Running
 
-In order to download the models, you need to have a Hugging Face token. See https://huggingface.co/settings/tokens for information on obtaining a token.
+In order to download Hugging Face models, you need to have a Hugging Face token. See https://huggingface.co/settings/tokens for information on obtaining a token.
 
 Edit .env to add the following:
 
 ```.env
 HF_ACCESS_TOKEN=<access token from huggingface>
+HF_HOME=/root/.cache
 ```
+
+HF_HOME is set for the containers to point at a volume-mounted directory so that downloaded models persist across container restarts.
 
 NOTE: Models downloaded by most examples will be placed in the ./cache directory, which is bind mounted to the container.
 
 ### AIRC
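To illustrate the workflow the README describes, once a model has been pulled, Python code inside the airc container can talk to the ollama service directly. A minimal sketch, assuming the compose stack is up, qwen2.5:7b has been pulled, and the service is reachable at the project's default endpoint:

```python
import ollama

# Assumptions: the ollama container is running and "qwen2.5:7b" has been pulled.
client = ollama.Client(host="http://ollama:11434")  # default OLLAMA_API_URL used by this project
reply = client.chat(
    model="qwen2.5:7b",
    messages=[{"role": "user", "content": "Say hello."}],
)
print(reply["message"]["content"])
```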


@@ -21,7 +21,7 @@ services:
       - ./src:/opt/airc/src:rw
       - ./doc:/opt/airc/doc:ro
       - ./results:/opt/airc/results:rw
-    cap_add: # used for running ze-monitor within airc container
+    cap_add: # used for running ze-monitor within container
       - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
       - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
       - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
@@ -47,7 +47,7 @@ services:
     volumes:
       - ./cache:/root/.cache # Cache hub models and neo_compiler_cache
       - ./ollama:/root/.ollama # Cache the ollama models
-    cap_add: # used for running ze-monitor within airc container
+    cap_add: # used for running ze-monitor within container
       - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
       - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
       - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
@@ -84,7 +84,11 @@ services:
         memory: "0" # No reserved memory (optional)
     ulimits:
       memlock: -1 # Prevents memory from being locked
-    oom_kill_disable: true # Prevents OOM killer from killing the container
+    #oom_kill_disable: true # Prevents OOM killer from killing the container
+    cap_add: # used for running ze-monitor within container
+      - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
+      - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
+      - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
 
   miniircd:
     build:


@@ -10,6 +10,32 @@ div {
   flex-direction: column;
 }
 
+.SystemInfo {
+  display: flex;
+  flex-direction: column;
+  gap: 5px;
+  padding: 5px;
+  flex-grow: 1;
+}
+
+.SystemInfoItem {
+  display: flex; /* Grid for individual items */
+  flex-direction: row;
+  flex-grow: 1;
+}
+
+.SystemInfoItem > div:first-child {
+  display: flex;
+  justify-self: end; /* Align the first column content to the right */
+  width: 10rem;
+}
+
+.SystemInfoItem > div:last-child {
+  display: flex;
+  flex-grow: 1;
+  justify-self: end; /* Align the first column content to the right */
+}
+
 .ChatBox {
   display: flex;
   flex-direction: column;


@@ -1,4 +1,4 @@
-import React, { useState, useEffect, useRef, useCallback } from 'react';
+import React, { useState, useEffect, useRef, useCallback, ReactElement } from 'react';
 import FormGroup from '@mui/material/FormGroup';
 import FormControlLabel from '@mui/material/FormControlLabel';
 import { useTheme } from '@mui/material';
@@ -70,6 +70,7 @@ interface ControlsParams {
   tools: Tool[],
   rags: Tool[],
   systemPrompt: string,
+  systemInfo: SystemInfo,
   toggleTool: (tool: Tool) => void,
   toggleRag: (tool: Tool) => void,
   setRags: (rags: Tool[]) => void,
@@ -77,7 +78,50 @@ interface ControlsParams {
   reset: (types: ("rags" | "tools" | "history" | "system-prompt")[], message: string) => Promise<void>
 };
 
-const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemPrompt, reset }: ControlsParams) => {
+type SystemInfo = {
+  "Installed RAM (GB)": string,
+  "Graphics Cards": string[],
+  "CPU": string
+};
+
+const SystemInfoComponent: React.FC<{ systemInfo: SystemInfo }> = ({ systemInfo }) => {
+  const [systemElements, setSystemElements] = useState<ReactElement[]>([]);
+
+  const convertToSymbols = (text: string) => {
+    return text
+      .replace(/\(R\)/g, '®')   // Replace (R) with the ® symbol
+      .replace(/\(C\)/g, '©')   // Replace (C) with the © symbol
+      .replace(/\(TM\)/g, '™'); // Replace (TM) with the ™ symbol
+  };
+
+  useEffect(() => {
+    const elements = Object.entries(systemInfo).flatMap(([k, v]) => {
+      // If v is an array, repeat for each card
+      if (Array.isArray(v)) {
+        return v.map((card, index) => (
+          <div key={index} className="SystemInfoItem">
+            <div>{convertToSymbols(k)} {index}</div>
+            <div>{convertToSymbols(card)}</div>
+          </div>
+        ));
+      }
+      // If it's not an array, handle normally
+      return (
+        <div key={k} className="SystemInfoItem">
+          <div>{convertToSymbols(k)}</div>
+          <div>{convertToSymbols(String(v))}</div>
+        </div>
+      );
+    });
+    setSystemElements(elements);
+  }, [systemInfo]);
+
+  return <div className="SystemInfo">{systemElements}</div>;
+};
+
+const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemPrompt, reset, systemInfo }: ControlsParams) => {
   const [editSystemPrompt, setEditSystemPrompt] = useState<string>(systemPrompt);
 
   useEffect(() => {
@@ -110,9 +154,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
       </Typography>
       <Accordion>
-        <AccordionSummary
-          expandIcon={<ExpandMoreIcon />}
-        >
+        <AccordionSummary expandIcon={<ExpandMoreIcon />}>
           <Typography component="span">System Prompt</Typography>
         </AccordionSummary>
         <AccordionActions style={{ flexDirection: "column" }}>
@@ -135,9 +177,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
       </AccordionActions>
     </Accordion>
     <Accordion>
-      <AccordionSummary
-        expandIcon={<ExpandMoreIcon />}
-      >
+      <AccordionSummary expandIcon={<ExpandMoreIcon />}>
         <Typography component="span">Tools</Typography>
       </AccordionSummary>
       <AccordionDetails>
@@ -157,9 +197,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
       </AccordionActions>
     </Accordion>
     <Accordion>
-      <AccordionSummary
-        expandIcon={<ExpandMoreIcon />}
-      >
+      <AccordionSummary expandIcon={<ExpandMoreIcon />}>
         <Typography component="span">RAG</Typography>
       </AccordionSummary>
       <AccordionDetails>
@@ -178,6 +216,17 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
       }</FormGroup>
       </AccordionActions>
     </Accordion>
+    <Accordion>
+      <AccordionSummary expandIcon={<ExpandMoreIcon />}>
+        <Typography component="span">System Information</Typography>
+      </AccordionSummary>
+      <AccordionDetails>
+        The server is running on the following hardware:
+      </AccordionDetails>
+      <AccordionActions>
+        <SystemInfoComponent systemInfo={systemInfo} />
+      </AccordionActions>
+    </Accordion>
     <Button onClick={() => { reset(["history"], "History cleared."); }}>Clear Chat History</Button>
     <Button onClick={() => { reset(["rags", "tools", "system-prompt"], "Default settings restored.") }}>Reset to defaults</Button>
   </div>);
@@ -199,6 +248,7 @@ const App = () => {
   const [rags, setRags] = useState<Tool[]>([]);
   const [systemPrompt, setSystemPrompt] = useState<string>("");
   const [serverSystemPrompt, setServerSystemPrompt] = useState<string>("");
+  const [systemInfo, setSystemInfo] = useState<SystemInfo | undefined>(undefined);
 
   // Scroll to bottom of conversation when conversation updates
   useEffect(() => {
@@ -214,6 +264,27 @@ const App = () => {
     setSnackOpen(true);
   }, []);
 
+  // Get the system information
+  useEffect(() => {
+    if (systemInfo !== undefined || sessionId === undefined) {
+      return;
+    }
+    fetch(getConnectionBase(loc) + `/api/system-info/${sessionId}`, {
+      method: 'GET',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+    })
+      .then(response => response.json())
+      .then(data => {
+        setSystemInfo(data);
+      })
+      .catch(error => {
+        console.error('Error obtaining system information:', error);
+        setSnack("Unable to obtain system information.", "error");
+      });
+  }, [systemInfo, setSystemInfo, loc, setSnack, sessionId])
+
   // Set the initial chat history to "loading" or the welcome message if loaded.
   useEffect(() => {
     if (sessionId === undefined) {
@@ -468,7 +539,7 @@ const App = () => {
   const drawer = (
     <>
-      {sessionId !== undefined && <Controls {...{ tools, rags, reset, systemPrompt, toggleTool, toggleRag, setRags, setSystemPrompt }} />}
+      {sessionId !== undefined && systemInfo !== undefined && <Controls {...{ tools, rags, reset, systemPrompt, toggleTool, toggleRag, setRags, setSystemPrompt, systemInfo }} />}
     </>
   );
@@ -735,7 +806,8 @@ const App = () => {
       )}
       {message.role === 'assistant' ? (
         <div className="markdown-content">
-          <Markdown remarkPlugins={[remarkMath]} rehypePlugins={[rehypeKatex]} children={formattedContent} />
+          <Markdown children={formattedContent} />
+          {/* <Markdown remarkPlugins={[remarkMath]} rehypePlugins={[rehypeKatex]} children={formattedContent} /> */}
         </div>
       ) : (
         <div>{formattedContent}</div>


@@ -3,7 +3,6 @@
 # Standard library modules (no try-except needed)
 import argparse
 import asyncio
-import anyio
 import json
 import logging
 import os
@@ -15,6 +14,8 @@ import textwrap
 import threading
 import uuid
 import random
+import subprocess
+import re
 
 def try_import(module_name, pip_name=None):
     try:
@@ -26,7 +27,6 @@ def try_import(module_name, pip_name=None):
 # Third-party modules with import checks
 try_import('gradio')
 try_import('ollama')
-try_import('openai')
 try_import('pytz')
 try_import('requests')
 try_import('yfinance', 'yfinance')
@@ -35,13 +35,13 @@ try_import('geopy', 'geopy')
 try_import('hyphen', 'PyHyphen')
 try_import('bs4', 'beautifulsoup4')
 try_import('nltk')
+try_import('fastapi')
 
 import nltk
 from dotenv import load_dotenv
 from geopy.geocoders import Nominatim
 import gradio as gr
 import ollama
-import openai
 import pytz
 import requests
 import yfinance as yf
@@ -50,6 +50,7 @@ from bs4 import BeautifulSoup
 from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
 from fastapi.responses import JSONResponse, StreamingResponse, FileResponse, RedirectResponse
 from fastapi.middleware.cors import CORSMiddleware
+from utils import rag
 
 from tools import (
     get_weather_by_location,
@@ -63,11 +64,59 @@ rags = [
     { "name": "LKML", "enabled": False, "description": "Full associative data for entire LKML mailing list archive." },
 ]
 
+def get_installed_ram():
+    try:
+        with open('/proc/meminfo', 'r') as f:
+            meminfo = f.read()
+        match = re.search(r'MemTotal:\s+(\d+)', meminfo)
+        if match:
+            return f"{round(int(match.group(1)) / 1024**2, 2)}GB"  # Convert KB to GB
+    except Exception as e:
+        return f"Error retrieving RAM: {e}"
+
+def get_graphics_cards():
+    gpus = []
+    try:
+        # Run the ze-monitor utility
+        result = subprocess.run(['ze-monitor'], capture_output=True, text=True, check=True)
+        # Clean up the output (remove leading/trailing whitespace and newlines)
+        output = result.stdout.strip()
+        for line in output.splitlines():
+            # Updated regex to handle GPU names containing parentheses
+            match = re.match(r'^[^(]*\((.*)\)', line)
+            if match:
+                gpus.append(match.group(1))
+        return gpus
+    except Exception as e:
+        return f"Error retrieving GPU info: {e}"
+
+def get_cpu_info():
+    try:
+        with open('/proc/cpuinfo', 'r') as f:
+            cpuinfo = f.read()
+        model_match = re.search(r'model name\s+:\s+(.+)', cpuinfo)
+        cores_match = re.findall(r'processor\s+:\s+\d+', cpuinfo)
+        if model_match and cores_match:
+            return f"{model_match.group(1)} with {len(cores_match)} cores"
+    except Exception as e:
+        return f"Error retrieving CPU info: {e}"
+
+def system_info():
+    return {
+        "Installed RAM": get_installed_ram(),
+        "Graphics Card": get_graphics_cards(),
+        "CPU": get_cpu_info()
+    }
+
 # %%
 # Defaults
 OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint
 #MODEL_NAME = "deepseek-r1:7b"
-MODEL_NAME = "llama3.2"
+#MODEL_NAME = "llama3.2"
+MODEL_NAME = "qwen2.5:7b"
 LOG_LEVEL="debug"
 USE_TLS=False
 WEB_HOST="0.0.0.0"
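For context, `system_info()` assembles the payload that the new `/api/system-info` endpoint returns and that the React `SystemInfoComponent` renders. A minimal sketch of inspecting it locally; the commented shape uses placeholders rather than real output:

```python
import json

info = system_info()
# Shape follows system_info() above; values depend entirely on the host, e.g.:
# {
#   "Installed RAM": "<total>GB",
#   "Graphics Card": ["<GPU 0 name>", "..."],
#   "CPU": "<model name> with <N> cores"
# }
print(json.dumps(info, indent=2))
```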
@@ -420,6 +469,10 @@ class WebServer:
             system_prompt = context["system"][0]["content"];
             return JSONResponse({ "system-prompt": system_prompt })
 
+        @self.app.get('/api/system-info/{context_id}')
+        async def get_system_info(context_id: str):
+            return JSONResponse(system_info())
+
         @self.app.post('/api/chat/{context_id}')
         async def chat_endpoint(context_id: str, request: Request):
             context = self.upsert_context(context_id)
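To exercise the new `/api/system-info` endpoint from the host, a request along these lines should work; the port and context id are assumptions (use whatever `args.web_port` the server is started with and a session id issued by the UI):

```python
import requests

WEB_PORT = 8000              # hypothetical; match args.web_port in main()
CONTEXT_ID = "<context-id>"  # hypothetical; any session id issued by the UI

resp = requests.get(f"http://localhost:{WEB_PORT}/api/system-info/{CONTEXT_ID}", timeout=10)
resp.raise_for_status()
print(resp.json())
```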
@@ -662,5 +715,4 @@ def main():
     logging.info(f"Starting web server at http://{args.web_host}:{args.web_port}")
     web_server.run(host=args.web_host, port=args.web_port, use_reloader=False)
 
-# Run the main function using anyio
 main()


@@ -2,9 +2,7 @@
 from . import defines
 
 # Import rest as `utils.*` accessible
-from .chunk import *
-from .rss import *
-from .chroma import *
+from .rag import *
 
 # Expose only public names (avoid importing hidden/internal names)
 __all__ = [name for name in dir() if not name.startswith("_")]


@@ -1,4 +1,4 @@
 ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
-model="deepseek-r1:7b"
+model="qwen2.5:7b"
 encoding_model="mxbai-embed-large"
 persist_directory="./chromadb"

src/utils/rag.py (new file)

@@ -0,0 +1 @@
+rag = "exists"