Docs, trademarks, and system info output

James Ketr 2025-04-01 13:59:28 -07:00
parent f5ce84a310
commit 973b442642
9 changed files with 243 additions and 40 deletions

View File

@ -120,7 +120,7 @@ RUN { \
echo '#!/bin/bash' ; \
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
echo 'source /opt/ipex-llm/venv/bin/activate' ; \
echo 'bash -c "${@}"' ; \
echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \
} > /opt/ipex-llm/shell ; \
chmod +x /opt/ipex-llm/shell
@ -214,7 +214,7 @@ RUN pip install "transformers>=4.45.1"
RUN pip install 'https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl'
# Install ollama python module
RUN pip install ollama
RUN pip install ollama langchain-ollama
# pydle does not work with newer asyncio due to coroutine
# being deprecated. Patch to work.
@ -226,7 +226,7 @@ RUN pip install pydle \
&& rm /opt/pydle.patch
RUN pip install setuptools --upgrade
RUN pip install ollama
RUN pip install ollama langchain-ollama
RUN pip install feedparser bs4 chromadb
RUN pip install tiktoken
RUN pip install flask flask_cors flask_sock
@ -281,6 +281,7 @@ RUN apt-get update \
COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
RUN dpkg -i /opt/ze-monitor-*deb && rm /opt/ze-monitor-*deb
RUN usermod -aG ze-monitor root
COPY /src/ /opt/airc/src/
@ -345,7 +346,9 @@ RUN apt-get update \
WORKDIR /opt/ollama
# Download the nightly ollama release from ipex-llm
RUN wget -qO - https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250226-ubuntu.tgz | \
#ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250226-ubuntu.tgz
ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-ipex-llm-2.2.0b20250313-ubuntu.tgz
RUN wget -qO - ${OLLAMA_VERSION} | \
tar --strip-components=1 -C . -xzv
# Install Python from Oracular (ollama works with 3.12)
@ -367,7 +370,7 @@ RUN { \
echo '#!/bin/bash' ; \
update-alternatives --set python3 /opt/python/bin/python3.11 ; \
echo 'source /opt/ollama/venv/bin/activate' ; \
echo 'bash -c "${@}"' ; \
echo 'if [[ "${1}" != "" ]]; then bash -c ${*}; else bash; fi' ; \
} > /opt/ollama/shell ; \
chmod +x /opt/ollama/shell
@ -375,7 +378,7 @@ RUN { \
SHELL [ "/opt/ollama/shell" ]
# Install ollama python module
RUN pip install ollama
RUN pip install ollama langchain-ollama
SHELL [ "/bin/bash", "-c" ]
@ -393,10 +396,11 @@ RUN { \
echo ''; \
echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama/)?shell$ ]]; then'; \
echo ' echo "Dropping to shell"'; \
echo ' exec /bin/bash'; \
echo ' shift'; \
echo ' if [[ "${1}" != "" ]]; then cmd="/opt/ollama/shell ${@}"; echo "Running: ${cmd}"; exec ${cmd}; else /opt/ollama/shell; fi'; \
echo 'else'; \
echo ' echo "Launching Ollama server..."'; \
echo ' exec ./ollama serve'; \
echo ' exec ollama serve'; \
echo 'fi'; \
} > /entrypoint.sh \
&& chmod +x /entrypoint.sh
@ -407,8 +411,11 @@ RUN { \
echo 'set -e'; \
echo 'echo "Setting pip environment to /opt/ollama"'; \
echo 'source /opt/ollama/venv/bin/activate'; \
echo './ollama pull mxbai-embed-large' ; \
echo './ollama pull deepseek-r1:7b' ; \
echo 'ollama pull qwen2.5:7b' ; \
echo 'ollama pull llama3.2' ; \
echo 'ollama pull mxbai-embed-large' ; \
echo 'ollama pull deepseek-r1:7b' ; \
echo 'ollama pull mistral:7b' ; \
} > /fetch-models.sh \
&& chmod +x /fetch-models.sh
@ -416,6 +423,8 @@ ENV PYTHONUNBUFFERED=1
VOLUME [" /root/.ollama" ]
ENV PATH=/opt/ollama:${PATH}
ENTRYPOINT [ "/entrypoint.sh" ]
FROM airc AS jupyter
@ -455,7 +464,6 @@ RUN { \
echo ' --notebook-dir=/opt/jupyter \' ; \
echo ' --port 8888 \' ; \
echo ' --ip 0.0.0.0 \' ; \
echo ' --no-browser \' ; \
echo ' --allow-root \' ; \
echo ' --ServerApp.token= \' ; \
echo ' --ServerApp.password= \' ; \
@ -469,6 +477,8 @@ RUN { \
} > /entrypoint-jupyter.sh \
&& chmod +x /entrypoint-jupyter.sh
# echo ' --no-browser \' ; \
ENTRYPOINT [ "/entrypoint-jupyter.sh" ]
FROM ubuntu:oracular AS miniircd

View File

@ -2,9 +2,9 @@
AI is Really Cool
This project provides a simple IRC chat client. It runs the neuralchat model, enhanced with a little bit of RAG to fetch news RSS feeds.
This project provides an AI chat client. It runs the neuralchat model, enhanced with a little bit of RAG to fetch news RSS feeds.
Internally, it is built using PyTorch 2.6 and the Intel IPEX/LLM.
Internally, it is built using PyTorch 2.6, Intel IPEX/LLM, and Python 3.11 (several pip packages were not yet available for Python 3.12, which ships with Ubuntu Oracular 24.10, the release these containers are based on).
NOTE: If running on an Intel Arc A-series graphics processor, fp64 is not supported; it may need to be emulated, or the model may need to be quantized. It has been a while since I've had an A-series GPU to test on, so if you run into problems, please file an [issue](https://github.com/jketreno/airc/issues)--I have some routines I can put in, but I don't have a way to test them.
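If you do end up quantizing, a rough sketch of low-bit loading with IPEX-LLM's transformers wrapper is shown below; the model id, `load_in_4bit`, and the `xpu` device are illustrative assumptions, not settings taken from this repo:

```python
# Hypothetical sketch: quantized (4-bit) loading with IPEX-LLM, per the A-series note above.
import torch
from transformers import AutoTokenizer
from ipex_llm.transformers import AutoModelForCausalLM

model_id = "Intel/neural-chat-7b-v3-3"  # illustrative; use whatever model you actually run
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    load_in_4bit=True,       # quantize weights instead of relying on fp64 support
    trust_remote_code=True,
).to("xpu")                  # Intel GPU device exposed by IPEX

inputs = tokenizer("Hello!", return_tensors="pt").to("xpu")
with torch.inference_mode():
    out = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```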
@ -31,16 +31,56 @@ cd airc
docker compose build
```
## Containers
This project provides the following containers:
| Container | Purpose |
|:----------|:---------------------------------------------------------------|
| airc | Base container with GPU packages installed and configured |
| jupyter | airc + Jupyter notebook for running Jupyter sessions |
| miniircd | Tiny deployment of an IRC server for testing IRC agents |
| ollama | Installation of Intel's pre-built Ollama (from the ipex-llm project) |
While developing airc, Hugging Face is sometimes used directly, with models loaded via PyTorch; at other times, especially during rapid development, the ollama deployment is used. This combination makes it easy to use the locally available GPUs from either the local ollama server or the Hugging Face/PyTorch code.
To see which models are easily deployable with Ollama, see the [Ollama Model List](https://ollama.com/search).
Prior to using a new model, you need to download it:
```bash
MODEL=qwen2.5:7b
docker compose exec -it ollama ollama pull ${MODEL}
```
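Once a model is pulled, it can also be driven from Python using the `ollama` client installed in the containers. A minimal sketch, assuming the compose service name `ollama` on the default port (the model name is just an example):

```python
# Sketch: chat with the compose `ollama` service from inside another container.
import ollama

client = ollama.Client(host="http://ollama:11434")
response = client.chat(
    model="qwen2.5:7b",  # any model that has already been pulled
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response["message"]["content"])
```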
To download many common models for testing against, you can use the `fetch-models.sh` script, which downloads:
* qwen2.5:7b
* llama3.2
* mxbai-embed-large
* deepseek-r1:7b
* mistral:7b
```bash
docker compose exec -it ollama /fetch-models.sh
```
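The containers also install `langchain-ollama`, so the same models can be used through LangChain. A rough sketch under the same endpoint assumption as above (the model name is illustrative):

```python
# Sketch: drive a pulled model through langchain-ollama instead of the raw client.
from langchain_ollama import ChatOllama

llm = ChatOllama(model="llama3.2", base_url="http://ollama:11434")
print(llm.invoke("Summarize what RAG is in one sentence.").content)
```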
The persisted volume mounts can grow quite large with models, GPU kernel caches, etc. During the development of this project, the `./cache` directory has grown to consume ~250 GB of disk space.
## Running
In order to download the models, you need to have a Hugging Face token. See https://huggingface.co/settings/tokens for information on obtaining a token.
In order to download Hugging Face models, you need to have a Hugging Face token. See https://huggingface.co/settings/tokens for information on obtaining a token.
Edit .env to add the following:
```.env
HF_ACCESS_TOKEN=<access token from huggingface>
HF_HOME=/root/.cache
```
HF_HOME is set so that, when running in the containers, it points to a volume-mounted directory, allowing downloaded models to persist between runs.
NOTE: Models downloaded by most examples will be placed in the ./cache directory, which is bind-mounted into the container.
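As a rough sketch of how these variables are typically consumed when pulling a Hugging Face model directly (the variable names match the `.env` entries above; the model id and helper calls are illustrative, not code from this repo):

```python
# Sketch: authenticate with the token from .env; HF_HOME decides where downloads are cached.
import os
from dotenv import load_dotenv
from huggingface_hub import login
from transformers import AutoTokenizer

load_dotenv()                               # reads HF_ACCESS_TOKEN / HF_HOME from .env
login(token=os.environ["HF_ACCESS_TOKEN"])  # required for gated or private models
# Downloads land under HF_HOME (/root/.cache in the container, bind-mounted to ./cache)
tokenizer = AutoTokenizer.from_pretrained("Intel/neural-chat-7b-v3-3")
```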
### AIRC

View File

@ -21,7 +21,7 @@ services:
- ./src:/opt/airc/src:rw
- ./doc:/opt/airc/doc:ro
- ./results:/opt/airc/results:rw
cap_add: # used for running ze-monitor within airc container
cap_add: # used for running ze-monitor within container
- CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
- CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
- CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
@ -47,7 +47,7 @@ services:
volumes:
- ./cache:/root/.cache # Cache hub models and neo_compiler_cache
- ./ollama:/root/.ollama # Cache the ollama models
cap_add: # used for running ze-monitor within airc container
cap_add: # used for running ze-monitor within container
- CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
- CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
- CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
@ -84,7 +84,11 @@ services:
memory: "0" # No reserved memory (optional)
ulimits:
memlock: -1 # Prevents memory from being locked
oom_kill_disable: true # Prevents OOM killer from killing the container
#oom_kill_disable: true # Prevents OOM killer from killing the container
cap_add: # used for running ze-monitor within container
- CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
- CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
- CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
miniircd:
build:

View File

@ -10,6 +10,32 @@ div {
flex-direction: column;
}
.SystemInfo {
display: flex;
flex-direction: column;
gap: 5px;
padding: 5px;
flex-grow: 1;
}
.SystemInfoItem {
display: flex; /* Flex row for individual items */
flex-direction: row;
flex-grow: 1;
}
.SystemInfoItem > div:first-child {
display: flex;
justify-self: end; /* Align the first column content to the right */
width: 10rem;
}
.SystemInfoItem > div:last-child {
display: flex;
flex-grow: 1;
justify-self: end; /* Align the second column content to the right */
}
.ChatBox {
display: flex;
flex-direction: column;

View File

@ -1,4 +1,4 @@
import React, { useState, useEffect, useRef, useCallback } from 'react';
import React, { useState, useEffect, useRef, useCallback, ReactElement } from 'react';
import FormGroup from '@mui/material/FormGroup';
import FormControlLabel from '@mui/material/FormControlLabel';
import { useTheme } from '@mui/material';
@ -70,6 +70,7 @@ interface ControlsParams {
tools: Tool[],
rags: Tool[],
systemPrompt: string,
systemInfo: SystemInfo,
toggleTool: (tool: Tool) => void,
toggleRag: (tool: Tool) => void,
setRags: (rags: Tool[]) => void,
@ -77,7 +78,50 @@ interface ControlsParams {
reset: (types: ("rags" | "tools" | "history" | "system-prompt")[], message: string) => Promise<void>
};
const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemPrompt, reset }: ControlsParams) => {
type SystemInfo = {
"Installed RAM (GB)": string,
"Graphics Cards": string[],
"CPU": string
};
const SystemInfoComponent: React.FC<{ systemInfo: SystemInfo }> = ({ systemInfo }) => {
const [systemElements, setSystemElements] = useState<ReactElement[]>([]);
const convertToSymbols = (text: string) => {
return text
.replace(/\(R\)/g, '®') // Replace (R) with the ® symbol
.replace(/\(C\)/g, '©') // Replace (C) with the © symbol
.replace(/\(TM\)/g, '™'); // Replace (TM) with the ™ symbol
};
useEffect(() => {
const elements = Object.entries(systemInfo).flatMap(([k, v]) => {
// If v is an array, repeat for each card
if (Array.isArray(v)) {
return v.map((card, index) => (
<div key={index} className="SystemInfoItem">
<div>{convertToSymbols(k)} {index}</div>
<div>{convertToSymbols(card)}</div>
</div>
));
}
// If it's not an array, handle normally
return (
<div key={k} className="SystemInfoItem">
<div>{convertToSymbols(k)}</div>
<div>{convertToSymbols(String(v))}</div>
</div>
);
});
setSystemElements(elements);
}, [systemInfo]);
return <div className="SystemInfo">{systemElements}</div>;
};
const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemPrompt, reset, systemInfo }: ControlsParams) => {
const [editSystemPrompt, setEditSystemPrompt] = useState<string>(systemPrompt);
useEffect(() => {
@ -110,9 +154,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
</Typography>
<Accordion>
<AccordionSummary
expandIcon={<ExpandMoreIcon />}
>
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Typography component="span">System Prompt</Typography>
</AccordionSummary>
<AccordionActions style={{ flexDirection: "column" }}>
@ -135,9 +177,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
</AccordionActions>
</Accordion>
<Accordion>
<AccordionSummary
expandIcon={<ExpandMoreIcon />}
>
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Typography component="span">Tools</Typography>
</AccordionSummary>
<AccordionDetails>
@ -157,9 +197,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
</AccordionActions>
</Accordion>
<Accordion>
<AccordionSummary
expandIcon={<ExpandMoreIcon />}
>
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Typography component="span">RAG</Typography>
</AccordionSummary>
<AccordionDetails>
@ -178,6 +216,17 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
}</FormGroup>
</AccordionActions>
</Accordion>
<Accordion>
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
<Typography component="span">System Information</Typography>
</AccordionSummary>
<AccordionDetails>
The server is running on the following hardware:
</AccordionDetails>
<AccordionActions>
<SystemInfoComponent systemInfo={systemInfo} />
</AccordionActions>
</Accordion>
<Button onClick={() => { reset(["history"], "History cleared."); }}>Clear Chat History</Button>
<Button onClick={() => { reset(["rags", "tools", "system-prompt"], "Default settings restored.") }}>Reset to defaults</Button>
</div>);
@ -199,6 +248,7 @@ const App = () => {
const [rags, setRags] = useState<Tool[]>([]);
const [systemPrompt, setSystemPrompt] = useState<string>("");
const [serverSystemPrompt, setServerSystemPrompt] = useState<string>("");
const [systemInfo, setSystemInfo] = useState<SystemInfo | undefined>(undefined);
// Scroll to bottom of conversation when conversation updates
useEffect(() => {
@ -214,6 +264,27 @@ const App = () => {
setSnackOpen(true);
}, []);
// Get the system information
useEffect(() => {
if (systemInfo !== undefined || sessionId === undefined) {
return;
}
fetch(getConnectionBase(loc) + `/api/system-info/${sessionId}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
},
})
.then(response => response.json())
.then(data => {
setSystemInfo(data);
})
.catch(error => {
console.error('Error obtaining system information:', error);
setSnack("Unable to obtain system information.", "error");
});
}, [systemInfo, setSystemInfo, loc, setSnack, sessionId])
// Set the initial chat history to "loading" or the welcome message if loaded.
useEffect(() => {
if (sessionId === undefined) {
@ -468,7 +539,7 @@ const App = () => {
const drawer = (
<>
{sessionId !== undefined && <Controls {...{ tools, rags, reset, systemPrompt, toggleTool, toggleRag, setRags, setSystemPrompt }} />}
{sessionId !== undefined && systemInfo !== undefined && <Controls {...{ tools, rags, reset, systemPrompt, toggleTool, toggleRag, setRags, setSystemPrompt, systemInfo }} />}
</>
);
@ -735,7 +806,8 @@ const App = () => {
)}
{message.role === 'assistant' ? (
<div className="markdown-content">
<Markdown remarkPlugins={[remarkMath]} rehypePlugins={[rehypeKatex]} children={formattedContent} />
<Markdown children={formattedContent} />
{/* <Markdown remarkPlugins={[remarkMath]} rehypePlugins={[rehypeKatex]} children={formattedContent} /> */}
</div>
) : (
<div>{formattedContent}</div>

View File

@ -3,7 +3,6 @@
# Standard library modules (no try-except needed)
import argparse
import asyncio
import anyio
import json
import logging
import os
@ -15,6 +14,8 @@ import textwrap
import threading
import uuid
import random
import subprocess
import re
def try_import(module_name, pip_name=None):
try:
@ -26,7 +27,6 @@ def try_import(module_name, pip_name=None):
# Third-party modules with import checks
try_import('gradio')
try_import('ollama')
try_import('openai')
try_import('pytz')
try_import('requests')
try_import('yfinance', 'yfinance')
@ -35,13 +35,13 @@ try_import('geopy', 'geopy')
try_import('hyphen', 'PyHyphen')
try_import('bs4', 'beautifulsoup4')
try_import('nltk')
try_import('fastapi')
import nltk
from dotenv import load_dotenv
from geopy.geocoders import Nominatim
import gradio as gr
import ollama
import openai
import pytz
import requests
import yfinance as yf
@ -50,6 +50,7 @@ from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
from fastapi.responses import JSONResponse, StreamingResponse, FileResponse, RedirectResponse
from fastapi.middleware.cors import CORSMiddleware
from utils import rag
from tools import (
get_weather_by_location,
@ -63,11 +64,59 @@ rags = [
{ "name": "LKML", "enabled": False, "description": "Full associative data for entire LKML mailing list archive." },
]
def get_installed_ram():
try:
with open('/proc/meminfo', 'r') as f:
meminfo = f.read()
match = re.search(r'MemTotal:\s+(\d+)', meminfo)
if match:
return f"{round(int(match.group(1)) / 1024**2, 2)}GB" # Convert KB to GB
except Exception as e:
return f"Error retrieving RAM: {e}"
def get_graphics_cards():
gpus = []
try:
# Run the ze-monitor utility
result = subprocess.run(['ze-monitor'], capture_output=True, text=True, check=True)
# Clean up the output (remove leading/trailing whitespace and newlines)
output = result.stdout.strip()
for line in output.splitlines():
# Updated regex to handle GPU names containing parentheses
match = re.match(r'^[^(]*\((.*)\)', line)
if match:
gpus.append(match.group(1))
return gpus
except Exception as e:
return f"Error retrieving GPU info: {e}"
def get_cpu_info():
try:
with open('/proc/cpuinfo', 'r') as f:
cpuinfo = f.read()
model_match = re.search(r'model name\s+:\s+(.+)', cpuinfo)
cores_match = re.findall(r'processor\s+:\s+\d+', cpuinfo)
if model_match and cores_match:
return f"{model_match.group(1)} with {len(cores_match)} cores"
except Exception as e:
return f"Error retrieving CPU info: {e}"
def system_info():
return {
"Installed RAM": get_installed_ram(),
"Graphics Card": get_graphics_cards(),
"CPU": get_cpu_info()
}
# %%
# Defaults
OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint
#MODEL_NAME = "deepseek-r1:7b"
MODEL_NAME = "llama3.2"
#MODEL_NAME = "llama3.2"
MODEL_NAME = "qwen2.5:7b"
LOG_LEVEL="debug"
USE_TLS=False
WEB_HOST="0.0.0.0"
@ -419,7 +468,11 @@ class WebServer:
context = self.upsert_context(context_id)
system_prompt = context["system"][0]["content"];
return JSONResponse({ "system-prompt": system_prompt })
@self.app.get('/api/system-info/{context_id}')
async def get_system_info(context_id: str):
return JSONResponse(system_info())
@self.app.post('/api/chat/{context_id}')
async def chat_endpoint(context_id: str, request: Request):
context = self.upsert_context(context_id)
@ -662,5 +715,4 @@ def main():
logging.info(f"Starting web server at http://{args.web_host}:{args.web_port}")
web_server.run(host=args.web_host, port=args.web_port, use_reloader=False)
# Run the main function using anyio
main()
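For reference, the new `/api/system-info/{context_id}` endpoint added above can be exercised with a plain HTTP GET; a sketch follows (host and port are assumptions, not values from this commit):

```python
# Sketch: query the new system-info endpoint; adjust host/port to your deployment.
import requests

context_id = "00000000-0000-0000-0000-000000000000"  # any existing session/context id
resp = requests.get(f"http://localhost:8080/api/system-info/{context_id}", timeout=10)
resp.raise_for_status()
print(resp.json())  # {"Installed RAM": ..., "Graphics Card": [...], "CPU": ...}
```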

View File

@ -2,9 +2,7 @@
from . import defines
# Import rest as `utils.*` accessible
from .chunk import *
from .rss import *
from .chroma import *
from .rag import *
# Expose only public names (avoid importing hidden/internal names)
__all__ = [name for name in dir() if not name.startswith("_")]

View File

@ -1,4 +1,4 @@
ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
model="deepseek-r1:7b"
model="qwen2.5:7b"
encoding_model="mxbai-embed-large"
persist_directory="./chromadb"

src/utils/rag.py Normal file
View File

@ -0,0 +1 @@
rag = "exists"