Docs, trademarks, and system info output

James Ketr 2025-04-01 13:59:28 -07:00
parent f5ce84a310
commit 973b442642
9 changed files with 243 additions and 40 deletions


@@ -120,7 +120,7 @@ RUN { \
     echo '#!/bin/bash' ; \
     echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
     echo 'source /opt/ipex-llm/venv/bin/activate' ; \
-    echo 'bash -c "${@}"' ; \
+    echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \
     } > /opt/ipex-llm/shell ; \
     chmod +x /opt/ipex-llm/shell
@@ -214,7 +214,7 @@ RUN pip install "transformers>=4.45.1"
 RUN pip install 'https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl'
 # Install ollama python module
-RUN pip install ollama
+RUN pip install ollama langchain-ollama
 # pydle does not work with newer asyncio due to coroutine
 # being deprecated. Patch to work.
@@ -226,7 +226,7 @@ RUN pip install pydle \
     && rm /opt/pydle.patch
 RUN pip install setuptools --upgrade
-RUN pip install ollama
+RUN pip install ollama langchain-ollama
 RUN pip install feedparser bs4 chromadb
 RUN pip install tiktoken
 RUN pip install flask flask_cors flask_sock
@@ -281,6 +281,7 @@ RUN apt-get update \
 COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
 RUN dpkg -i /opt/ze-monitor-*deb && rm /opt/ze-monitor-*deb
+RUN usermod -aG ze-monitor root
 COPY /src/ /opt/airc/src/
@@ -345,7 +346,9 @@ RUN apt-get update \
 WORKDIR /opt/ollama
 # Download the nightly ollama release from ipex-llm
-RUN wget -qO - https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250226-ubuntu.tgz | \
+#ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250226-ubuntu.tgz
+ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-ipex-llm-2.2.0b20250313-ubuntu.tgz
+RUN wget -qO - ${OLLAMA_VERSION} | \
     tar --strip-components=1 -C . -xzv
 # Install Python from Oracular (ollama works with 3.12)
@@ -367,7 +370,7 @@ RUN { \
     echo '#!/bin/bash' ; \
     update-alternatives --set python3 /opt/python/bin/python3.11 ; \
     echo 'source /opt/ollama/venv/bin/activate' ; \
-    echo 'bash -c "${@}"' ; \
+    echo 'if [[ "${1}" != "" ]]; then bash -c ${*}; else bash; fi' ; \
     } > /opt/ollama/shell ; \
     chmod +x /opt/ollama/shell
@@ -375,7 +378,7 @@ RUN { \
 SHELL [ "/opt/ollama/shell" ]
 # Install ollama python module
-RUN pip install ollama
+RUN pip install ollama langchain-ollama
 SHELL [ "/bin/bash", "-c" ]
@@ -393,10 +396,11 @@ RUN { \
     echo ''; \
     echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama/)?shell$ ]]; then'; \
     echo ' echo "Dropping to shell"'; \
-    echo ' exec /bin/bash'; \
+    echo ' shift'; \
+    echo ' if [[ "${1}" != "" ]]; then cmd="/opt/ollama/shell ${@}"; echo "Running: ${cmd}"; exec ${cmd}; else /opt/ollama/shell; fi'; \
     echo 'else'; \
     echo ' echo "Launching Ollama server..."'; \
-    echo ' exec ./ollama serve'; \
+    echo ' exec ollama serve'; \
     echo 'fi'; \
     } > /entrypoint.sh \
     && chmod +x /entrypoint.sh
@@ -407,8 +411,11 @@ RUN { \
     echo 'set -e'; \
     echo 'echo "Setting pip environment to /opt/ollama"'; \
     echo 'source /opt/ollama/venv/bin/activate'; \
-    echo './ollama pull mxbai-embed-large' ; \
-    echo './ollama pull deepseek-r1:7b' ; \
+    echo 'ollama pull qwen2.5:7b' ; \
+    echo 'ollama pull llama3.2' ; \
+    echo 'ollama pull mxbai-embed-large' ; \
+    echo 'ollama pull deepseek-r1:7b' ; \
+    echo 'ollama pull mistral:7b' ; \
     } > /fetch-models.sh \
     && chmod +x /fetch-models.sh
@@ -416,6 +423,8 @@ ENV PYTHONUNBUFFERED=1
 VOLUME [" /root/.ollama" ]
+ENV PATH=/opt/ollama:${PATH}
 ENTRYPOINT [ "/entrypoint.sh" ]
 
 FROM airc AS jupyter
@@ -455,7 +464,6 @@ RUN { \
     echo ' --notebook-dir=/opt/jupyter \' ; \
     echo ' --port 8888 \' ; \
     echo ' --ip 0.0.0.0 \' ; \
-    echo ' --no-browser \' ; \
     echo ' --allow-root \' ; \
     echo ' --ServerApp.token= \' ; \
     echo ' --ServerApp.password= \' ; \
@@ -469,6 +477,8 @@ RUN { \
     } > /entrypoint-jupyter.sh \
     && chmod +x /entrypoint-jupyter.sh
+# echo ' --no-browser \' ; \
 
 ENTRYPOINT [ "/entrypoint-jupyter.sh" ]
 
 FROM ubuntu:oracular AS miniircd


@@ -2,9 +2,9 @@
 AI is Really Cool
 
-This project provides a simple IRC chat client. It runs the neuralchat model, enhanced with a little bit of RAG to fetch news RSS feeds.
+This project provides an AI chat client. It runs the neuralchat model, enhanced with a little bit of RAG to fetch news RSS feeds.
 
-Internally, it is built using PyTorch 2.6 and the Intel IPEX/LLM.
+Internally, it is built using PyTorch 2.6, Intel IPEX/LLM, and Python 3.11 (several pip packages were not yet available for the Python 3.12 shipped with Ubuntu Oracular 24.10, which these containers are based on).
 
 NOTE: If running on an Intel Arc A series graphics processor, fp64 is not supported and may need to either be emulated or have the model quantized. It has been a while since I've had an A series GPU to test on, so if you run into problems please file an [issue](https://github.com/jketreno/airc/issues)--I have some routines I can put in, but don't have a way to test them.
@@ -31,16 +31,56 @@ cd airc
 docker compose build
 ```
+
+## Containers
+
+This project provides the following containers:
+
+| Container | Purpose                                                         |
+|:----------|:----------------------------------------------------------------|
+| airc      | Base container with GPU packages installed and configured        |
+| jupyter   | airc + Jupyter notebook for running Jupyter sessions             |
+| miniircd  | Tiny deployment of an IRC server for testing IRC agents          |
+| ollama    | Installation of Intel's pre-built Ollama.cpp                     |
+
+While developing airc, Hugging Face is sometimes used directly, with models loaded via PyTorch. At other times, especially during rapid development, the ollama deployment is used. This combination lets you exercise the local GPU either through the ollama service or through the Hugging Face code paths.
+
+To see which models are easily deployable with Ollama, see the [Ollama Model List](https://ollama.com/search).
+
+Prior to using a new model, you need to download it:
+
+```bash
+MODEL=qwen2.5:7b
+docker compose exec -it ollama ollama pull ${MODEL}
+```
+
+To download many common models for testing against, you can use the `fetch-models.sh` script, which will download:
+
+* qwen2.5:7b
+* llama3.2
+* mxbai-embed-large
+* deepseek-r1:7b
+* mistral:7b
+
+```bash
+docker compose exec -it ollama /fetch-models.sh
+```
+
+The persisted volume mount can grow quite large with models, GPU kernel caching, etc. During the development of this project, the `./cache` directory has grown to consume ~250G of disk space.
+
 ## Running
 
-In order to download the models, you need to have a Hugging Face token. See https://huggingface.co/settings/tokens for information on obtaining a token.
+In order to download Hugging Face models, you need to have a Hugging Face token. See https://huggingface.co/settings/tokens for information on obtaining a token.
 
 Edit .env to add the following:
 
 ```.env
 HF_ACCESS_TOKEN=<access token from huggingface>
+HF_HOME=/root/.cache
 ```
+
+HF_HOME is set for the containers to point at a volume-mounted directory so that downloaded models persist across container restarts.
 
 NOTE: Models downloaded by most examples will be placed in the ./cache directory, which is bind mounted to the container.
 
 ### AIRC
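To illustrate the workflow the README describes, once a model has been pulled, Python code inside the airc container can talk to the ollama service directly. A minimal sketch, assuming the compose stack is up, qwen2.5:7b has been pulled, and the service is reachable at the project's default endpoint:

```python
import ollama

# Assumptions: the ollama container is running and "qwen2.5:7b" has been pulled.
client = ollama.Client(host="http://ollama:11434")  # default OLLAMA_API_URL used by this project
reply = client.chat(
    model="qwen2.5:7b",
    messages=[{"role": "user", "content": "Say hello."}],
)
print(reply["message"]["content"])
```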


@@ -21,7 +21,7 @@ services:
       - ./src:/opt/airc/src:rw
       - ./doc:/opt/airc/doc:ro
       - ./results:/opt/airc/results:rw
-    cap_add: # used for running ze-monitor within airc container
+    cap_add: # used for running ze-monitor within container
       - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
       - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
       - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
@@ -47,7 +47,7 @@ services:
     volumes:
       - ./cache:/root/.cache # Cache hub models and neo_compiler_cache
       - ./ollama:/root/.ollama # Cache the ollama models
-    cap_add: # used for running ze-monitor within airc container
+    cap_add: # used for running ze-monitor within container
       - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
       - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
       - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
@@ -84,7 +84,11 @@ services:
         memory: "0" # No reserved memory (optional)
     ulimits:
       memlock: -1 # Prevents memory from being locked
-    oom_kill_disable: true # Prevents OOM killer from killing the container
+    #oom_kill_disable: true # Prevents OOM killer from killing the container
+    cap_add: # used for running ze-monitor within container
+      - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
+      - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
+      - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
 
   miniircd:
     build:


@@ -10,6 +10,32 @@ div {
   flex-direction: column;
 }
 
+.SystemInfo {
+  display: flex;
+  flex-direction: column;
+  gap: 5px;
+  padding: 5px;
+  flex-grow: 1;
+}
+
+.SystemInfoItem {
+  display: flex; /* Grid for individual items */
+  flex-direction: row;
+  flex-grow: 1;
+}
+
+.SystemInfoItem > div:first-child {
+  display: flex;
+  justify-self: end; /* Align the first column content to the right */
+  width: 10rem;
+}
+
+.SystemInfoItem > div:last-child {
+  display: flex;
+  flex-grow: 1;
+  justify-self: end; /* Align the first column content to the right */
+}
+
 .ChatBox {
   display: flex;
   flex-direction: column;


@@ -1,4 +1,4 @@
-import React, { useState, useEffect, useRef, useCallback } from 'react';
+import React, { useState, useEffect, useRef, useCallback, ReactElement } from 'react';
 import FormGroup from '@mui/material/FormGroup';
 import FormControlLabel from '@mui/material/FormControlLabel';
 import { useTheme } from '@mui/material';
@@ -70,6 +70,7 @@ interface ControlsParams {
   tools: Tool[],
   rags: Tool[],
   systemPrompt: string,
+  systemInfo: SystemInfo,
   toggleTool: (tool: Tool) => void,
   toggleRag: (tool: Tool) => void,
   setRags: (rags: Tool[]) => void,
@@ -77,7 +78,50 @@ interface ControlsParams {
   reset: (types: ("rags" | "tools" | "history" | "system-prompt")[], message: string) => Promise<void>
 };
 
-const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemPrompt, reset }: ControlsParams) => {
+type SystemInfo = {
+  "Installed RAM (GB)": string,
+  "Graphics Cards": string[],
+  "CPU": string
+};
+
+const SystemInfoComponent: React.FC<{ systemInfo: SystemInfo }> = ({ systemInfo }) => {
+  const [systemElements, setSystemElements] = useState<ReactElement[]>([]);
+
+  const convertToSymbols = (text: string) => {
+    return text
+      .replace(/\(R\)/g, '®')   // Replace (R) with the ® symbol
+      .replace(/\(C\)/g, '©')   // Replace (C) with the © symbol
+      .replace(/\(TM\)/g, '™'); // Replace (TM) with the ™ symbol
+  };
+
+  useEffect(() => {
+    const elements = Object.entries(systemInfo).flatMap(([k, v]) => {
+      // If v is an array, repeat for each card
+      if (Array.isArray(v)) {
+        return v.map((card, index) => (
+          <div key={index} className="SystemInfoItem">
+            <div>{convertToSymbols(k)} {index}</div>
+            <div>{convertToSymbols(card)}</div>
+          </div>
+        ));
+      }
+      // If it's not an array, handle normally
+      return (
+        <div key={k} className="SystemInfoItem">
+          <div>{convertToSymbols(k)}</div>
+          <div>{convertToSymbols(String(v))}</div>
+        </div>
+      );
+    });
+    setSystemElements(elements);
+  }, [systemInfo]);
+
+  return <div className="SystemInfo">{systemElements}</div>;
+};
+
+const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemPrompt, reset, systemInfo }: ControlsParams) => {
   const [editSystemPrompt, setEditSystemPrompt] = useState<string>(systemPrompt);
 
   useEffect(() => {
@@ -110,9 +154,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
       </Typography>
       <Accordion>
-        <AccordionSummary
-          expandIcon={<ExpandMoreIcon />}
-        >
+        <AccordionSummary expandIcon={<ExpandMoreIcon />}>
           <Typography component="span">System Prompt</Typography>
         </AccordionSummary>
         <AccordionActions style={{ flexDirection: "column" }}>
@@ -135,9 +177,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
       </AccordionActions>
     </Accordion>
     <Accordion>
-      <AccordionSummary
-        expandIcon={<ExpandMoreIcon />}
-      >
+      <AccordionSummary expandIcon={<ExpandMoreIcon />}>
         <Typography component="span">Tools</Typography>
       </AccordionSummary>
       <AccordionDetails>
@@ -157,9 +197,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
       </AccordionActions>
     </Accordion>
     <Accordion>
-      <AccordionSummary
-        expandIcon={<ExpandMoreIcon />}
-      >
+      <AccordionSummary expandIcon={<ExpandMoreIcon />}>
         <Typography component="span">RAG</Typography>
       </AccordionSummary>
       <AccordionDetails>
@@ -178,6 +216,17 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
       }</FormGroup>
       </AccordionActions>
     </Accordion>
+    <Accordion>
+      <AccordionSummary expandIcon={<ExpandMoreIcon />}>
+        <Typography component="span">System Information</Typography>
+      </AccordionSummary>
+      <AccordionDetails>
+        The server is running on the following hardware:
+      </AccordionDetails>
+      <AccordionActions>
+        <SystemInfoComponent systemInfo={systemInfo} />
+      </AccordionActions>
+    </Accordion>
     <Button onClick={() => { reset(["history"], "History cleared."); }}>Clear Chat History</Button>
     <Button onClick={() => { reset(["rags", "tools", "system-prompt"], "Default settings restored.") }}>Reset to defaults</Button>
   </div>);
@@ -199,6 +248,7 @@ const App = () => {
   const [rags, setRags] = useState<Tool[]>([]);
   const [systemPrompt, setSystemPrompt] = useState<string>("");
   const [serverSystemPrompt, setServerSystemPrompt] = useState<string>("");
+  const [systemInfo, setSystemInfo] = useState<SystemInfo | undefined>(undefined);
 
   // Scroll to bottom of conversation when conversation updates
   useEffect(() => {
@@ -214,6 +264,27 @@ const App = () => {
     setSnackOpen(true);
   }, []);
 
+  // Get the system information
+  useEffect(() => {
+    if (systemInfo !== undefined || sessionId === undefined) {
+      return;
+    }
+    fetch(getConnectionBase(loc) + `/api/system-info/${sessionId}`, {
+      method: 'GET',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+    })
+      .then(response => response.json())
+      .then(data => {
+        setSystemInfo(data);
+      })
+      .catch(error => {
+        console.error('Error obtaining system information:', error);
+        setSnack("Unable to obtain system information.", "error");
+      });
+  }, [systemInfo, setSystemInfo, loc, setSnack, sessionId])
+
   // Set the initial chat history to "loading" or the welcome message if loaded.
   useEffect(() => {
     if (sessionId === undefined) {
@@ -468,7 +539,7 @@ const App = () => {
   const drawer = (
     <>
-      {sessionId !== undefined && <Controls {...{ tools, rags, reset, systemPrompt, toggleTool, toggleRag, setRags, setSystemPrompt }} />}
+      {sessionId !== undefined && systemInfo !== undefined && <Controls {...{ tools, rags, reset, systemPrompt, toggleTool, toggleRag, setRags, setSystemPrompt, systemInfo }} />}
     </>
   );
@@ -735,7 +806,8 @@ const App = () => {
       )}
       {message.role === 'assistant' ? (
         <div className="markdown-content">
-          <Markdown remarkPlugins={[remarkMath]} rehypePlugins={[rehypeKatex]} children={formattedContent} />
+          <Markdown children={formattedContent} />
+          {/* <Markdown remarkPlugins={[remarkMath]} rehypePlugins={[rehypeKatex]} children={formattedContent} /> */}
         </div>
       ) : (
         <div>{formattedContent}</div>


@@ -3,7 +3,6 @@
 # Standard library modules (no try-except needed)
 import argparse
 import asyncio
-import anyio
 import json
 import logging
 import os
@@ -15,6 +14,8 @@ import textwrap
 import threading
 import uuid
 import random
+import subprocess
+import re
 
 def try_import(module_name, pip_name=None):
     try:
@@ -26,7 +27,6 @@ def try_import(module_name, pip_name=None):
 # Third-party modules with import checks
 try_import('gradio')
 try_import('ollama')
-try_import('openai')
 try_import('pytz')
 try_import('requests')
 try_import('yfinance', 'yfinance')
@@ -35,13 +35,13 @@ try_import('geopy', 'geopy')
 try_import('hyphen', 'PyHyphen')
 try_import('bs4', 'beautifulsoup4')
 try_import('nltk')
+try_import('fastapi')
 
 import nltk
 from dotenv import load_dotenv
 from geopy.geocoders import Nominatim
 import gradio as gr
 import ollama
-import openai
 import pytz
 import requests
 import yfinance as yf
@@ -50,6 +50,7 @@ from bs4 import BeautifulSoup
 from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
 from fastapi.responses import JSONResponse, StreamingResponse, FileResponse, RedirectResponse
 from fastapi.middleware.cors import CORSMiddleware
+from utils import rag
 
 from tools import (
     get_weather_by_location,
@@ -63,11 +64,59 @@ rags = [
     { "name": "LKML", "enabled": False, "description": "Full associative data for entire LKML mailing list archive." },
 ]
 
+def get_installed_ram():
+    try:
+        with open('/proc/meminfo', 'r') as f:
+            meminfo = f.read()
+        match = re.search(r'MemTotal:\s+(\d+)', meminfo)
+        if match:
+            return f"{round(int(match.group(1)) / 1024**2, 2)}GB"  # Convert KB to GB
+    except Exception as e:
+        return f"Error retrieving RAM: {e}"
+
+def get_graphics_cards():
+    gpus = []
+    try:
+        # Run the ze-monitor utility
+        result = subprocess.run(['ze-monitor'], capture_output=True, text=True, check=True)
+        # Clean up the output (remove leading/trailing whitespace and newlines)
+        output = result.stdout.strip()
+        for line in output.splitlines():
+            # Updated regex to handle GPU names containing parentheses
+            match = re.match(r'^[^(]*\((.*)\)', line)
+            if match:
+                gpus.append(match.group(1))
+        return gpus
+    except Exception as e:
+        return f"Error retrieving GPU info: {e}"
+
+def get_cpu_info():
+    try:
+        with open('/proc/cpuinfo', 'r') as f:
+            cpuinfo = f.read()
+        model_match = re.search(r'model name\s+:\s+(.+)', cpuinfo)
+        cores_match = re.findall(r'processor\s+:\s+\d+', cpuinfo)
+        if model_match and cores_match:
+            return f"{model_match.group(1)} with {len(cores_match)} cores"
+    except Exception as e:
+        return f"Error retrieving CPU info: {e}"
+
+def system_info():
+    return {
+        "Installed RAM": get_installed_ram(),
+        "Graphics Card": get_graphics_cards(),
+        "CPU": get_cpu_info()
+    }
+
 # %%
 # Defaults
 OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint
 #MODEL_NAME = "deepseek-r1:7b"
-MODEL_NAME = "llama3.2"
+#MODEL_NAME = "llama3.2"
+MODEL_NAME = "qwen2.5:7b"
 LOG_LEVEL="debug"
 USE_TLS=False
 WEB_HOST="0.0.0.0"
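For context, `system_info()` assembles the payload that the new `/api/system-info` endpoint returns and that the React `SystemInfoComponent` renders. A minimal sketch of inspecting it locally; the commented shape uses placeholders rather than real output:

```python
import json

info = system_info()
# Shape follows system_info() above; values depend entirely on the host, e.g.:
# {
#   "Installed RAM": "<total>GB",
#   "Graphics Card": ["<GPU 0 name>", "..."],
#   "CPU": "<model name> with <N> cores"
# }
print(json.dumps(info, indent=2))
```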
@@ -420,6 +469,10 @@ class WebServer:
             system_prompt = context["system"][0]["content"];
             return JSONResponse({ "system-prompt": system_prompt })
 
+        @self.app.get('/api/system-info/{context_id}')
+        async def get_system_info(context_id: str):
+            return JSONResponse(system_info())
+
         @self.app.post('/api/chat/{context_id}')
         async def chat_endpoint(context_id: str, request: Request):
             context = self.upsert_context(context_id)
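To exercise the new `/api/system-info` endpoint from the host, a request along these lines should work; the port and context id are assumptions (use whatever `args.web_port` the server is started with and a session id issued by the UI):

```python
import requests

WEB_PORT = 8000              # hypothetical; match args.web_port in main()
CONTEXT_ID = "<context-id>"  # hypothetical; any session id issued by the UI

resp = requests.get(f"http://localhost:{WEB_PORT}/api/system-info/{CONTEXT_ID}", timeout=10)
resp.raise_for_status()
print(resp.json())
```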
@@ -662,5 +715,4 @@ def main():
     logging.info(f"Starting web server at http://{args.web_host}:{args.web_port}")
     web_server.run(host=args.web_host, port=args.web_port, use_reloader=False)
 
-# Run the main function using anyio
 main()


@@ -2,9 +2,7 @@
 from . import defines
 
 # Import rest as `utils.*` accessible
-from .chunk import *
-from .rss import *
-from .chroma import *
+from .rag import *
 
 # Expose only public names (avoid importing hidden/internal names)
 __all__ = [name for name in dir() if not name.startswith("_")]


@@ -1,4 +1,4 @@
 ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
-model="deepseek-r1:7b"
+model="qwen2.5:7b"
 encoding_model="mxbai-embed-large"
 persist_directory="./chromadb"

src/utils/rag.py (new file)

@@ -0,0 +1 @@
+rag = "exists"