Docs, trademarks, and system info output
parent f5ce84a310
commit 973b442642
Dockerfile (32 changes)
@@ -120,7 +120,7 @@ RUN { \
 echo '#!/bin/bash' ; \
 echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
 echo 'source /opt/ipex-llm/venv/bin/activate' ; \
-echo 'bash -c "${@}"' ; \
+echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi' ; \
 } > /opt/ipex-llm/shell ; \
 chmod +x /opt/ipex-llm/shell
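For readability, this is the wrapper script those echoes generate, reconstructed as a plain file (a sketch; paths as in the Dockerfile above):

```bash
#!/bin/bash
update-alternatives --set python3 /opt/python/bin/python3.11
source /opt/ipex-llm/venv/bin/activate
# With no arguments, drop into an interactive shell instead of exiting.
if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash; fi
```

Note that with `bash -c "${@}"`, only the first argument becomes the `-c` command string; any remaining arguments become its positional parameters. The ollama variant of this wrapper (later hunk) uses `${*}` instead, which joins all arguments into one command string.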
@@ -214,7 +214,7 @@ RUN pip install "transformers>=4.45.1"
 RUN pip install 'https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl'
 
 # Install ollama python module
-RUN pip install ollama
+RUN pip install ollama langchain-ollama
 
 # pydle does not work with newer asyncio due to coroutine
 # being deprecated. Patch to work.
@@ -226,7 +226,7 @@ RUN pip install pydle \
 && rm /opt/pydle.patch
 
 RUN pip install setuptools --upgrade
-RUN pip install ollama
+RUN pip install ollama langchain-ollama
 RUN pip install feedparser bs4 chromadb
 RUN pip install tiktoken
 RUN pip install flask flask_cors flask_sock
@@ -281,6 +281,7 @@ RUN apt-get update \
 
 COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
 RUN dpkg -i /opt/ze-monitor-*deb && rm /opt/ze-monitor-*deb
+RUN usermod -aG ze-monitor root
 
 COPY /src/ /opt/airc/src/
 
@@ -345,7 +346,9 @@ RUN apt-get update \
 WORKDIR /opt/ollama
 
 # Download the nightly ollama release from ipex-llm
-RUN wget -qO - https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250226-ubuntu.tgz | \
+#ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250226-ubuntu.tgz
+ENV OLLAMA_VERSION=https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-ipex-llm-2.2.0b20250313-ubuntu.tgz
+RUN wget -qO - ${OLLAMA_VERSION} | \
 tar --strip-components=1 -C . -xzv
 
 # Install Python from Oracular (ollama works with 3.12)
@@ -367,7 +370,7 @@ RUN { \
 echo '#!/bin/bash' ; \
 update-alternatives --set python3 /opt/python/bin/python3.11 ; \
 echo 'source /opt/ollama/venv/bin/activate' ; \
-echo 'bash -c "${@}"' ; \
+echo 'if [[ "${1}" != "" ]]; then bash -c ${*}; else bash; fi' ; \
 } > /opt/ollama/shell ; \
 chmod +x /opt/ollama/shell
@@ -375,7 +378,7 @@ RUN { \
 SHELL [ "/opt/ollama/shell" ]
 
 # Install ollama python module
-RUN pip install ollama
+RUN pip install ollama langchain-ollama
 
 SHELL [ "/bin/bash", "-c" ]
 
@@ -393,10 +396,11 @@ RUN { \
 echo ''; \
 echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama/)?shell$ ]]; then'; \
 echo ' echo "Dropping to shell"'; \
-echo ' exec /bin/bash'; \
+echo ' shift'; \
+echo ' if [[ "${1}" != "" ]]; then cmd="/opt/ollama/shell ${@}"; echo "Running: ${cmd}"; exec ${cmd}; else /opt/ollama/shell; fi'; \
 echo 'else'; \
 echo ' echo "Launching Ollama server..."'; \
-echo ' exec ./ollama serve'; \
+echo ' exec ollama serve'; \
 echo 'fi'; \
 } > /entrypoint.sh \
 && chmod +x /entrypoint.sh
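Assembled from the echoes in this hunk, the generated /entrypoint.sh reads roughly as follows (a sketch; the shebang comes from earlier lines of the RUN block not shown here):

```bash
#!/bin/bash

if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama/)?shell$ ]]; then
  echo "Dropping to shell"
  shift
  # Forward any remaining arguments through the venv wrapper
  if [[ "${1}" != "" ]]; then cmd="/opt/ollama/shell ${@}"; echo "Running: ${cmd}"; exec ${cmd}; else /opt/ollama/shell; fi
else
  echo "Launching Ollama server..."
  exec ollama serve
fi
```

The switch from `exec ./ollama serve` to `exec ollama serve` works because a later hunk adds `ENV PATH=/opt/ollama:${PATH}`.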
@@ -407,8 +411,11 @@ RUN { \
 echo 'set -e'; \
 echo 'echo "Setting pip environment to /opt/ollama"'; \
 echo 'source /opt/ollama/venv/bin/activate'; \
-echo './ollama pull mxbai-embed-large' ; \
-echo './ollama pull deepseek-r1:7b' ; \
+echo 'ollama pull qwen2.5:7b' ; \
+echo 'ollama pull llama3.2' ; \
+echo 'ollama pull mxbai-embed-large' ; \
+echo 'ollama pull deepseek-r1:7b' ; \
+echo 'ollama pull mistral:7b' ; \
 } > /fetch-models.sh \
 && chmod +x /fetch-models.sh
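This produces a /fetch-models.sh along these lines (sketch; the shebang again comes from earlier in the RUN block, and the `./ollama` → `ollama` change matches the PATH addition below):

```bash
#!/bin/bash
set -e
echo "Setting pip environment to /opt/ollama"
source /opt/ollama/venv/bin/activate
ollama pull qwen2.5:7b
ollama pull llama3.2
ollama pull mxbai-embed-large
ollama pull deepseek-r1:7b
ollama pull mistral:7b
```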
@@ -416,6 +423,8 @@ ENV PYTHONUNBUFFERED=1
 
 VOLUME [ "/root/.ollama" ]
 
+ENV PATH=/opt/ollama:${PATH}
+
 ENTRYPOINT [ "/entrypoint.sh" ]
 
 FROM airc AS jupyter
@@ -455,7 +464,6 @@ RUN { \
 echo ' --notebook-dir=/opt/jupyter \' ; \
 echo ' --port 8888 \' ; \
 echo ' --ip 0.0.0.0 \' ; \
-echo ' --no-browser \' ; \
 echo ' --allow-root \' ; \
 echo ' --ServerApp.token= \' ; \
 echo ' --ServerApp.password= \' ; \
@@ -469,6 +477,8 @@ RUN { \
 } > /entrypoint-jupyter.sh \
 && chmod +x /entrypoint-jupyter.sh
 
+# echo ' --no-browser \' ; \
+
 ENTRYPOINT [ "/entrypoint-jupyter.sh" ]
 
 FROM ubuntu:oracular AS miniircd
README.md (46 changes)
@@ -2,9 +2,9 @@
 
 AI is Really Cool
 
-This project provides a simple IRC chat client. It runs the neuralchat model, enhanced with a little bit of RAG to fetch news RSS feeds.
+This project provides an AI chat client. It runs the neuralchat model, enhanced with a little bit of RAG to fetch news RSS feeds.
 
-Internally, it is built using PyTorch 2.6 and the Intel IPEX/LLM.
+Internally, it is built using PyTorch 2.6, Intel IPEX/LLM, and Python 3.11 (several pip packages were not yet available for Python 3.12, which ships with Ubuntu Oracular 24.10, the base for these containers).
 
 NOTE: If running on an Intel Arc A series graphics processor, fp64 is not supported and may need to either be emulated or have the model quantized. It has been a while since I've had an A series GPU to test on, so if you run into problems please file an [issue](https://github.com/jketreno/airc/issues)--I have some routines I can put in, but don't have a way to test them.
@@ -31,16 +31,56 @@ cd airc
 docker compose build
 ```
 
+## Containers
+
+This project provides the following containers:
+
+| Container | Purpose                                                    |
+|:----------|:-----------------------------------------------------------|
+| airc      | Base container with GPU packages installed and configured  |
+| jupyter   | airc + Jupyter notebook for running Jupyter sessions       |
+| miniircd  | Tiny deployment of an IRC server for testing IRC agents    |
+| ollama    | Installation of Intel's pre-built Ollama.cpp               |
+
+While developing airc, Hugging Face is sometimes used directly, with models loaded via PyTorch; at other times, especially during rapid development, the ollama deployment is used. This combination makes it easy to exercise the local GPU either through the ollama server or through the Hugging Face code path.
+
+To see which models are easily deployable with Ollama, see the [Ollama Model List](https://ollama.com/search).
+
+Prior to using a new model, you need to download it:
+
+```bash
+MODEL=qwen2.5:7b
+docker compose exec -it ollama ollama pull ${MODEL}
+```
+
+To download many common models for testing against, you can use the `fetch-models.sh` script, which will download:
+
+* qwen2.5:7b
+* llama3.2
+* mxbai-embed-large
+* deepseek-r1:7b
+* mistral:7b
+
+```bash
+docker compose exec -it ollama /fetch-models.sh
+```
+
+The persisted volume mount can grow quite large with models, GPU kernel caching, etc. During the development of this project, the `./cache` directory has grown to consume ~250G of disk space.
+
 ## Running
 
-In order to download the models, you need to have a Hugging Face token. See https://huggingface.co/settings/tokens for information on obtaining a token.
+In order to download Hugging Face models, you need to have a Hugging Face token. See https://huggingface.co/settings/tokens for information on obtaining a token.
 
 Edit .env to add the following:
 
 ```.env
 HF_ACCESS_TOKEN=<access token from huggingface>
 HF_HOME=/root/.cache
 ```
 
 HF_HOME is set for running in the containers to point to a volume mounted
 directory which will enable model downloads to be persisted.
 
 NOTE: Models downloaded by most examples will be placed in the ./cache directory, which is bind mounted to the container.
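Two quick checks tie these notes together (a sketch; service names and paths as used above): `ollama list` shows which models have already been pulled, and `du` reports how large the bind-mounted caches have grown.

```bash
# See which models are already downloaded
docker compose exec -it ollama ollama list

# Check how much disk the bind-mounted caches are using
du -sh ./cache ./ollama
```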
 
 ### AIRC
docker-compose.yml

@@ -21,7 +21,7 @@ services:
       - ./src:/opt/airc/src:rw
       - ./doc:/opt/airc/doc:ro
       - ./results:/opt/airc/results:rw
-    cap_add: # used for running ze-monitor within airc container
+    cap_add: # used for running ze-monitor within container
      - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
      - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
      - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
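These capabilities are what let ze-monitor read GPU telemetry from inside the container. A quick smoke test (a sketch, assuming the compose service is named airc as in the README's container table):

```bash
# Lists the devices ze-monitor can see; the server's
# get_graphics_cards() parses exactly this output.
docker compose exec airc ze-monitor
```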
@@ -47,7 +47,7 @@ services:
     volumes:
       - ./cache:/root/.cache # Cache hub models and neo_compiler_cache
       - ./ollama:/root/.ollama # Cache the ollama models
-    cap_add: # used for running ze-monitor within airc container
+    cap_add: # used for running ze-monitor within container
      - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
      - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
      - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
@@ -84,7 +84,11 @@ services:
           memory: "0" # No reserved memory (optional)
     ulimits:
       memlock: -1 # No limit on locked memory (unlimited memlock)
-    oom_kill_disable: true # Prevents OOM killer from killing the container
+    #oom_kill_disable: true # Prevents OOM killer from killing the container
+    cap_add: # used for running ze-monitor within container
+      - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
+      - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
+      - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
 
   miniircd:
     build:
@@ -10,6 +10,32 @@ div {
   flex-direction: column;
 }
 
+.SystemInfo {
+  display: flex;
+  flex-direction: column;
+  gap: 5px;
+  padding: 5px;
+  flex-grow: 1;
+}
+
+.SystemInfoItem {
+  display: flex; /* Flex row for individual items */
+  flex-direction: row;
+  flex-grow: 1;
+}
+
+.SystemInfoItem > div:first-child {
+  display: flex;
+  justify-self: end; /* Align the first column content to the right */
+  width: 10rem;
+}
+
+.SystemInfoItem > div:last-child {
+  display: flex;
+  flex-grow: 1;
+  justify-self: end; /* Let the value column fill the remaining width */
+}
+
 .ChatBox {
   display: flex;
   flex-direction: column;
@@ -1,4 +1,4 @@
-import React, { useState, useEffect, useRef, useCallback } from 'react';
+import React, { useState, useEffect, useRef, useCallback, ReactElement } from 'react';
 import FormGroup from '@mui/material/FormGroup';
 import FormControlLabel from '@mui/material/FormControlLabel';
 import { useTheme } from '@mui/material';
@@ -70,6 +70,7 @@ interface ControlsParams {
   tools: Tool[],
   rags: Tool[],
   systemPrompt: string,
+  systemInfo: SystemInfo,
   toggleTool: (tool: Tool) => void,
   toggleRag: (tool: Tool) => void,
   setRags: (rags: Tool[]) => void,
@@ -77,7 +78,50 @@ interface ControlsParams {
   reset: (types: ("rags" | "tools" | "history" | "system-prompt")[], message: string) => Promise<void>
 };
 
-const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemPrompt, reset }: ControlsParams) => {
+type SystemInfo = {
+  "Installed RAM (GB)": string,
+  "Graphics Cards": string[],
+  "CPU": string
+};
+
+const SystemInfoComponent: React.FC<{ systemInfo: SystemInfo }> = ({ systemInfo }) => {
+  const [systemElements, setSystemElements] = useState<ReactElement[]>([]);
+
+  const convertToSymbols = (text: string) => {
+    return text
+      .replace(/\(R\)/g, '®')   // Replace (R) with the ® symbol
+      .replace(/\(C\)/g, '©')   // Replace (C) with the © symbol
+      .replace(/\(TM\)/g, '™'); // Replace (TM) with the ™ symbol
+  };
+
+  useEffect(() => {
+    const elements = Object.entries(systemInfo).flatMap(([k, v]) => {
+      // If v is an array, repeat for each card
+      if (Array.isArray(v)) {
+        return v.map((card, index) => (
+          <div key={index} className="SystemInfoItem">
+            <div>{convertToSymbols(k)} {index}</div>
+            <div>{convertToSymbols(card)}</div>
+          </div>
+        ));
+      }
+
+      // If it's not an array, handle normally
+      return (
+        <div key={k} className="SystemInfoItem">
+          <div>{convertToSymbols(k)}</div>
+          <div>{convertToSymbols(String(v))}</div>
+        </div>
+      );
+    });
+
+    setSystemElements(elements);
+  }, [systemInfo]);
+
+  return <div className="SystemInfo">{systemElements}</div>;
+};
+
+const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemPrompt, reset, systemInfo }: ControlsParams) => {
   const [editSystemPrompt, setEditSystemPrompt] = useState<string>(systemPrompt);
 
   useEffect(() => {
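The convertToSymbols helper is the "trademarks" part of this commit: hardware strings reported by the backend (CPU model names from /proc/cpuinfo, GPU names from ze-monitor) carry plain-text (R)/(C)/(TM) markers, which the component maps to real symbols. For example (input string illustrative, not from the commit):

```typescript
// "(R)" and "(TM)" markers as they typically appear in a CPU model name
convertToSymbols("Intel(R) Core(TM) i7-14700K");
// => "Intel® Core™ i7-14700K"
```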
@@ -110,9 +154,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
 
       </Typography>
       <Accordion>
-        <AccordionSummary
-          expandIcon={<ExpandMoreIcon />}
-        >
+        <AccordionSummary expandIcon={<ExpandMoreIcon />}>
          <Typography component="span">System Prompt</Typography>
         </AccordionSummary>
         <AccordionActions style={{ flexDirection: "column" }}>
@@ -135,9 +177,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
         </AccordionActions>
       </Accordion>
       <Accordion>
-        <AccordionSummary
-          expandIcon={<ExpandMoreIcon />}
-        >
+        <AccordionSummary expandIcon={<ExpandMoreIcon />}>
         <Typography component="span">Tools</Typography>
         </AccordionSummary>
         <AccordionDetails>
@@ -157,9 +197,7 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
         </AccordionActions>
       </Accordion>
       <Accordion>
-        <AccordionSummary
-          expandIcon={<ExpandMoreIcon />}
-        >
+        <AccordionSummary expandIcon={<ExpandMoreIcon />}>
         <Typography component="span">RAG</Typography>
         </AccordionSummary>
         <AccordionDetails>
@@ -178,6 +216,17 @@ const Controls = ({ tools, rags, systemPrompt, toggleTool, toggleRag, setSystemP
       }</FormGroup>
       </AccordionActions>
       </Accordion>
+      <Accordion>
+        <AccordionSummary expandIcon={<ExpandMoreIcon />}>
+          <Typography component="span">System Information</Typography>
+        </AccordionSummary>
+        <AccordionDetails>
+          The server is running on the following hardware:
+        </AccordionDetails>
+        <AccordionActions>
+          <SystemInfoComponent systemInfo={systemInfo} />
+        </AccordionActions>
+      </Accordion>
       <Button onClick={() => { reset(["history"], "History cleared."); }}>Clear Chat History</Button>
       <Button onClick={() => { reset(["rags", "tools", "system-prompt"], "Default settings restored.") }}>Reset to defaults</Button>
     </div>);
@@ -199,6 +248,7 @@ const App = () => {
   const [rags, setRags] = useState<Tool[]>([]);
   const [systemPrompt, setSystemPrompt] = useState<string>("");
   const [serverSystemPrompt, setServerSystemPrompt] = useState<string>("");
+  const [systemInfo, setSystemInfo] = useState<SystemInfo | undefined>(undefined);
 
   // Scroll to bottom of conversation when conversation updates
   useEffect(() => {
@@ -214,6 +264,27 @@ const App = () => {
     setSnackOpen(true);
   }, []);
 
+  // Get the system information
+  useEffect(() => {
+    if (systemInfo !== undefined || sessionId === undefined) {
+      return;
+    }
+    fetch(getConnectionBase(loc) + `/api/system-info/${sessionId}`, {
+      method: 'GET',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+    })
+      .then(response => response.json())
+      .then(data => {
+        setSystemInfo(data);
+      })
+      .catch(error => {
+        console.error('Error obtaining system information:', error);
+        setSnack("Unable to obtain system information.", "error");
+      });
+  }, [systemInfo, setSystemInfo, loc, setSnack, sessionId])
+
   // Set the initial chat history to "loading" or the welcome message if loaded.
   useEffect(() => {
     if (sessionId === undefined) {
@@ -468,7 +539,7 @@ const App = () => {
 
   const drawer = (
     <>
-      {sessionId !== undefined && <Controls {...{ tools, rags, reset, systemPrompt, toggleTool, toggleRag, setRags, setSystemPrompt }} />}
+      {sessionId !== undefined && systemInfo !== undefined && <Controls {...{ tools, rags, reset, systemPrompt, toggleTool, toggleRag, setRags, setSystemPrompt, systemInfo }} />}
     </>
   );
 
@@ -735,7 +806,8 @@ const App = () => {
           )}
           {message.role === 'assistant' ? (
             <div className="markdown-content">
-              <Markdown remarkPlugins={[remarkMath]} rehypePlugins={[rehypeKatex]} children={formattedContent} />
+              <Markdown children={formattedContent} />
+              {/* <Markdown remarkPlugins={[remarkMath]} rehypePlugins={[rehypeKatex]} children={formattedContent} /> */}
             </div>
           ) : (
             <div>{formattedContent}</div>
@@ -3,7 +3,6 @@
 # Standard library modules (no try-except needed)
 import argparse
 import asyncio
-import anyio
 import json
 import logging
 import os
@@ -15,6 +14,8 @@ import textwrap
 import threading
 import uuid
 import random
+import subprocess
+import re
 
 def try_import(module_name, pip_name=None):
     try:
@@ -26,7 +27,6 @@ def try_import(module_name, pip_name=None):
 # Third-party modules with import checks
-try_import('gradio')
 try_import('ollama')
 try_import('openai')
 try_import('pytz')
 try_import('requests')
 try_import('yfinance', 'yfinance')
@@ -35,13 +35,13 @@ try_import('geopy', 'geopy')
 try_import('hyphen', 'PyHyphen')
 try_import('bs4', 'beautifulsoup4')
 try_import('nltk')
+try_import('fastapi')
 
 import nltk
 from dotenv import load_dotenv
 from geopy.geocoders import Nominatim
-import gradio as gr
 import ollama
 import openai
 import pytz
 import requests
 import yfinance as yf
@@ -50,6 +50,7 @@ from bs4 import BeautifulSoup
 from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
 from fastapi.responses import JSONResponse, StreamingResponse, FileResponse, RedirectResponse
 from fastapi.middleware.cors import CORSMiddleware
+from utils import rag
 
 from tools import (
     get_weather_by_location,
@@ -63,11 +64,59 @@ rags = [
     { "name": "LKML", "enabled": False, "description": "Full associative data for entire LKML mailing list archive." },
 ]
 
+
+def get_installed_ram():
+    try:
+        with open('/proc/meminfo', 'r') as f:
+            meminfo = f.read()
+        match = re.search(r'MemTotal:\s+(\d+)', meminfo)
+        if match:
+            return f"{round(int(match.group(1)) / 1024**2, 2)}GB"  # Convert KB to GB
+    except Exception as e:
+        return f"Error retrieving RAM: {e}"
+
+def get_graphics_cards():
+    gpus = []
+    try:
+        # Run the ze-monitor utility
+        result = subprocess.run(['ze-monitor'], capture_output=True, text=True, check=True)
+
+        # Clean up the output (remove leading/trailing whitespace and newlines)
+        output = result.stdout.strip()
+        for line in output.splitlines():
+            # Updated regex to handle GPU names containing parentheses
+            match = re.match(r'^[^(]*\((.*)\)', line)
+            if match:
+                gpus.append(match.group(1))
+
+        return gpus
+    except Exception as e:
+        return f"Error retrieving GPU info: {e}"
+
+def get_cpu_info():
+    try:
+        with open('/proc/cpuinfo', 'r') as f:
+            cpuinfo = f.read()
+        model_match = re.search(r'model name\s+:\s+(.+)', cpuinfo)
+        cores_match = re.findall(r'processor\s+:\s+\d+', cpuinfo)
+        if model_match and cores_match:
+            return f"{model_match.group(1)} with {len(cores_match)} cores"
+    except Exception as e:
+        return f"Error retrieving CPU info: {e}"
+
+def system_info():
+    return {
+        "Installed RAM": get_installed_ram(),
+        "Graphics Card": get_graphics_cards(),
+        "CPU": get_cpu_info()
+    }
+
 # %%
 # Defaults
 OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint
 #MODEL_NAME = "deepseek-r1:7b"
-MODEL_NAME = "llama3.2"
+#MODEL_NAME = "llama3.2"
+MODEL_NAME = "qwen2.5:7b"
 LOG_LEVEL="debug"
 USE_TLS=False
 WEB_HOST="0.0.0.0"
@@ -419,7 +468,11 @@ class WebServer:
             context = self.upsert_context(context_id)
             system_prompt = context["system"][0]["content"];
             return JSONResponse({ "system-prompt": system_prompt })
 
+        @self.app.get('/api/system-info/{context_id}')
+        async def get_system_info(context_id: str):
+            return JSONResponse(system_info())
+
         @self.app.post('/api/chat/{context_id}')
         async def chat_endpoint(context_id: str, request: Request):
             context = self.upsert_context(context_id)
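Once the server is running, the new route can be exercised directly (a sketch: host, port, and session id are placeholders; the real port comes from --web-port):

```bash
curl -s http://localhost:8000/api/system-info/any-session-id
# Example response shape (values illustrative, not from the commit):
# {"Installed RAM": "62.54GB",
#  "Graphics Card": ["Intel(R) Arc(TM) A770 Graphics"],
#  "CPU": "Intel(R) Core(TM) i7-14700K with 28 cores"}
```

Note that the backend keys ("Installed RAM", "Graphics Card") do not match the frontend's SystemInfo type ("Installed RAM (GB)", "Graphics Cards"); the UI still renders them because SystemInfoComponent iterates Object.entries rather than the declared fields.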
@@ -662,5 +715,4 @@ def main():
     logging.info(f"Starting web server at http://{args.web_host}:{args.web_port}")
     web_server.run(host=args.web_host, port=args.web_port, use_reloader=False)
 
-# Run the main function using anyio
 main()
@@ -2,9 +2,7 @@
 from . import defines
 
 # Import rest as `utils.*` accessible
-from .chunk import *
-from .rss import *
-from .chroma import *
+from .rag import *
 
 # Expose only public names (avoid importing hidden/internal names)
 __all__ = [name for name in dir() if not name.startswith("_")]
@@ -1,4 +1,4 @@
 ollama_api_url="http://ollama:11434" # Default Ollama local endpoint
-model="deepseek-r1:7b"
+model="qwen2.5:7b"
 encoding_model="mxbai-embed-large"
 persist_directory="./chromadb"
src/utils/rag.py (new file, 1 line)

@@ -0,0 +1 @@
+rag = "exists"