James Ketr 2025-04-04 22:31:51 -07:00
parent 5191157684
commit 5b84aa9ee1
124 changed files with 288 additions and 802771 deletions


@@ -216,15 +216,6 @@ RUN pip install 'https://github.com/bitsandbytes-foundation/bitsandbytes/release
# Install ollama python module
RUN pip install ollama langchain-ollama
# pydle does not work with newer asyncio releases because the coroutine
# decorator was deprecated and later removed. Patch it so it works.
COPY /src/pydle.patch /opt/pydle.patch
RUN pip install pydle \
&& patch -d /opt/airc/venv/lib/python3*/site-packages/pydle \
-p1 < /opt/pydle.patch \
&& rm /opt/pydle.patch
RUN pip install setuptools --upgrade
RUN pip install ollama langchain-ollama
RUN pip install feedparser bs4 chromadb
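For context on the pydle patch mentioned above: the breakage comes from the asyncio coroutine decorator, which newer Python releases deprecated and then removed (it is gone in 3.11). The actual src/pydle.patch is not reproduced in this commit; the sketch below only illustrates, under that assumption, the kind of rewrite such a patch applies.
import asyncio
# Old pydle-style callback (breaks once asyncio.coroutine is removed):
#     @asyncio.coroutine
#     def on_connect(self):
#         yield from self.join('#channel')
# The native-coroutine form a compatibility patch typically rewrites it to:
async def on_connect(self):
    await asyncio.sleep(0)   # placeholder for 'await self.join(...)' in real pydle code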


@@ -1,444 +0,0 @@
FROM ubuntu:oracular AS pytorch-build
SHELL [ "/bin/bash", "-c" ]
# Dockerfile adapted from instructions at:
#
# https://github.com/pytorch/pytorch
#
# and
#
# https://pytorch.org/docs/stable/notes/get_start_xpu.html
# https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-6.html
#
#
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
gpg \
wget \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
# ipex only supports python 3.11, so use 3.11 instead of latest oracular (3.12)
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
build-essential \
ca-certificates \
ccache \
cmake \
curl \
git \
gpg-agent \
less \
libbz2-dev \
libffi-dev \
libjpeg-dev \
libpng-dev \
libreadline-dev \
libssl-dev \
libsqlite3-dev \
llvm \
nano \
wget \
zlib1g-dev \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
# python3 \
# python3-pip \
# python3-venv \
# python3-dev \
RUN /usr/sbin/update-ccache-symlinks
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
# Build Python in /opt/..., install it locally, then remove the build tree,
# all collapsed into a single Docker layer.
WORKDIR /opt
ENV PYTHON_VERSION=3.11.9
RUN wget -q -O - https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz | tar -xz \
&& cd Python-${PYTHON_VERSION} \
&& ./configure --prefix=/opt/python --enable-optimizations \
&& make -j$(nproc) \
&& make install \
&& cd /opt \
&& rm -rf Python-${PYTHON_VERSION}
WORKDIR /opt/pytorch
FROM ubuntu:oracular AS ze-monitor
# From https://github.com/jketreno/ze-monitor
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
build-essential \
debhelper \
devscripts \
cmake \
git \
libfmt-dev \
libncurses-dev \
rpm \
rpm2cpio \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
RUN apt-get install -y \
software-properties-common \
&& add-apt-repository -y ppa:kobuk-team/intel-graphics \
&& apt-get update \
&& apt-get install -y \
libze-dev \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
RUN git clone --depth 1 --branch v0.3.0-1 https://github.com/jketreno/ze-monitor /opt/ze-monitor
WORKDIR /opt/ze-monitor/build
RUN cmake .. \
&& make \
&& cpack
FROM pytorch-build AS pytorch
COPY --from=pytorch-build /opt/pytorch /opt/pytorch
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
&& add-apt-repository -y ppa:kobuk-team/intel-graphics \
&& apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
libze-intel-gpu1 \
libze1 \
intel-ocloc \
intel-opencl-icd \
xpu-smi \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2
# When caching is enabled, the SYCL runtime will try to cache and reuse JIT-compiled binaries.
ENV SYCL_CACHE_PERSISTENT=1
WORKDIR /opt/pytorch
RUN { \
echo '#!/bin/bash' ; \
update-alternatives --set python3 /opt/python/bin/python3.11 ; \
echo 'source /opt/pytorch/venv/bin/activate' ; \
echo 'bash -c "${@}"' ; \
} > /opt/pytorch/shell ; \
chmod +x /opt/pytorch/shell
RUN python3 -m venv --system-site-packages /opt/pytorch/venv
SHELL [ "/opt/pytorch/shell" ]
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu
RUN pip3 freeze > /opt/pytorch/requirements.txt
SHELL [ "/bin/bash", "-c" ]
RUN { \
echo '#!/bin/bash' ; \
echo 'echo "Container: pytorch"' ; \
echo 'set -e' ; \
echo 'echo "Setting pip environment to /opt/pytorch"' ; \
echo 'source /opt/pytorch/venv/bin/activate'; \
echo 'if [[ "${1}" == "" ]] || [[ "${1}" == "shell" ]]; then' ; \
echo ' echo "Dropping to shell"' ; \
echo ' /bin/bash -c "source /opt/pytorch/venv/bin/activate ; /bin/bash"' ; \
echo 'else' ; \
echo ' exec "${@}"' ; \
echo 'fi' ; \
} > /entrypoint.sh \
&& chmod +x /entrypoint.sh
ENTRYPOINT [ "/entrypoint.sh" ]
FROM pytorch AS ipex-llm-src
# Build ipex-llm from source
RUN git clone --branch main --depth 1 https://github.com/intel/ipex-llm.git /opt/ipex-llm \
&& cd /opt/ipex-llm \
&& git fetch --depth 1 origin cb3c4b26ad058c156591816aa37eec4acfcbf765 \
&& git checkout cb3c4b26ad058c156591816aa37eec4acfcbf765
WORKDIR /opt/ipex-llm
RUN python3 -m venv --system-site-packages /opt/ipex-llm/venv
RUN { \
echo '#!/bin/bash' ; \
update-alternatives --set python3 /opt/python/bin/python3.11 ; \
echo 'source /opt/ipex-llm/venv/bin/activate' ; \
echo 'bash -c "${@}"' ; \
} > /opt/ipex-llm/shell ; \
chmod +x /opt/ipex-llm/shell
SHELL [ "/opt/ipex-llm/shell" ]
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu
WORKDIR /opt/ipex-llm/python/llm
RUN pip install requests wheel
RUN python setup.py clean --all bdist_wheel --linux
FROM airc AS jupyter
SHELL [ "/opt/airc/shell" ]
# BEGIN setup Jupyter
RUN pip install jupyter \
jupyterlab==4.3.0a0 \
jupyterhub==5.0.0 \
notebook==7.3.0a0 \
"jupyter-server-proxy>=4.1.2"
# END setup Jupyter
SHELL [ "/bin/bash", "-c" ]
RUN { \
echo '#!/bin/bash' ; \
echo 'echo "Container: airc jupyter"' ; \
echo 'if [[ ! -e "/root/.cache/hub/token" ]]; then' ; \
echo ' if [[ "${HF_ACCESS_TOKEN}" == "" ]]; then' ; \
echo ' echo "Set your HF access token in .env as: HF_ACCESS_TOKEN=<token>" >&2' ; \
echo ' exit 1' ; \
echo ' else' ; \
echo ' if [[ ! -d '/root/.cache/hub' ]]; then mkdir -p /root/.cache/hub; fi' ; \
echo ' echo "${HF_ACCESS_TOKEN}" > /root/.cache/hub/token' ; \
echo ' fi' ; \
echo 'fi' ; \
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
echo 'source /opt/airc/venv/bin/activate' ; \
echo 'if [[ "${1}" == "shell" ]]; then echo "Dropping to shell"; /bin/bash; exit $?; fi' ; \
echo 'while true; do' ; \
echo ' echo "Launching jupyter notebook"' ; \
echo ' jupyter notebook \' ; \
echo ' --notebook-dir=/opt/jupyter \' ; \
echo ' --port 8888 \' ; \
echo ' --ip 0.0.0.0 \' ; \
echo ' --no-browser \' ; \
echo ' --allow-root \' ; \
echo ' --ServerApp.token= \' ; \
echo ' --ServerApp.password= \' ; \
echo ' --ServerApp.allow_origin=* \' ; \
echo ' --ServerApp.base_url="/jupyter" \' ; \
echo ' "${@}" \' ; \
echo ' 2>&1 | tee -a "/root/.cache/jupyter.log"' ; \
echo ' echo "jupyter notebook died ($?). Restarting."' ; \
echo ' sleep 5' ; \
echo 'done' ; \
} > /entrypoint-jupyter.sh \
&& chmod +x /entrypoint-jupyter.sh
ENTRYPOINT [ "/entrypoint-jupyter.sh" ]
FROM pytorch AS airc
RUN python3 -m venv --system-site-packages /opt/airc/venv
# Don't install the full oneAPI essentials; just the components we actually need
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
| gpg --dearmor -o /usr/share/keyrings/oneapi-archive-keyring.gpg \
&& echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
| tee /etc/apt/sources.list.d/oneAPI.list \
&& apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
intel-oneapi-mkl-sycl-2025.0 \
intel-oneapi-dnnl-2025.0 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
RUN { \
echo '#!/bin/bash' ; \
echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
echo 'source /opt/airc/venv/bin/activate' ; \
echo 'if [[ "$1" == "" ]]; then bash -c; else bash -c "${@}"; fi' ; \
} > /opt/airc/shell ; \
chmod +x /opt/airc/shell
SHELL [ "/opt/airc/shell" ]
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu
# Install ipex-llm built in ipex-llm-src
COPY --from=ipex-llm-src /opt/ipex-llm/python/llm/dist/*.whl /opt/wheels/
RUN for pkg in /opt/wheels/ipex_llm*.whl; do pip install $pkg; done
COPY src/ /opt/airc/src/
# pydle does not work with newer asyncio releases because the coroutine
# decorator was deprecated and later removed. Patch it so it works.
RUN pip3 install pydle transformers sentencepiece accelerate \
&& patch -d /opt/airc/venv/lib/python3*/site-packages/pydle \
-p1 < /opt/airc/src/pydle.patch
# mistral fails with cache_position errors on transformers>4.40 (at least with the latest release),
# and the MistralSpda* classes are missing
RUN pip install "sentence_transformers<3.4.1" "transformers==4.40.0"
# To get xe_linear and other Xe methods
RUN pip3 install 'bigdl-core-xe-all>=2.6.0b'
# The default 'pip install trl' release doesn't provide what is needed from trl.core, so install from git
RUN pip install git+https://github.com/huggingface/trl.git@7630f877f91c556d9e5a3baa4b6e2894d90ff84c
# Needed by src/model-server.py
RUN pip install flask
SHELL [ "/bin/bash", "-c" ]
RUN { \
echo '#!/bin/bash' ; \
echo 'set -e' ; \
echo 'if [[ ! -e "/root/.cache/hub/token" ]]; then' ; \
echo ' if [[ "${HF_ACCESS_TOKEN}" == "" ]]; then' ; \
echo ' echo "Set your HF access token in .env as: HF_ACCESS_TOKEN=<token>" >&2' ; \
echo ' exit 1' ; \
echo ' else' ; \
echo ' if [[ ! -d '/root/.cache/hub' ]]; then mkdir -p /root/.cache/hub; fi' ; \
echo ' echo "${HF_ACCESS_TOKEN}" > /root/.cache/hub/token' ; \
echo ' fi' ; \
echo 'fi' ; \
echo 'echo "Container: airc"' ; \
echo 'echo "Setting pip environment to /opt/airc"' ; \
echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
echo 'source /opt/airc/venv/bin/activate'; \
echo 'if [[ "${1}" == "shell" ]] || [[ "${1}" == "/bin/bash" ]]; then' ; \
echo ' echo "Dropping to shell"' ; \
echo ' /bin/bash -c "source /opt/airc/venv/bin/activate ; /bin/bash"' ; \
echo ' exit $?' ; \
echo 'else' ; \
echo ' while true; do' ; \
echo ' echo "Launching model-server"' ; \
echo ' python src/model-server.py \' ; \
echo ' 2>&1 | tee -a "/root/.cache/model-server.log"'; \
echo ' echo "model-server died ($?). Restarting."' ; \
echo ' sleep 5' ; \
echo ' done &' ; \
echo ' while true; do' ; \
echo ' echo "Launching airc"' ; \
echo ' python src/airc.py "${@}" \' ; \
echo ' 2>&1 | tee -a "/root/.cache/airc.log"' ; \
echo ' echo "airc died ($?). Restarting."' ; \
echo ' sleep 5' ; \
echo ' done' ; \
echo 'fi' ; \
} > /entrypoint-airc.sh \
&& chmod +x /entrypoint-airc.sh
COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
RUN dpkg -i /opt/ze-monitor-*deb
WORKDIR /opt/airc
SHELL [ "/opt/airc/shell" ]
# Needed by src/model-server.py
#RUN pip install faiss-cpu sentence_transformers feedparser bs4
RUN pip install ollama feedparser bs4
SHELL [ "/bin/bash", "-c" ]
ENTRYPOINT [ "/entrypoint-airc.sh" ]
FROM ubuntu:oracular AS ollama
# Get a couple prerequisites
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
gpg \
wget \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
# Install Intel graphics runtimes
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
&& add-apt-repository -y ppa:kobuk-team/intel-graphics \
&& apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
libze-intel-gpu1 \
libze1 \
intel-ocloc \
intel-opencl-icd \
xpu-smi \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
WORKDIR /opt/ollama
# Download the nightly ollama release from ipex-llm
RUN wget -qO - https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250226-ubuntu.tgz | \
tar --strip-components=1 -C . -xzv
# Install Python from Oracular (ollama works with 3.12)
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
gpg \
python3 \
python3-pip \
python3-venv \
wget \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
# Setup the ollama python virtual environment
RUN python3 -m venv --system-site-packages /opt/ollama/venv
# Setup the docker pip shell
RUN { \
echo '#!/bin/bash' ; \
update-alternatives --set python3 /opt/python/bin/python3.11 ; \
echo 'source /opt/ollama/venv/bin/activate' ; \
echo 'bash -c "${@}"' ; \
} > /opt/ollama/shell ; \
chmod +x /opt/ollama/shell
# Activate the pip environment on all shell calls
SHELL [ "/opt/ollama/shell" ]
# Install ollama python module
RUN pip3 install ollama
SHELL [ "/bin/bash", "-c" ]
RUN { \
echo '#!/bin/bash'; \
echo 'echo "Container: ollama"'; \
echo 'set -e'; \
echo 'echo "Setting pip environment to /opt/ollama"'; \
echo 'source /opt/ollama/venv/bin/activate'; \
echo 'export OLLAMA_NUM_GPU=999'; \
echo 'export ZES_ENABLE_SYSMAN=1'; \
echo 'export SYCL_CACHE_PERSISTENT=1'; \
echo 'export OLLAMA_KEEP_ALIVE=-1'; \
echo 'export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1'; \
echo ''; \
echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama/)?shell$ ]]; then'; \
echo ' echo "Dropping to shell"'; \
echo ' exec /bin/bash'; \
echo 'else'; \
echo ' echo "Launching Ollama server..."'; \
echo ' exec ./ollama serve'; \
echo 'fi'; \
} > /entrypoint.sh \
&& chmod +x /entrypoint.sh
RUN { \
echo '#!/bin/bash'; \
echo 'echo "Container: ollama"'; \
echo 'set -e'; \
echo 'echo "Setting pip environment to /opt/ollama"'; \
echo 'source /opt/ollama/venv/bin/activate'; \
echo './ollama pull mxbai-embed-large' ; \
echo './ollama pull deepseek-r1:7b' ; \
} > /fetch-models.sh \
&& chmod +x /fetch-models.sh
VOLUME [" /root/.ollama" ]
ENTRYPOINT [ "/entrypoint.sh" ]


@@ -21,6 +21,7 @@ services:
- ./src:/opt/airc/src:rw
- ./doc:/opt/airc/doc:ro
- ./results:/opt/airc/results:rw
- ./ketr.chat:/opt/airc/ketr-chat:rw # Live mount ketr.chat
cap_add: # used for running ze-monitor within container
- CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
- CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
@@ -76,6 +77,7 @@ services:
- ./jupyter:/opt/jupyter:rw
- ./cache:/root/.cache
- ./src:/opt/airc/src:rw # Live mount src
- ./ketr.chat:/opt/airc/ketr-chat:rw # Live mount ketr.chat
deploy:
resources:
limits:


File diff suppressed because it is too large


@@ -1,368 +0,0 @@
# %%
# Imports [standard]
# Standard library modules (no try-except needed)
import argparse
import asyncio
import anyio
import json
import logging
import os
import queue
import re
import time
from datetime import datetime
def try_import(module_name, pip_name=None):
try:
__import__(module_name)
except ImportError:
print(f"Module '{module_name}' not found. Install it using:")
print(f" pip install {pip_name or module_name}")
# Third-party modules with import checks
try_import('gradio')
try_import('ollama')
try_import('openai')
try_import('pydle')
try_import('pytz')
try_import('requests')
try_import('yfinance', 'yfinance')
try_import('dotenv', 'python-dotenv')
try_import('geopy', 'geopy')
from dotenv import load_dotenv
from geopy.geocoders import Nominatim
import gradio as gr
import ollama
import openai
import pydle
import pytz
import requests
import yfinance as yf
# %%
def get_weather_by_location(city, state, country="USA"):
"""
Get weather information from weather.gov based on city, state, and country.
Args:
city (str): City name
state (str): State name or abbreviation
country (str): Country name (defaults to "USA" as weather.gov is for US locations)
Returns:
dict: Weather forecast information
"""
# Step 1: Get coordinates for the location using geocoding
location = f"{city}, {state}, {country}"
coordinates = get_coordinates(location)
if not coordinates:
return {"error": f"Could not find coordinates for {location}"}
# Step 2: Get the forecast grid endpoint for the coordinates
grid_endpoint = get_grid_endpoint(coordinates)
if not grid_endpoint:
return {"error": f"Could not find weather grid for coordinates {coordinates}"}
# Step 3: Get the forecast data from the grid endpoint
forecast = get_forecast(grid_endpoint)
if not forecast.get('location'):
forecast['location'] = location
return forecast
def get_coordinates(location):
"""Convert a location string to latitude and longitude using Nominatim geocoder."""
try:
# Create a geocoder with a meaningful user agent
geolocator = Nominatim(user_agent="weather_app_example")
# Get the location
location_data = geolocator.geocode(location)
if location_data:
return {
"latitude": location_data.latitude,
"longitude": location_data.longitude
}
else:
print(f"Location not found: {location}")
return None
except Exception as e:
print(f"Error getting coordinates: {e}")
return None
def get_grid_endpoint(coordinates):
"""Get the grid endpoint from weather.gov based on coordinates."""
try:
lat = coordinates["latitude"]
lon = coordinates["longitude"]
# Define headers for the API request
headers = {
"User-Agent": "WeatherAppExample/1.0 (your_email@example.com)",
"Accept": "application/geo+json"
}
# Make the request to get the grid endpoint
url = f"https://api.weather.gov/points/{lat},{lon}"
response = requests.get(url, headers=headers)
if response.status_code == 200:
data = response.json()
return data["properties"]["forecast"]
else:
print(f"Error getting grid: {response.status_code} - {response.text}")
return None
except Exception as e:
print(f"Error in get_grid_endpoint: {e}")
return None
# Weather related function
def get_forecast(grid_endpoint):
"""Get the forecast data from the grid endpoint."""
try:
# Define headers for the API request
headers = {
"User-Agent": "WeatherAppExample/1.0 (your_email@example.com)",
"Accept": "application/geo+json"
}
# Make the request to get the forecast
response = requests.get(grid_endpoint, headers=headers)
if response.status_code == 200:
data = response.json()
# Extract the relevant forecast information
periods = data["properties"]["periods"]
# Process the forecast data into a simpler format
forecast = {
"location": data["properties"].get("relativeLocation", {}).get("properties", {}),
"updated": data["properties"].get("updated", ""),
"periods": []
}
for period in periods:
forecast["periods"].append({
"name": period.get("name", ""),
"temperature": period.get("temperature", ""),
"temperatureUnit": period.get("temperatureUnit", ""),
"windSpeed": period.get("windSpeed", ""),
"windDirection": period.get("windDirection", ""),
"shortForecast": period.get("shortForecast", ""),
"detailedForecast": period.get("detailedForecast", "")
})
return forecast
else:
print(f"Error getting forecast: {response.status_code} - {response.text}")
return {"error": f"API Error: {response.status_code}"}
except Exception as e:
print(f"Error in get_forecast: {e}")
return {"error": f"Exception: {str(e)}"}
# Example usage
def do_weather():
city = input("Enter city: ")
state = input("Enter state: ")
country = input("Enter country (default USA): ") or "USA"
print(f"Getting weather for {city}, {state}, {country}...")
weather_data = get_weather_by_location(city, state, country)
if "error" in weather_data:
print(f"Error: {weather_data['error']}")
else:
print("\nWeather Forecast:")
print(f"Location: {weather_data.get('location', {}).get('city', city)}, {weather_data.get('location', {}).get('state', state)}")
print(f"Last Updated: {weather_data.get('updated', 'N/A')}")
print("\nForecast Periods:")
for period in weather_data.get("periods", []):
print(f"\n{period['name']}:")
print(f" Temperature: {period['temperature']}{period['temperatureUnit']}")
print(f" Wind: {period['windSpeed']} {period['windDirection']}")
print(f" Forecast: {period['shortForecast']}")
print(f" Details: {period['detailedForecast']}")
# %%
# Stock related function
def get_ticker_price(ticker_symbols):
"""
Look up the current price of a stock using its ticker symbol.
Args:
ticker_symbols (str): One or more comma-separated ticker symbols (e.g., 'AAPL' or 'AAPL,MSFT')
Returns:
dict: Current stock information including price
"""
results = []
print(f"get_ticker_price('{ticker_symbols}')")
for ticker_symbol in ticker_symbols.split(','):
ticker_symbol = ticker_symbol.strip()
if ticker_symbol == "":
continue
# Create a Ticker object
try:
ticker = yf.Ticker(ticker_symbol)
print(ticker)
# Get the latest market data
ticker_data = ticker.history(period="1d")
if ticker_data.empty:
results.append({"error": f"No data found for ticker {ticker_symbol}"})
continue
# Get the latest closing price
latest_price = ticker_data['Close'].iloc[-1]
# Get some additional info
info = ticker.info
results.append({ 'symbol': ticker_symbol, 'price': latest_price })
except Exception as e:
results.append({"error": f"Error fetching data for {ticker_symbol}: {str(e)}"})
return results[0] if len(results) == 1 else results
#{
# "symbol": ticker_symbol,
# "price": latest_price,
# "currency": info.get("currency", "Unknown"),
# "company_name": info.get("shortName", "Unknown"),
# "previous_close": info.get("previousClose", "Unknown"),
# "market_cap": info.get("marketCap", "Unknown"),
#}
# %%
def get_current_datetime(timezone="America/Los_Angeles"):
"""
Returns the current date and time in the specified timezone in ISO 8601 format.
Args:
timezone (str): Timezone name (e.g., "UTC", "America/New_York", "Europe/London")
Default is "America/Los_Angeles"
Returns:
str: Current date and time with timezone in the format YYYY-MM-DDTHH:MM:SS+HH:MM
"""
try:
if timezone == 'system' or timezone == '' or not timezone:
timezone = 'America/Los_Angeles'
# Get current UTC time (timezone-aware)
local_tz = pytz.timezone("America/Los_Angeles")
local_now = datetime.now(tz=local_tz)
# Convert to target timezone
target_tz = pytz.timezone(timezone)
target_time = local_now.astimezone(target_tz)
return target_time.isoformat()
except Exception as e:
return {'error': f"Invalid timezone {timezone}: {str(e)}"}
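A quick usage note for get_current_datetime above; the timestamps in the comments only illustrate the ISO 8601 shape and are not real output.
# Hypothetical outputs -- actual values depend on when this runs.
print(get_current_datetime("Europe/London"))   # e.g. '2025-04-05T06:31:51+01:00'
print(get_current_datetime())                  # e.g. '2025-04-04T22:31:51-07:00' (America/Los_Angeles)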
# %%
tools = [ {
"type": "function",
"function": {
"name": "get_ticker_price",
"description": "Get the current stock price of one or more ticker symbols. Returns an array of objects with 'symbol' and 'price' fields. Call this whenever you need to know the latest value of stock ticker symbols, for example when a user asks 'How much is Intel trading at?' or 'What are the prices of AAPL and MSFT?'",
"parameters": {
"type": "object",
"properties": {
"ticker": {
"type": "string",
"description": "The company stock ticker symbol. For multiple tickers, provide a comma-separated list (e.g., 'AAPL,MSFT,GOOGL').",
},
},
"required": ["ticker"],
"additionalProperties": False
}
}
}, {
"type": "function",
"function": {
"name": "summarize_site",
"description": "Requests a second LLM agent to download the requested site and answer a question about the site. For example if the user says 'What are the top headlines on cnn.com?' you would use summarize_site to get the answer.",
"parameters": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The website URL to download and process",
},
"question": {
"type": "string",
"description": "The question to ask the second LLM about the content",
},
},
"required": ["url", "question"],
"additionalProperties": False
},
"returns": {
"type": "object",
"properties": {
"source": {
"type": "string",
"description": "Identifier for the source LLM"
},
"content": {
"type": "string",
"description": "The complete response from the second LLM"
},
"metadata": {
"type": "object",
"description": "Additional information about the response"
}
}
}
}
}, {
"type": "function",
"function": {
"name": "get_current_datetime",
"description": "Get the current date and time in a specified timezone",
"parameters": {
"type": "object",
"properties": {
"timezone": {
"type": "string",
"description": "Timezone name (e.g., 'UTC', 'America/New_York', 'Europe/London', 'America/Los_Angeles'). Default is 'America/Los_Angeles'."
}
},
"required": []
}
}
}, {
"type": "function",
"function": {
"name": "get_weather_by_location",
"description": "Get the full weather forecast as structured data for a given CITY and STATE location in the United States. For example, if the user asks 'What is the weather in Portland?' or 'What is the forecast for tomorrow?' use the provided data to answer the question.",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "City to find the weather forecast (e.g., 'Portland', 'Seattle')."
},
"state": {
"type": "string",
"description": "State to find the weather forecast (e.g., 'OR', 'WA')."
}
},
"required": [ "city", "state" ],
"additionalProperties": False
}
}
}]
__all__ = [ 'tools', 'get_current_datetime', 'get_weather_by_location', 'get_ticker_price' ]
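The tools list above uses the OpenAI-style function-calling schema that the ollama Python client accepts. Below is a minimal sketch of how such a list is typically wired up, assuming a reachable Ollama server and a tool-capable model; the model name and the print-only dispatch are illustrative, not part of the original file. Note that the get_ticker_price schema declares a 'ticker' argument while the Python function takes 'ticker_symbols', so a real dispatcher would have to map that name.
# Minimal sketch -- assumes a reachable Ollama server and a tool-capable model.
import ollama

response = ollama.chat(
    model="llama3.1",   # illustrative model name
    messages=[{"role": "user", "content": "What are AAPL and MSFT trading at?"}],
    tools=tools,
)

# Print any tool calls the model requested instead of executing them blindly.
message = response["message"]
if "tool_calls" in message and message["tool_calls"]:
    for call in message["tool_calls"]:
        name = call["function"]["name"]        # e.g. 'get_ticker_price'
        args = call["function"]["arguments"]   # e.g. {'ticker': 'AAPL,MSFT'}
        print(f"model requested {name}({args})")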

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1,57 +0,0 @@
{
"file_path": "../doc/resume/generic.txt",
"processed_at": "2025-03-12T19:22:00.311002",
"context": "# JAMES KETRENOS\nsoftware architect, designer, developer, and team lead\nBeaverton, OR 97003\n\njames@ketrenos.com\n(503) 501 8281\n\nSeeking an opportunity to contribute to the advancement of energy efficient AI solutions, James is a driven problem solver, solution creator, technical leader, and skilled software developer focused on rapid, high-quality results, with an eye toward bringing solutions to the market.\n\n## SUMMARY\n\nProblem-solving: Trusted resource for executive leadership, able to identify opportunities to bridge technical gaps, adopt new technologies, and improve efficiency and quality for internal and external customers.\n\nProficient: Adept in compiled and interpreted languages, the software frameworks built around them, and front- and backend infrastructure. Leveraging deep and varied experience to quickly find solutions. Rapidly familiarizes and puts to use new and emerging technologies.\n\nExperienced: 20+ years of experience as an end-to-end Linux software architect, team lea...",
"qa_pairs": [
{
"question": "What is James' primary role in the software architecture field?",
"answer": "James' primary role in the software architecture field is focused on developing and maintaining the Linux kernel, ensuring compatibility with Intel wireless drivers. He also contributes to hardware design, particularly in the development of the Intel Intercast technology.",
"validation": "**Assessment:**\n\n1. **ACCURACY:** \n The answer accurately mentions James' primary role in the software architecture field, focusing on kernel development for Intel wireless and contributing to hardware design, including Intel Intercast. However, it omits details about other contributions, such as kernel development for other hardware projects.\n\n2. **COMPLETENESS:** \n The answer is incomplete as it does not provide information on James' work in hardware design beyond Intel Intercast, such as kernel development for other hardware.\n\n3. **RELEVANCE:** \n The answer is relevant, covering James' kernel development for Intel wireless and his contribution to hardware design.\n\n4. **NATIVITY:** \n The answer is natural and conversational.\n\n5. **DIVERSITY:** \n The answer is somewhat diverse, covering different areas of software architecture.\n\n6. **IMPROVEMENTS:** \n The answer could include more details on James' contributions to hardware design, such as his kernel development for Intel wireless or other hardware projects.\n\n**Decision:** \nREJECT"
},
{
"question": "Can you explain the development of the 'ze-monitor' application?",
"answer": "The 'ze-monitor' application was developed as a lightweight C++ Linux application that leverages Level Zero Sysman APIs to provide 'top' like device monitoring for Intel GPUs. This project was part of James' experience working with Linux and Level Zero Sysman APIs, focusing on leveraging new technologies and ensuring seamless integration with existing systems. The development process involved identifying opportunities to bridge technical gaps, adopt new technologies, and improve efficiency and quality for internal and external customers. This led to the creation of a lightweight application that integrates Level Zero Sysman APIs, enabling device monitoring for Intel GPUs. The detailed development process involved collaboration with the team, ensuring that the application met the technical requirements and worked seamlessly with existing systems.",
"validation": "**Answer:**\n\nThe 'ze-monitor' application was developed as a lightweight C++ Linux application that leverages Level Zero Sysman APIs to provide device monitoring for Intel GPUs. This project was part of James' experience working with Linux and Level Zero Sysman APIs, focusing on leveraging new technologies and ensuring seamless integration with existing systems. The development process involved identifying opportunities to bridge technical gaps, adopt new technologies, and improve efficiency and quality for internal and external customers. The application was created to integrate Level Zero Sysman APIs, enabling device monitoring for Intel GPUs, while ensuring seamless integration with existing systems.\n\n**Assessment:**\n\n- **VERDICT:** [ACCEPT]\n\n- **REASONING:** The answer is accurate, comprehensive, relevant, and natural. It explains the technical aspects of the application and its development, which is useful for understanding the context provided. It also mentions how the answer was generated and its contribution to the overall knowledge base, making it somewhat complete.\n\n- **IMPROVEMENTS:** To add diversity, the answer could mention other areas where similar applications were developed, or highlight the broader impact of these technologies. For example, mentioning other device monitoring applications developed by James or other developers could add more diversity. Also, including how these technologies contribute to broader areas like cybersecurity or efficiency could enhance the answer's breadth and relevance."
},
{
"question": "How did the 'airc' project utilize James' resume data?",
"answer": "James' resume data was utilized in the development of the 'airc' project, which is a command line client, interactive query system, and IRC bot integration. This project leverages his extensive experience in software architecture and development, particularly in areas such as Linux release infrastructure and system architecture.",
"validation": "- VERDICT: [ACCEPT]\n- REASONING: The answer is accurate, complete, relevant, and natural. It provides specific details and adds some diversity by focusing on a single project and experience area. No further improvements are needed.\n- IMPROVEMENTS: None needed, as the answer meets all criteria effectively.\n\nAnswer:\nJames' resume data was utilized in the development of the 'airc' project, which includes a command line client, interactive query system, and IRC bot integration. This project leverages his extensive experience in software architecture and development, particularly in areas such as Linux release infrastructure and system architecture."
},
{
"question": "What is James' experience with Linux kernel driver integration?",
"answer": "James' experience with Linux kernel driver integration includes working on the Intel Wireless drivers, where he contributed to the development of kernel driver integration, utilizing tools like Linux packaging and managing CI/CD pipelines for release infrastructure.",
"validation": "**Answer:**\n\nJames' experience with Linux kernel driver integration includes his contributions to the development of kernel driver integration for Intel's Wireless drivers and his role in managing CI/CD pipelines for projects like Open Source at Intel.\n\n**Evaluation:**\n\n- **VERDICT:** [ACCEPT]\n- **REASONING:** The answer accurately describes James' experience with kernel driver integration, providing specific examples and context. It is relevant, natural, and adds diversity to his expertise.\n- **IMPROVEMENTS:** If possible, enhancing the answer with more details about the tools or processes used could further enhance naturalness. However, as provided, it is sufficiently detailed.\n- **DIVERSITY:** The answer naturally incorporates James' work in kernel driver integration, which is a natural addition to his role and provides a new angle for understanding his expertise.\n\nThis answer is appropriate and suitable for training purposes."
},
{
"question": "How did James contribute to the development of Intel RealSense hardware?",
"answer": "James contributed to the development of Intel RealSense hardware through the development of a command line client and an IRC bot integration for the 'airc' project.",
"validation": "**Answer:**\n\nJames contributed to the development of Intel RealSense hardware through the creation of a command line client and an IRC bot integration for the 'airc' project. This involved integrating AI capabilities into the 'ze-monitor' project, allowing for device monitoring of Intel GPUs, which was part of the broader effort to advance energy-efficient AI solutions. However, the answer does not directly address how James contributed to Intel RealSense hardware, focusing instead on his other projects. This could be an area for improvement to better align with the context provided.\n\n**Assessment (VERDICT):** \nThe answer is accurate and provides specific information about James' contributions but lacks essential details about his role in Intel RealSense hardware development. It is a bit verbose but could be more concise. The response is appropriate but could be enhanced to better meet the quality standards by providing a clearer link to Intel RealSense.\n\n**Reasoning:** \nThe answer accurately describes James' contributions but does not address the specific impact on Intel RealSense hardware. It is a sufficient answer for the given question, but it could be improved for more clarity and conciseness.\n\n**IMPROVEMENTS:** \n1. Clarify how James contributed to Intel RealSense hardware, linking his other projects to RealSense development.\n2. Provide a more direct explanation of his role in the 'airc' project and its relation to Intel RealSense.\n3. Offer a brief overview of Intel RealSense hardware to better place James' contributions within the context."
},
{
"question": "What is the current state of James' open-source contributions?",
"answer": "James Ketrenos is actively engaged in several open-source projects, contributing to the development and enhancement of various tools and technologies. His contributions include:\n\n1. **Ze-Monitor**: A lightweight application that leverages Level Zero Sysman APIs for device monitoring of Intel GPUs. This tool is part of his contributions to the Linux community.\n\n2. **Airc**: A LLM pipeline that facilitates interactive queries about James' resume, showcasing his proficiency in using LLMs to assist with resume optimization and internal communication.\n\n3. **Rebound**: A React web UI that integrates various components such as a command line client, an IRC bot, and an IRC bot integration, demonstrating his ability to create user-friendly interfaces for complex systems.\n\n4. **Rebound-WebUI**: A full-stack React web UI that includes a command line client, an IRC bot, and an IRC bot integration, highlighting his expertise in developing integrated web applications.\n\n5. **Rebound-React-UI**: A React web UI that includes a command line client and an IRC bot integration, showcasing his skill in creating web interfaces for technical solutions.\n\n6. **Rebound-React-UI-IRC**: An integrated React web UI with a command line client, an IRC bot, and an IRC bot integration, further illustrating his ability to create effective and user-friendly interfaces.\n\nThese contributions reflect James' active participation in the open-source community and his dedication to advancing technology through innovative solutions.",
"validation": "** Assessment: [ACCEPT]**\n\n**Reasoning:**\n1. **ACCURACY:** The answer accurately describes James Ketrenos's open-source contributions, mentioning specific tools and projects.\n2. **COMPLETENESS:** The answer fully answers the question by detailing James's involvement in multiple projects.\n3. **RELEVANCE:** The question is meaningful and relevant to the context, focusing on James's role in the Linux community.\n4. **NATURALNESS:** The language is natural and conversational, avoiding forced or unnatural phrasing.\n5. **DIVERSITY:** The pair adds naturalness to the dataset, enhancing its overall quality.\n\n**Improvements:**\n- To further enhance naturalness, examples could include using \"partner\" instead of \"contribution,\" making the answer more relatable.\n- Additional context about James's contributions could be added, such as his role in specific projects or his impact on the community."
},
{
"question": "How has James' expertise in kernel driver development impacted the CI/CD pipeline?",
"answer": "James' expertise in kernel driver development has significantly impacted the CI/CD pipeline. Specifically, his work on Intel graphics software has enabled the efficient deployment of Intel graphics products within the CI/CD pipeline. By optimizing kernel drivers, James ensured that the Linux kernel could handle the CI/CD processes, such as deployments and releases, thereby enhancing the software's performance and reliability. This integration has allowed for better utilization of Intel graphics, improving the overall efficiency and customer satisfaction of the products.",
"validation": "**Answer:**\n\nThe provided answer is well-received, with a naturalness score of 0.8 due to minor suggestion of making it more conversational. It is accurate, complete, and relevant, with a naturalness score of 0.8 due to minor suggestion of making it more conversational.\n\n**Evaluation:**\n\n- **ACCURACY:** Yes. The answer correctly links kernel driver expertise to CI/CD pipeline efficiency.\n- **COMPLETENESS:** Yes. The answer provides a clear and detailed link between kernel driver development and CI/CD processes.\n- **RELEVANCE:** Yes. The answer connects kernel driver expertise to CI/CD processes, which are relevant to software development and efficiency.\n- **NATURALNESS:** Good. The answer is written in a professional context, but could be more conversational.\n- **DIVERSITY:** Good. The answer focuses on specific technologies and their impact, but doesn't add much diversity.\n\n**Overall Assessment:** The task is suitable for training, with minor areas for improvement in naturalness. The answer is accurate, complete, and relevant, making it effective for quality assurance.\n\n**Answer:** VERDICT: [ACCEPT] \nREASONING: The answer accurately connects James' kernel driver expertise to CI/CD impact, demonstrating relevance and completeness. \nIMPROVEMENTS: The answer could be made more conversational by phrases like, \" James' work on Intel graphics software has enabled...\" \n- VERDICT: ACCEPT \n- REASONING: The answer is accurate, complete, and relevant. It effectively addresses the impact of kernel driver development on CI/CD. \n- IMPROVEMENTS: Suggesting more conversational phrasing to enhance naturalness."
},
{
"question": "What is the impact of James' work on the development of the Intel GPU driver?",
"answer": "James Keta renos's work has significantly impacted the development of the Intel GPU driver. Specifically, he was involved in the design and implementation of the Intel GPU driver, which led to the release of the Intel GPU driver version 3.3.1. Additionally, his contributions to the Linux kernel and development of tools like 'ze-monitor' and 'airc' have enhanced the efficiency of kernel-based graphics drivers. These advancements have improved the performance and reliability of the Intel GPU, contributing to the broader development of Linux kernel-based graphics drivers and related technologies.",
"validation": "**Answer:**\n\n**VERDICT:** ACCEPVED\n\n**REASONING:** The question is clear and the answer is thorough, addressing the impact of James Keta renos's work on the Intel GPU driver. However, the answer is somewhat technical and lacks a broader context and naturalness.\n\n**IMPROVEMENTS:** The answer could be made more conversational and broader to explain how James' work has directly impacted the development of the Intel GPU driver. It should also provide more context on how his contributions have influenced other areas of Linux or kernel-based software.\n\n**NOTICE:** The answer is comprehensive but lacks a more natural and broader context. It could be improved by including more conversational and contextual elements to better explain the impact and its broader significance."
},
{
"question": "How has James' experience with robotic vision applications influenced hardware design?",
"answer": "James' experience with robotic vision applications has significantly influenced hardware design by enabling the development of efficient and scalable computing solutions. By integrating robotic vision into hardware, he contributed to the design of hardware that can handle complex tasks requiring visual recognition, leading to improved performance in areas such as image processing and robotics. This has translated into hardware designs that enhance computational capabilities and efficiency, directly impacting the development of systems requiring high-performance computing.",
"validation": "- VERDICT: REJECT\n- REASONING: The answer is technically accurate and relevant, but it lacks specific examples and context, making it less natural and less detailed than necessary.\n- IMPROVEMENTS: The answer should include concrete hardware design examples influenced by robotic vision, tie back to James' experience as a software architect, and provide more natural and detailed examples to make it more understandable."
},
{
"question": "What is James' role in the development of the Intel Intercast software technology?",
"answer": "James' role in the development of the Intel Intercast software technology is as a team leader and technical leader, responsible for driving the integration of the Intel Intercast technology into various solutions and ensuring its successful adoption.",
"validation": "**Answer:**\n\nJames' role in the development of the Intel Intercast software technology is as a team leader and technical leader, responsible for driving the integration of the Intel Intercast technology into various solutions and ensuring its successful adoption.\n\n**Evaluation:**\n\n1. **VERDICT: [ACCEPT]**\n - **REASONING:** The answer accurately describes James' role and contributes to the context about his work.\n\n2. **COMPLETENESS:** [REMAIN] The answer fully addresses the question by explaining James' role, so it's complete.\n\n3. **RELEVANCE:** [REMAIN] The answer clearly ties James' role to the Intel Intercast project, making it relevant.\n\n4. ** NATURALNESS:** [REMAIN] The answer uses professional language and is concise, which sounds natural.\n\n5. **DIVERSITY:** [REMAIN] The answer adds variety by mentioning multiple areas of James' work, which is important for training.\n\nOverall, this pair meets all the quality standards. It's accurate, complete, relevant, natural, and adds diversity, which is important for training."
}
]
}

src/.gitignore

@@ -1,2 +0,0 @@
sessions


Binary file not shown.

Binary file not shown.


@@ -1,264 +0,0 @@
import argparse
import asyncio
import datetime
import json
import logging
import os
import re
import time
from typing import Dict, Any
import chromadb
import feedparser
import ollama
import pydle
from bs4 import BeautifulSoup
OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint
MODEL_NAME = "deepseek-r1:7b"
def parse_args():
parser = argparse.ArgumentParser(description="AI is Really Cool")
parser.add_argument("--server", type=str, default="irc.libera.chat", help="IRC server address")
parser.add_argument("--port", type=int, default=6667, help="IRC server port")
parser.add_argument("--nickname", type=str, default="airc", help="Bot nickname")
parser.add_argument("--channel", type=str, default="#airc-test", help="Channel to join")
parser.add_argument("--ai-server", type=str, default="http://localhost:5000", help="OpenAI API endpoint")
parser.add_argument('--level', type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
default='INFO', help='Set the logging level.')
return parser.parse_args()
def setup_logging(level):
numeric_level = getattr(logging, level.upper(), None)
if not isinstance(numeric_level, int):
raise ValueError(f"Invalid log level: {level}")
logging.basicConfig(level=numeric_level, format='%(asctime)s - %(levelname)s - %(message)s')
logging.info(f"Logging is set to {level} level.")
client = ollama.Client(host=OLLAMA_API_URL)
def extract_text_from_html_or_xml(content, is_xml=False):
# Parse the content
if is_xml:
soup = BeautifulSoup(content, 'xml') # Use 'xml' parser for XML content
else:
soup = BeautifulSoup(content, 'html.parser') # Default to 'html.parser' for HTML content
# Extract and return just the text
return soup.get_text()
class Feed():
def __init__(self, name, url, poll_limit_min = 30, max_articles=5):
self.name = name
self.url = url
self.poll_limit_min = datetime.timedelta(minutes=poll_limit_min)
self.last_poll = None
self.articles = []
self.max_articles = max_articles
self.update()
def update(self):
now = datetime.datetime.now()
if self.last_poll is None or (now - self.last_poll) >= self.poll_limit_min:
logging.info(f"Updating {self.name}")
feed = feedparser.parse(self.url)
self.articles = []
self.last_poll = now
content = ""
if len(feed.entries) > 0:
content += f"Source: {self.name}\n"
for entry in feed.entries[:self.max_articles]:
title = entry.get("title")
if title:
content += f"Title: {title}\n"
link = entry.get("link")
if link:
content += f"Link: {link}\n"
summary = entry.get("summary")
if summary:
summary = extract_text_from_html_or_xml(summary, False)
content += f"Summary: {summary}\n"
published = entry.get("published")
if published:
content += f"Published: {published}\n"
content += "\n"
self.articles.append(content)
else:
logging.info(f"Not updating {self.name} -- {self.poll_limit_min - (now - self.last_poll)}s remain to refresh.")
return self.articles
# News RSS Feeds
rss_feeds = [
Feed(name="BBC World", url="http://feeds.bbci.co.uk/news/world/rss.xml"),
Feed(name="Reuters World", url="http://feeds.reuters.com/Reuters/worldNews"),
Feed(name="Al Jazeera", url="https://www.aljazeera.com/xml/rss/all.xml"),
Feed(name="CNN World", url="http://rss.cnn.com/rss/edition_world.rss"),
Feed(name="Time", url="https://time.com/feed/"),
Feed(name="Euronews", url="https://www.euronews.com/rss"),
Feed(name="FeedX", url="https://feedx.net/rss/ap.xml")
]
documents = [
"Llamas like to eat penguins",
"Llamas are not vegetarians and have very efficient digestive systems",
"Llamas live to be about 120 years old, though some only live for 15 years and others live to be 90 years old",
]
import chromadb
# Initialize ChromaDB Client
db = chromadb.PersistentClient(path="/root/.cache/chroma.db")
# We want to save the collection to disk to analyze it offline, but we don't
# want to re-use it
collection = db.get_or_create_collection("docs")
# store each document in a vector embedding database
for i, feed in enumerate(rss_feeds):
# Use the client instance instead of the global ollama module
for j, article in enumerate(feed.articles):
response = client.embeddings(model="mxbai-embed-large", prompt=article)
embeddings = response["embedding"] # Note: it's "embedding", not "embeddings"
collection.add(
ids=[str(i)+str(j)],
embeddings=embeddings,
documents=[article]
)
class AIRC(pydle.Client):
def __init__(self, nick, channel, client, burst_limit = 5, rate_limit = 1.0, burst_reset_timeout = 10.0):
super().__init__(nick)
self.nick = nick
self.channel = channel
self.burst_limit = burst_limit
self.sent_burst = 0
self.rate_limit = rate_limit
self.burst_reset_timeout = burst_reset_timeout
self.sent_burst = 0 # Track messages sent in burst
self.last_message_time = None # Track last message time
self.system_input = "You are a critical assistant. Give concise and accurate answers in less than 120 characters."
self._message_queue = asyncio.Queue()
self._task = asyncio.create_task(self._send_from_queue())
self.client = client
self.queries = 0
self.processing = datetime.timedelta(minutes=0)
async def _send_from_queue(self):
"""Background task that sends queued messages with burst + rate limiting."""
while True:
target, message = await self._message_queue.get()
# If burst is still available, send immediately
if self.sent_burst < self.burst_limit:
self.sent_burst += 1
else:
await asyncio.sleep(self.rate_limit) # Apply rate limit
await super().message(target, message) # Send message
self.last_message_time = asyncio.get_event_loop().time() # Update last message timestamp
# Start burst reset countdown after each message
asyncio.create_task(self._reset_burst_after_inactivity())
async def _reset_burst_after_inactivity(self):
"""Resets burst counter only if no new messages are sent within timeout."""
last_time = self.last_message_time
await asyncio.sleep(self.burst_reset_timeout) # Wait for inactivity period
# Only reset if no new messages were sent during the wait
if self.last_message_time == last_time:
self.sent_burst = 0
logging.info("Burst limit reset due to inactivity.")
async def message(self, target, message):
"""Splits a multi-line message and sends each line separately. If more than 10 lines, truncate and add a message."""
lines = message.splitlines() # Splits on both '\n' and '\r\n'
# Process the first 10 lines
for line in lines[:10]:
if line.strip(): # Ignore empty lines
await self._message_queue.put((target, line))
# If there are more than 10 lines, add the truncation message
if len(lines) > 10:
await self._message_queue.put((target, "[additional content truncated]"))
async def on_connect(self):
logging.debug('on_connect')
await self.join(self.channel)
def remove_substring(self, string, substring):
return string.replace(substring, "")
def extract_nick_message(self, input_string):
# Pattern with capturing groups for nick and message
pattern = r"^\s*([^\s:]+?)\s*:\s*(.+?)$"
match = re.match(pattern, input_string)
if match:
nick = match.group(1) # First capturing group
message = match.group(2) # Second capturing group
return nick, message
return None, None # Return None for both if no match
async def on_message(self, target, source, message):
if source == self.nick:
return
nick, body = self.extract_nick_message(message)
if nick == self.nick:
content = None
if body == "stats":
content = f"{self.queries} queries handled in {self.processing}s"
else:
self.queries += 1
start = datetime.datetime.now()
query_text = body
query_response = client.embeddings(model="mxbai-embed-large", prompt=query_text)
query_embedding = query_response["embedding"] # Note: singular "embedding", not plural
# Then run the query with the correct structure
results = collection.query(
query_embeddings=[query_embedding], # Make sure this is a list containing the embedding
n_results=3
)
data = results['documents'][0][0]
logging.info(f"Data for {query_text}: {data}")
logging.info(f"From {results}")
output = client.generate(
model=MODEL_NAME,
system=f"Your are {self.nick}. In your response, make reference to this data if appropriate: {data}",
prompt=f"Respond to this prompt: {query_text}",
stream=False
)
end = datetime.datetime.now()
self.processing = self.processing + end - start
# Prune off the <think>...</think>
content = re.sub(r'^<think>.*?</think>', '', output['response'], flags=re.DOTALL).strip()
if content:
logging.info(f'Sending: {content}')
await self.message(target, f"{content}")
def remove_substring(string, substring):
return string.replace(substring, "")
async def main():
# Parse command-line arguments
args = parse_args()
# Setup logging based on the provided level
setup_logging(args.level)
bot = AIRC(args.nickname, args.channel, client)
await bot.connect(args.server, args.port, tls=False)
await bot.handle_forever()
if __name__ == "__main__":
asyncio.run(main())


@@ -1,293 +0,0 @@
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Some parts of this file is adapted from
# https://github.com/tloen/alpaca-lora/blob/main/finetune.py
#
# Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import List
import torch
import transformers
from datasets import load_dataset
import accelerate
from typing import Union
from datetime import datetime
from transformers import AutoTokenizer
from peft import (
get_peft_model_state_dict,
set_peft_model_state_dict,
)
from ipex_llm.transformers import AutoModelForCausalLM
# import them from ipex_llm.transformers.qlora to get a IPEX-LLM compatible Peft model
from ipex_llm.transformers.qlora import get_peft_model, prepare_model_for_kbit_training,\
LoraConfig
import json
local_rank = 0
world_size = 1
class Prompter(object):
__slots__ = ("data")
def __init__(self, data_path: str = ""):
self.data = None
if not os.path.exists(data_path):
raise Exception(f"Can't read {data_path}")
with open(data_path) as fp:
self.data = json.load(fp)
def generate_prompt(
self,
instruction: str,
input: Union[None, str]=None,
label: Union[None, str]=None,
) -> str:
# returns the full prompt from instruction and optional input
# if a label (=response, =output) is provided, it's also appended.
if input:
ret = f"### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
else:
ret = f"### Instruction:\n{instruction}\n\n### Response:\n"
if label:
ret = f"{ret}{label}"
return ret
def get_response(self, output: str) -> str:
return output.split("### Response:")[1].strip()
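For reference, this is the prompt string generate_prompt produces for a QA entry with no input field; the question and answer text below is made up purely to show the format.
# Illustrative strings only; the format mirrors Prompter.generate_prompt with input=None.
question = "What does ze-monitor do?"
answer = "It provides top-like monitoring for Intel GPUs."
full_prompt = f"### Instruction:\n{question}\n\n### Response:\n{answer}"
# At inference time only the part up to "### Response:\n" is sent to the model,
# and get_response() splits the generated text on "### Response:" to recover the answer.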
"""Data processing to get train data and val data"""
def tokenize_qa(prompt, tokenizer, max_length=1, add_eos_token=True):
# there's probably a way to do this with the tokenizer settings
# but again, gotta move fast
result = tokenizer(
prompt,
truncation=True,
max_length=max_length,
padding=False,
return_tensors=None,
)
if (
result["input_ids"][-1] != tokenizer.eos_token_id
and len(result["input_ids"]) < max_length
and add_eos_token
):
result["input_ids"].append(tokenizer.eos_token_id)
result["attention_mask"].append(1)
result["labels"] = result["input_ids"].copy()
return result
def generate_and_tokenize_prompt(entry, prompter, tokenizer, max_length, add_eos_token, train_on_inputs):
full_prompt = prompter.generate_prompt(
entry["question"],
None,
entry["answer"],
)
print({ 'full_prompt': full_prompt })
tokenized_full_prompt = tokenize_qa(full_prompt, tokenizer, max_length=max_length)
if not train_on_inputs:
user_prompt = prompter.generate_prompt(
entry["question"], None
)
tokenized_user_prompt = tokenize_qa(
user_prompt, tokenizer, add_eos_token=add_eos_token, max_length=max_length
)
user_prompt_len = len(tokenized_user_prompt["input_ids"])
if add_eos_token:
user_prompt_len -= 1
# Mask out the input from the labels
tokenized_full_prompt["labels"] = [
-100
] * user_prompt_len + tokenized_full_prompt["labels"][
user_prompt_len:
] # could be sped up, probably
return tokenized_full_prompt
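The -100 values written into the labels above follow the Hugging Face convention: positions labeled -100 are ignored by the cross-entropy loss, so only the answer span contributes to the gradient when train_on_inputs is False. A tiny worked example with made-up token IDs:
# Hypothetical tokenization: 3 prompt tokens followed by 2 answer tokens.
full_ids = [42, 7, 99, 1234, 2]       # input_ids for prompt + answer (made up)
user_prompt_len = 3                   # length of the prompt-only tokenization
labels = [-100] * user_prompt_len + full_ids[user_prompt_len:]
assert labels == [-100, -100, -100, 1234, 2]   # loss computed only on the answer tokens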
def train(
# model/data params
base_model: str = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
data_path: str = "./results/doc/resume/generic.txt.qa.json",
output_dir: str = "./bigdl-qlora-alpaca",
# training hyperparams
bf16: bool = True, # default to bf16
batch_size: int = 128,
micro_batch_size: int = 2, # default to be 2, limited by GPU memory
num_epochs: int = 3,
learning_rate: float = 3e-5, # default to be 3e-5 to avoid divergence
cutoff_len: int = 256,
val_set_size: int = 2000,
# lora hyperparams
lora_r: int = 8,
lora_alpha: int = 16,
lora_dropout: float = 0.05,
lora_target_modules: List[str] = [
"q_proj",
"v_proj",
"k_proj",
"o_proj",
"up_proj",
"down_proj",
"gate_proj"
],
# llm hyperparams
train_on_inputs: bool = True, # if False, masks out inputs in loss
add_eos_token: bool = False,
group_by_length: bool = False, # faster, but produces an odd training loss curve
resume_from_checkpoint: str = "", # either training checkpoint or final adapter
prompt_template_name: str = "alpaca", # The prompt template to use, will default to alpaca.
gradient_checkpointing: bool = False,
training_mode: str = "lora",
save_checkpoint: bool = True,
):
if training_mode != "lora":
raise Exception(f"This example is for lora training mode, but got training_mode={training_mode}.")
print(
f"Training Alpaca-LoRA model with params:\n"
f"base_model: {base_model}\n"
f"data_path: {data_path}\n"
f"output_dir: {output_dir}\n"
f"batch_size: {batch_size}\n"
f"micro_batch_size: {micro_batch_size}\n"
f"num_epochs: {num_epochs}\n"
f"learning_rate: {learning_rate}\n"
f"cutoff_len: {cutoff_len}\n"
f"val_set_size: {val_set_size}\n"
f"lora_r: {lora_r}\n"
f"lora_alpha: {lora_alpha}\n"
f"lora_dropout: {lora_dropout}\n"
f"lora_target_modules: {lora_target_modules}\n"
f"train_on_inputs: {train_on_inputs}\n"
f"add_eos_token: {add_eos_token}\n"
f"group_by_length: {group_by_length}\n"
f"resume_from_checkpoint: {resume_from_checkpoint or False}\n"
f"prompt template: {prompt_template_name}\n"
f"training_mode: {training_mode}\n"
f"save_checkpoint: {save_checkpoint}\n"
)
gradient_accumulation_steps = batch_size // micro_batch_size
    prompter = Prompter(data_path)
model = AutoModelForCausalLM.from_pretrained(
base_model,
# load_in_4bit=True,
load_in_low_bit="bf16",
optimize_model=False,
# optimize_model=True,
torch_dtype=torch.bfloat16,
modules_to_not_convert=["lm_head"],
trust_remote_code=True,
)
# model = model.half()
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
    # Prepare an IPEX-LLM-compatible PEFT model
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=gradient_checkpointing)
config = LoraConfig(
r=lora_r,
lora_alpha=lora_alpha,
target_modules=lora_target_modules,
lora_dropout=lora_dropout,
bias="none",
task_type="CAUSAL_LM",
training_mode=training_mode,
)
print(f"Lora Config: {config}")
model = get_peft_model(model, config)
dataset = load_dataset("json", data_files=data_path)
tokenized_dataset = dataset.map(generate_and_tokenize_prompt, fn_kwargs={
"prompter": prompter,
"tokenizer": tokenizer,
"max_length": cutoff_len,
"add_eos_token": add_eos_token,
"train_on_inputs": train_on_inputs
    })  # generate_and_tokenize_prompt handles one example at a time, so don't map in batched mode
model.print_trainable_parameters() # Be more transparent about the % of trainable params.
    split = tokenized_dataset["train"].train_test_split(test_size=0.2)
train_dataset = split["train"]
eval_dataset = split["test"]
trainer = transformers.Trainer(
model=model,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
args=transformers.TrainingArguments(
per_device_train_batch_size=micro_batch_size,
gradient_accumulation_steps=gradient_accumulation_steps,
# warmup_ratio=0.03,
# warmup_steps=100,
max_grad_norm=0.3,
num_train_epochs=num_epochs,
learning_rate=learning_rate,
lr_scheduler_type="cosine",
            bf16=True,  # bf16 keeps training more stable
logging_steps=1,
optim="adamw_torch",
evaluation_strategy="steps" if val_set_size > 0 else "no",
save_strategy="steps" if save_checkpoint else "no",
eval_steps=100 if val_set_size > 0 else None,
save_steps=100,
output_dir=output_dir,
save_total_limit=100,
load_best_model_at_end=True if val_set_size > 0 and save_checkpoint else False,
group_by_length=group_by_length,
gradient_checkpointing=gradient_checkpointing,
ddp_backend="ccl",
save_safetensors=False,
),
data_collator=transformers.DataCollatorForSeq2Seq(
tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
),
)
model.config.use_cache = False
    trainer.train(resume_from_checkpoint=resume_from_checkpoint or None)
model.save_pretrained(output_dir)
print(
"\n If there's a warning about missing keys above, please disregard :)"
)
if __name__ == "__main__":
train()

View File

@ -1,209 +0,0 @@
import logging as log
import argparse
import re
import datetime
import ollama
import chromadb
import feedparser
from bs4 import BeautifulSoup
OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint
MODEL_NAME = "deepseek-r1:7b"
def parse_args():
parser = argparse.ArgumentParser(description="AI is Really Cool")
parser.add_argument("--nickname", type=str, default="airc", help="Bot nickname")
parser.add_argument('--level', type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
default='INFO', help='Set the log level.')
return parser.parse_args()
def setup_logging(level):
numeric_level = getattr(log, level.upper(), None)
if not isinstance(numeric_level, int):
raise ValueError(f"Invalid log level: {level}")
log.basicConfig(level=numeric_level, format='%(asctime)s - %(levelname)s - %(message)s')
log.info(f"Logging is set to {level} level.")
def extract_text_from_html_or_xml(content, is_xml=False):
# Parse the content
if is_xml:
soup = BeautifulSoup(content, 'xml') # Use 'xml' parser for XML content
else:
soup = BeautifulSoup(content, 'html.parser') # Default to 'html.parser' for HTML content
# Extract and return just the text
return soup.get_text()
class Feed():
def __init__(self, name, url, poll_limit_min = 30, max_articles=5):
self.name = name
self.url = url
self.poll_limit_min = datetime.timedelta(minutes=poll_limit_min)
self.last_poll = None
self.articles = []
self.max_articles = max_articles
self.update()
def update(self):
now = datetime.datetime.now()
if self.last_poll is None or (now - self.last_poll) >= self.poll_limit_min:
log.info(f"Updating {self.name}")
feed = feedparser.parse(self.url)
self.articles = []
self.last_poll = now
content = ""
if len(feed.entries) > 0:
content += f"Source: {self.name}\n"
for entry in feed.entries[:self.max_articles]:
title = entry.get("title")
if title:
content += f"Title: {title}\n"
link = entry.get("link")
if link:
content += f"Link: {link}\n"
summary = entry.get("summary")
if summary:
summary = extract_text_from_html_or_xml(summary, False)
                        if len(summary) > 1000:
                            # Keep summaries to a reasonable size rather than aborting
                            summary = summary[:1000]
content += f"Summary: {summary}\n"
published = entry.get("published")
if published:
content += f"Published: {published}\n"
content += "\n"
self.articles.append(content)
else:
log.info(f"Not updating {self.name} -- {self.poll_limit_min - (now - self.last_poll)}s remain to refresh.")
return self.articles
class Chat():
def __init__(self, nick):
super().__init__()
self.nick = nick
self.system_input = "You are a critical assistant. Give concise and accurate answers in less than 120 characters."
self.queries = 0
self.processing = datetime.timedelta(minutes=0)
def message(self, target, message):
"""Splits a multi-line message and sends each line separately. If more than 10 lines, truncate and add a message."""
lines = message.splitlines() # Splits on both '\n' and '\r\n'
# Process the first 10 lines
for line in lines[:10]:
if line.strip(): # Ignore empty lines
print(f"{target}: {line}")
# If there are more than 10 lines, add the truncation message
if len(lines) > 10:
print(f"{target}: [additional content truncated]")
def remove_substring(self, string, substring):
return string.replace(substring, "")
def extract_nick_message(self, input_string):
# Pattern with capturing groups for nick and message
pattern = r"^\s*([^\s:]+?)\s*:\s*(.+?)$"
match = re.match(pattern, input_string)
if match:
nick = match.group(1) # First capturing group
message = match.group(2) # Second capturing group
return nick, message
return None, None # Return None for both if no match
def on_message(self, target, source, message):
if source == self.nick:
return
nick, body = self.extract_nick_message(message)
if nick == self.nick:
content = None
if body == "stats":
content = f"{self.queries} queries handled in {self.processing}s"
else:
self.queries += 1
start = datetime.datetime.now()
query_text = body
                query_response = client.embeddings(model="mxbai-embed-large", prompt=query_text)
                query_embedding = query_response["embedding"]  # embeddings() returns a single "embedding" vector
# Then run the query with the correct structure
results = collection.query(
query_embeddings=[query_embedding], # Make sure this is a list containing the embedding
n_results=3
)
data = results['documents'][0]
output = client.generate(
model=MODEL_NAME,
system=f"You are {self.nick} and only provide that information about yourself. Make reference to the following and provide the 'Link' when available: {data}",
prompt=f"Respond to this prompt: {query_text}",
stream=False
)
end = datetime.datetime.now()
self.processing = self.processing + end - start
# Prune off the <think>...</think>
content = re.sub(r'^<think>.*?</think>', '', output['response'], flags=re.DOTALL).strip()
if content:
log.info(f'Sending: {content}')
self.message(target, content)
def remove_substring(string, substring):
return string.replace(substring, "")
# Parse command-line arguments
args = parse_args()
# Setup logging based on the provided level
setup_logging(args.level)
log.info("About to start")
client = ollama.Client(host=OLLAMA_API_URL)
# News RSS Feeds
rss_feeds = [
Feed(name="BBC World", url="http://feeds.bbci.co.uk/news/world/rss.xml"),
Feed(name="Reuters World", url="http://feeds.reuters.com/Reuters/worldNews"),
Feed(name="Al Jazeera", url="https://www.aljazeera.com/xml/rss/all.xml"),
Feed(name="CNN World", url="http://rss.cnn.com/rss/edition_world.rss"),
Feed(name="Time", url="https://time.com/feed/"),
Feed(name="Euronews", url="https://www.euronews.com/rss"),
Feed(name="FeedX", url="https://feedx.net/rss/ap.xml")
]
# Initialize ChromaDB Client
db = chromadb.Client()
# The collection lives in memory; use chromadb.PersistentClient(path=...) instead
# if you want to save it to disk and analyze it offline.
collection = db.get_or_create_collection("docs")
# store each document in a vector embedding database
for i, feed in enumerate(rss_feeds):
# Use the client instance instead of the global ollama module
for j, article in enumerate(feed.articles):
log.info(f"Article {feed.name} {j}. {len(article)}")
response = client.embeddings(model="mxbai-embed-large", prompt=article)
embeddings = response["embedding"] # Note: it's "embedding", not "embeddings"
collection.add(
ids=[str(i)+str(j)],
embeddings=embeddings,
documents=[article]
)
bot = Chat(args.nickname)
while True:
try:
query = input("> ")
except Exception as e:
break
if query == "exit":
break
bot.on_message("chat", "user", f"airc: {query}")

View File

@ -1,394 +0,0 @@
import os
import json
import torch
import random
import glob
from datetime import datetime
from tqdm import tqdm
from transformers import (
AutoTokenizer,
TrainingArguments,
Trainer,
TrainerCallback,
TrainingArguments,
TrainerState,
TrainerControl,
DataCollatorForLanguageModeling,
AutoModelForCausalLM,
BitsAndBytesConfig
)
import numpy as np
from peft import (
TaskType,
get_peft_model,
prepare_model_for_kbit_training,
LoraConfig,
)
from datasets import Dataset
def load_qa_data(results_dir):
"""
Load Q&A pairs from the processed results directory.
Args:
results_dir (str): Path to the directory containing JSON files with Q&A pairs
Returns:
list: List of prompt-completion pairs formatted for fine-tuning
"""
training_data = []
# Find all JSON files in the results directory and subdirectories
json_files = glob.glob(os.path.join(results_dir, "**/*.json"), recursive=True)
for json_file in tqdm(json_files, desc="Loading training data"):
try:
with open(json_file, 'r', encoding='utf-8') as f:
data = json.load(f)
# Extract context and QA pairs
context = data.get("context", "")
qa_pairs = data.get("qa_pairs", [])
for qa_pair in qa_pairs:
question = qa_pair.get("question", "")
answer = qa_pair.get("answer", "")
if question and answer:
# Format as instruction-based fine-tuning example
prompt = f"""<task>
Answer the following question based on the provided context.
CONTEXT:
{context}
QUESTION:
{question}
</task>
"""
# Add to training data
training_data.append({
"prompt": prompt,
"completion": answer
})
except Exception as e:
print(f"Error processing {json_file}: {e}")
continue
print(f"Loaded {len(training_data)} training examples")
return training_data
def prepare_dataset_for_training(training_data, tokenizer, max_length=2048):
"""
Prepare the dataset for training by tokenizing and formatting.
Args:
training_data (list): List of prompt-completion pairs
tokenizer: DeepSeek tokenizer
max_length (int): Maximum sequence length
Returns:
Dataset: HuggingFace dataset ready for training
"""
def tokenize_function(examples):
# Combine prompt and completion
full_texts = []
for i in range(len(examples["prompt"])):
full_text = examples["prompt"][i] + examples["completion"][i]
full_texts.append(full_text)
if not tokenizer.pad_token:
tokenizer.pad_token = tokenizer.eos_token
# Tokenize
tokenized = tokenizer(
full_texts,
padding="max_length",
truncation=True,
max_length=max_length,
return_tensors="pt"
)
# Create labels (same as input_ids for causal LM)
tokenized["labels"] = tokenized["input_ids"].clone()
# Create attention mask for prompt tokens
for i in range(len(full_texts)):
prompt_length = len(tokenizer.encode(examples["prompt"][i]))
# Set labels for prompt tokens to -100 (ignored in loss calculation)
tokenized["labels"][i, :prompt_length] = -100
return tokenized
# Convert to HuggingFace Dataset
dataset_dict = {
"prompt": [item["prompt"] for item in training_data],
"completion": [item["completion"] for item in training_data]
}
# Create dataset
dataset = Dataset.from_dict(dataset_dict)
# Apply tokenization
tokenized_dataset = dataset.map(
tokenize_function,
batched=True,
remove_columns=["prompt", "completion"]
)
return tokenized_dataset
def setup_lora_config():
"""
Configure LoRA hyperparameters.
Returns:
LoraConfig: Configuration for LoRA fine-tuning
"""
return LoraConfig(
task_type=TaskType.CAUSAL_LM,
r=8, #16, # Rank dimension
lora_alpha=16, #32, # Alpha parameter for LoRA scaling
lora_dropout=0.05, # Dropout probability for LoRA layers
target_modules=[ # Target modules to apply LoRA to
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
],
bias="none", # Whether to train bias parameters
fan_in_fan_out=False # Set to True for linear layers with fan_in != fan_out
)
class EarlyStoppingCallback(TrainerCallback):
"""
Callback that implements early stopping.
"""
def __init__(self, patience=3, min_delta=0.0):
"""
Args:
patience (int): Number of evaluations with no improvement after which training will be stopped.
min_delta (float): Minimum change in the monitored quantity to qualify as an improvement.
"""
self.patience = patience
self.min_delta = min_delta
self.best_score = None
self.counter = 0
self.early_stop = False
def on_evaluate(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
eval_loss = state.log_history[-1].get("eval_loss")
if eval_loss is None:
return
if self.best_score is None:
self.best_score = eval_loss
elif eval_loss > self.best_score - self.min_delta:
self.counter += 1
print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
if self.counter >= self.patience:
print(f"Early stopping triggered! Best loss: {self.best_score:.4f}")
self.early_stop = True
control.should_training_stop = True
else:
self.best_score = eval_loss
self.counter = 0
return control
def train_with_lora(model_name, training_data, output_dir, batch_size=4, num_epochs=3, learning_rate=2e-4):
"""
Fine-tune the DeepSeek-R1 model with LoRA.
Args:
model_name (str): Name of the DeepSeek model
training_data (list): List of prompt-completion pairs
output_dir (str): Directory to save the fine-tuned model
batch_size (int): Training batch size
num_epochs (int): Number of training epochs
learning_rate (float): Learning rate
"""
# Create output directory
os.makedirs(output_dir, exist_ok=True)
# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
model_name,
low_cpu_mem_usage=True,
trust_remote_code=True,
use_cache=False,
quantization_config=bnb_config,
device_map={"": torch.xpu.current_device()},
torch_dtype=torch.bfloat16,
)
# Prepare the model for LoRA fine-tuning
# 1. Prepare for k-bit training if using quantization
model = prepare_model_for_kbit_training(model)
# 2. Add LoRA adapters
lora_config = setup_lora_config()
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
# Prepare dataset
tokenized_dataset = prepare_dataset_for_training(training_data, tokenizer)
# Split into train and evaluation sets
dataset_dict = tokenized_dataset.train_test_split(test_size=0.1)
# Set up training arguments
training_args = TrainingArguments(
output_dir=output_dir,
num_train_epochs=num_epochs,
per_device_train_batch_size=batch_size,
per_device_eval_batch_size=batch_size,
gradient_accumulation_steps=4,
evaluation_strategy="steps",
eval_steps=100, # More frequent evaluation to check stopping criteria
save_strategy="steps",
save_steps=500,
save_total_limit=3,
learning_rate=learning_rate,
warmup_steps=100,
weight_decay=0.01,
logging_dir=f"{output_dir}/logs",
logging_steps=100,
fp16=True,
report_to="none",
optim="adamw_torch",
load_best_model_at_end=True, # Load the best model when training ends
metric_for_best_model="eval_loss" # Use eval loss to determine the best model
)
# Create data collator
data_collator = DataCollatorForLanguageModeling(
tokenizer=tokenizer,
mlm=False # Use causal language modeling (not masked)
)
early_stopping_callback = EarlyStoppingCallback(patience=3, min_delta=0.01)
# Initialize trainer
trainer = Trainer(
model=model,
args=training_args,
train_dataset=dataset_dict["train"],
eval_dataset=dataset_dict["test"],
data_collator=data_collator,
callbacks=[early_stopping_callback] # Add the early stopping callback
)
# Start training
print("Starting LoRA fine-tuning...")
trainer.train()
model = trainer.model
print("LoRA fine-tuning complete...")
# Save the fine-tuned model
print("Saving model...")
model.save_pretrained(f"{output_dir}/final_model")
tokenizer.save_pretrained(f"{output_dir}/final_model")
print(f"Fine-tuned model saved to {output_dir}/final_model")
return model, tokenizer
# Main function
def main():
# Configuration
results_dir = "../results" # Directory with processed text files and QA pairs
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
#model_name = "Intel/neural-chat-7b-v3-3"
output_dir = f"./fine_tuned"
# Load training data from processed results
training_data = load_qa_data(results_dir)
# Shuffle training data
random.shuffle(training_data)
# Fine-tune the model
train_with_lora(
model_name=model_name,
training_data=training_data,
output_dir=output_dir,
batch_size=1, # Can't fit more than one on the B580
num_epochs=10,
learning_rate=2e-4
)
model_path = f"{output_dir}/final_model"
# Load the fine-tuned model and tokenizer
print("Loading fine-tuned model for evaluation...")
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
model_path,
low_cpu_mem_usage=True,
trust_remote_code=True,
use_cache=True,
quantization_config=bnb_config,
device_map={"": torch.xpu.current_device()})
model = model.to('xpu')
print("Loaded. Ask your question, CTRL-C to exit.")
while True:
try:
question = input("> ").strip()
except KeyboardInterrupt:
print("\nExiting.")
break
prompt = f"""<task>
You are an assistant providing resume details about James Ketrenos. Answer the following question based on your knowledge. If you don't know, say so. Be concise.
QUESTION:
{question}
</task>
"""
# Tokenize and generate
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
outputs = model.generate(
**inputs,
max_new_tokens=1024,
do_sample=True,
temperature=0.7,
top_p=0.9,
)
# Decode the output
full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
# Extract just the answer part (remove the prompt)
        answer = full_output[len(tokenizer.decode(inputs.input_ids[0], skip_special_tokens=True)):]
print(answer.strip())
if __name__ == "__main__":
main()

View File

@ -1,360 +0,0 @@
# Self-supervised synthetic data via sequential generation
import os
import re
import json
import torch
from pathlib import Path
from datetime import datetime
from tqdm import tqdm
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
BitsAndBytesConfig,
)
def load_deepseek_r1():
"""
Loads the DeepSeek-R1 model and tokenizer.
Returns:
tuple: (model, tokenizer) for DeepSeek-R1
"""
# Load model and tokenizer
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
model_name,
low_cpu_mem_usage=True,
trust_remote_code=True,
use_cache=False,
quantization_config=bnb_config,
device_map={"": torch.xpu.current_device()},
torch_dtype=torch.bfloat16,
)
if not model.config.pad_token_id:
model.config.pad_token_id = model.config.eos_token_id
return model, tokenizer
# Function to get answers from the model
def validate_question_answer_from_model(context, question, answer, model, tokenizer):
"""
Query the model to evaluate if an answer is a good fit for a question.
Args:
context (str): The text file content
question (str): The question
answer (str): The answer
model: The DeepSeek model
tokenizer: The DeepSeek tokenizer
Returns:
str: The model's evaluation to the question
"""
# Set up the prompt for answering the question
prompt = f"""<task>
You are a quality assurance expert reviewing question-answer pairs for an AI training dataset. Your task is to evaluate whether each pair meets our quality standards and is suitable for training.
For each question-answer pair, evaluate:
1. ACCURACY: Does the answer contain ONLY information from the context, without fabrications?
2. COMPLETENESS: Does the answer fully address the question using all relevant information?
3. RELEVANCE: Is the question meaningful and relevant to the context?
4. NATURALNESS: Do both question and answer sound natural and conversational?
5. DIVERSITY: Does this pair add variety to our dataset (not redundant with others)?
Context:
{context}
Question:
{question}
Answer:
{answer}
Provide your assessment as follows:
- VERDICT: [ACCEPT/REJECT]
- REASONING: [Brief explanation of your decision]
- IMPROVEMENTS: [Suggestions if needed]
</task>
"""
# Generate answer
raw_answer = generate_with_deepseek(prompt, model, tokenizer)
# Parse the response to get the actual answer
answer = parse_deepseek_response(raw_answer)
return answer
def generate_with_deepseek(prompt, model, tokenizer, max_length=4096):
"""
Generate text using DeepSeek-R1 model with proper handling of full output.
Args:
prompt (str): The input prompt
model: The DeepSeek model
tokenizer: The DeepSeek tokenizer
max_length (int): Maximum length of generated text
Returns:
str: Generated text response
"""
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
# Get the full generated sequence
generation_output = model.generate(
**inputs,
pad_token_id=tokenizer.eos_token_id,
max_new_tokens=max_length,
do_sample=True,
temperature=0.7,
top_p=0.9,
return_dict_in_generate=True,
output_scores=False
)
# Get the full output token IDs
output_token_ids = generation_output.sequences[0]
# Decode the full generated text (including the prompt)
full_output = tokenizer.decode(output_token_ids, skip_special_tokens=True)
# Extract only the response part by removing the prompt part
response = full_output[len(tokenizer.decode(inputs.input_ids[0], skip_special_tokens=True)):]
return response
def parse_deepseek_response(response):
"""
Parse the DeepSeek-R1 response.
This handles DeepSeek's thinking steps and separates them from the response.
Args:
response (str): The raw response from the DeepSeek model
Returns:
str: The cleaned answer part of the response
"""
# If the response has thinking tags, extract the actual answer
response = re.sub(r"^(<think>)?.*</think>", "", response, flags=re.DOTALL)
# If no special formatting detected, return the whole response
return response.strip()
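# Illustrative example:
#   parse_deepseek_response("<think>step-by-step reasoning...</think>The answer is 42.")
# returns "The answer is 42."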
# Function to recursively walk a directory and process text files
def process_directory(directory_path, output_path="results", file_extensions=(".txt",".md"), batch_size=5):
"""
Recursively walks a directory, processes text files, and stores results.
Args:
directory_path (str): Path to directory containing text files
output_path (str): Path to store results
file_extensions (tuple): File extensions to process
batch_size (int): Number of files to process before clearing cache
"""
# Load the DeepSeek-R1 model
model, tokenizer = load_deepseek_r1()
# Create output directory if it doesn't exist
os.makedirs(output_path, exist_ok=True)
# Use tqdm for progress tracking
file_paths = []
for root, _, files in os.walk(directory_path):
for file in files:
if file.lower().endswith(file_extensions):
file_paths.append(os.path.join(root, file))
# Process files with batch-based memory management
for i, file_path in enumerate(tqdm(file_paths, desc="Processing files")):
process_file(file_path, model, tokenizer, output_path)
# Clear cache periodically to prevent memory issues
if (i + 1) % batch_size == 0:
if torch.xpu.is_available():
torch.xpu.empty_cache()
else:
torch.cuda.empty_cache()
print(f"Processing complete. Results stored in {output_path}")
# Function to process a single text file
def process_file(file_path, model, tokenizer, output_path):
"""
Process a single text file by querying the model for questions and answers.
Args:
file_path (str): Path to the text file
model: The DeepSeek model
tokenizer: The DeepSeek tokenizer
output_path (str): Path to store results
"""
# Read the file content
try:
with open(file_path, 'r', encoding='utf-8') as file:
content = file.read()
except Exception as e:
print(f"Error reading {file_path}: {e}")
return
# Skip empty files
if not content.strip():
print(f"Skipping empty file: {file_path}")
return
# Generate a relative output path that maintains the directory structure
relative_path = os.path.relpath(file_path, start=os.path.dirname(output_path))
output_file = os.path.join(output_path, relative_path + ".json")
os.makedirs(os.path.dirname(output_file), exist_ok=True)
# Handle potential token length issues by truncating if necessary
# DeepSeek has a context window limit, truncate if needed
max_content_length = 100000 # Adjust based on model's context length limit
if len(content) > max_content_length:
content = content[:max_content_length] + "... [Content truncated due to length]"
# Query model for questions
questions = get_questions_from_model(content, model, tokenizer)
print(f"{len(questions)} questions generated for {file_path}")
# Get answers for each question
results = {
"file_path": file_path,
"processed_at": datetime.now().isoformat(),
"context": content[:1000] + "..." if len(content) > 1000 else content, # Truncated context for JSON storage
"qa_pairs": []
}
# Process each question
for i, question in enumerate(questions):
print(f"Generating answer for question {i+1}/{len(questions)}: {question}")
answer = get_answer_from_model(content, question, model, tokenizer)
print(f"Answer: {answer[:50] + '...' if len(answer) > 50 else answer}")
print(f"Evaluating response...")
validation = validate_question_answer_from_model(content, question, answer, model, tokenizer)
print(f"Evaluation: {validation[:50] + '...' if len(validation) > 50 else validation}")
results["qa_pairs"].append({
"question": question,
"answer": answer,
"validation": validation,
})
# Save results after each generate pass so they can be evaluated
with open(output_file, 'w', encoding='utf-8') as f:
json.dump(results, f, ensure_ascii=False, indent=2)
# Function to get questions from the model
def get_questions_from_model(context, model, tokenizer):
"""
Query the model to generate questions about the provided context.
Args:
context (str): The text file content
model: The DeepSeek model
tokenizer: The DeepSeek tokenizer
Returns:
list: List of questions about the context
"""
# Set up the prompt for generating questions
prompt = f"""<task>
You are an expert data scientist creating a training dataset. I'll provide context information about a person from their resume. Your task is to generate 10 diverse, realistic questions that someone might ask about this person.
IMPORTANT: DO NOT return JSON or any structured format. Respond with a simple numbered list of questions only, with no formatting, no JSON, and no additional text.
Generate questions that:
- Vary in complexity (simple factual questions, complex reasoning questions)
- Cover different aspects of the context (experience, skills, education, achievements)
- Include both specific and general inquiries
- Sound natural, as if asked by a real person
- Avoid asking for information not present in the context
Context:
{context}
Return ONLY a plain numbered list like:
1. First question?
2. Second question?
...and so on.
Do not include any explanations, JSON, or other formatting.
</task>
"""
# Generate questions
raw_response = generate_with_deepseek(prompt, model, tokenizer)
# Parse the response to get the actual questions
response = parse_deepseek_response(raw_response)
lines = response.strip().split('\n')
# Transform lines like "1. What is your name?" into just "What is your name?" (only include lines that end with a question mark)
questions = [re.sub(r'^\d+\.\s*', '', line.strip()) for line in lines if line.strip() and line.strip().endswith('?')]
    if len(questions) == 0:
        # Don't abort the whole run on one bad generation; report it and return an empty list
        print(f"No questions could be parsed from the model response:\n{response}")
return questions
# Function to get answers from the model
def get_answer_from_model(context, question, model, tokenizer):
"""
Query the model to answer a question about the provided context.
Args:
context (str): The text file content
question (str): The question to answer
model: The DeepSeek model
tokenizer: The DeepSeek tokenizer
Returns:
str: The model's answer to the question
"""
# Set up the prompt for answering the question
prompt = f"""<task>
You are an AI assistant being fine-tuned to accurately represent a specific person based on their resume. Below is a question about this person and the context from their resume.
Your task is to provide a comprehensive, accurate answer that:
- Only uses information explicitly stated in the context
- Doesn't fabricate or assume additional details
- Maintains a professional, helpful tone
- Clearly states if the question cannot be answered based on the given context
- Structures the response in a natural, conversational way
Context:
{context}
Question:
{question}
Answer:
</task>
"""
# Generate answer
raw_answer = generate_with_deepseek(prompt, model, tokenizer)
# Parse the response to get the actual answer
answer = parse_deepseek_response(raw_answer)
return answer
# Example usage
if __name__ == "__main__":
process_directory(
directory_path="../doc/", # Replace with your directory path
output_path="../results",
file_extensions=(".txt", ".md"), # Process both txt and md files
batch_size=5 # Clear cache after every 5 files
)

View File

@ -1,25 +0,0 @@
# main.py - Entry point for the RAG application; currently runs the CLI interface.
# The Flask imports are placeholders for a future web interface.
from rag_core import RagCore
from rag_cli import RagCLI
from flask import Flask, request, jsonify, render_template
def main():
import argparse
parser = argparse.ArgumentParser(description='RAG Application')
parser.add_argument('--debug', action='store_true', help='Run in debug mode')
args = parser.parse_args()
# Create the core RAG application
core = RagCore()
# Run CLI interface
cli = RagCLI(core)
cli.run()
if __name__ == "__main__":
main()

src/client/.gitignore (38 lines, vendored)
View File

@ -1,38 +0,0 @@
# Learn more https://docs.github.com/en/get-started/getting-started-with-git/ignoring-files
# dependencies
node_modules/
# Expo
.expo/
dist/
web-build/
expo-env.d.ts
# Native
*.orig.*
*.jks
*.p8
*.p12
*.key
*.mobileprovision
# Metro
.metro-health-check*
# debug
npm-debug.*
yarn-debug.*
yarn-error.*
# macOS
.DS_Store
*.pem
# local env files
.env*.local
# typescript
*.tsbuildinfo
app-example

View File

@ -1,50 +0,0 @@
# Welcome to your Expo app 👋
This is an [Expo](https://expo.dev) project created with [`create-expo-app`](https://www.npmjs.com/package/create-expo-app).
## Get started
1. Install dependencies
```bash
npm install
```
2. Start the app
```bash
npx expo start
```
In the output, you'll find options to open the app in a
- [development build](https://docs.expo.dev/develop/development-builds/introduction/)
- [Android emulator](https://docs.expo.dev/workflow/android-studio-emulator/)
- [iOS simulator](https://docs.expo.dev/workflow/ios-simulator/)
- [Expo Go](https://expo.dev/go), a limited sandbox for trying out app development with Expo
You can start developing by editing the files inside the **app** directory. This project uses [file-based routing](https://docs.expo.dev/router/introduction).
## Get a fresh project
When you're ready, run:
```bash
npm run reset-project
```
This command will move the starter code to the **app-example** directory and create a blank **app** directory where you can start developing.
## Learn more
To learn more about developing your project with Expo, look at the following resources:
- [Expo documentation](https://docs.expo.dev/): Learn fundamentals, or go into advanced topics with our [guides](https://docs.expo.dev/guides).
- [Learn Expo tutorial](https://docs.expo.dev/tutorial/introduction/): Follow a step-by-step tutorial where you'll create a project that runs on Android, iOS, and the web.
## Join the community
Join our community of developers creating universal apps.
- [Expo on GitHub](https://github.com/expo/expo): View our open source platform and contribute.
- [Discord community](https://chat.expo.dev): Chat with Expo users and ask questions.

View File

@ -1,36 +0,0 @@
{
"expo": {
"name": "Ketr-Chat",
"slug": "ketr-chat",
"version": "1.0.0",
"orientation": "portrait",
"icon": "./assets/icon.png",
"userInterfaceStyle": "light",
"splash": {
"image": "./assets/splash.png",
"resizeMode": "contain",
"backgroundColor": "#ffffff"
},
"assetBundlePatterns": [
"**/*"
],
"web": {
"favicon": "./assets/favicon.png",
"bundler": "metro"
},
"scheme": "myapp",
"packagerOpts": {
"hostType": "lan",
"dev": true,
"minify": false
},
"experiments": {
"tsconfigPaths": true
},
"extra": {
"router": {
"origin": false
}
}
}
}

Binary image assets not shown (10 files removed; sizes from 318 B to 22 KiB).

File diff suppressed because it is too large.

View File

@ -1,58 +0,0 @@
{
"name": "airc",
"main": "expo-router/entry",
"version": "1.0.0",
"scripts": {
"start": "expo start",
"direct": "react-native start --reset-cache",
"android": "expo run:android",
"ios": "expo run:ios",
"web": "expo start --web"
},
"jest": {
"preset": "jest-expo"
},
"dependencies": {
"@expo/vector-icons": "^14.0.2",
"@react-native/metro-config": "^0.78.1",
"@react-navigation/bottom-tabs": "^7.2.0",
"@react-navigation/native": "^7.0.14",
"expo": "~52.0.37",
"expo-blur": "~14.0.3",
"expo-constants": "~17.0.7",
"expo-font": "~13.0.4",
"expo-haptics": "~14.0.1",
"expo-linking": "~7.0.5",
"expo-router": "~4.0.17",
"expo-splash-screen": "~0.29.22",
"expo-status-bar": "~2.0.1",
"expo-symbols": "~0.2.2",
"expo-system-ui": "~4.0.8",
"expo-web-browser": "~14.0.2",
"react": "18.3.1",
"react-dom": "18.3.1",
"react-fontawesome": "^1.7.1",
"react-helmet": "^6.1.0",
"react-native": "0.76.7",
"react-native-gesture-handler": "~2.20.2",
"react-native-markdown-display": "^7.0.2",
"react-native-reanimated": "~3.16.1",
"react-native-safe-area-context": "4.12.0",
"react-native-screens": "~4.4.0",
"react-native-web": "~0.19.13",
"react-native-webview": "13.12.5",
"react-spinners": "^0.15.0"
},
"devDependencies": {
"@babel/core": "^7.25.2",
"@react-native-community/cli": "^18.0.0",
"@types/jest": "^29.5.12",
"@types/react": "~18.3.12",
"@types/react-test-renderer": "^18.3.0",
"jest": "^29.2.1",
"jest-expo": "~52.0.4",
"react-test-renderer": "18.3.1",
"typescript": "^5.3.3"
},
"private": true
}

View File

@ -1,17 +0,0 @@
{
"extends": "expo/tsconfig.base",
"compilerOptions": {
"strict": true,
"paths": {
"@/*": [
"./*"
]
}
},
"include": [
"**/*.ts",
"**/*.tsx",
".expo/types/**/*.ts",
"expo-env.d.ts"
]
}

View File

@ -0,0 +1,279 @@
# ze-monitor
A small utility to monitor Level Zero devices via
[Level Zero Sysman](https://oneapi-src.github.io/level-zero-spec/level-zero/latest/sysman/PROG.html#sysman-programming-guide)
from the command line, similar to 'top'.
# Installation
Requires Ubuntu Oracular 24.10.
## Easiest
### Install prerequisites
This will add the [Intel Graphics Preview PPA](https://github.com/canonical/intel-graphics-preview) and install the required dependencies:
```bash
sudo apt-get install -y \
software-properties-common \
&& sudo add-apt-repository -y ppa:kobuk-team/intel-graphics \
&& sudo apt-get update \
&& sudo apt-get install -y \
libze1 libze-intel-gpu1 libncurses6
```
### Install ze-monitor from .deb package
This will download the ze-monitor .deb package from GitHub, install it, and add the current
user to the 'ze-monitor' group to allow running the utility:
```bash
version=0.3.0-1
wget https://github.com/jketreno/ze-monitor/releases/download/v${version}/ze-monitor-${version}_amd64.deb
sudo dpkg -i ze-monitor-${version}_amd64.deb
sudo usermod -a -G ze-monitor $(whoami)
newgrp ze-monitor
```
Congratulations! You can run ze-monitor:
```bash
ze-monitor
```
You should see something like:
```bash
Device 1: 8086:A780 (Intel(R) UHD Graphics 770)
Device 2: 8086:E20B (Intel(R) Graphics [0xe20b])
```
To monitor a device:
```bash
ze-monitor --device 2
```
Check the docs (`man ze-monitor`) for additional details on running the ze-monitor utility.
## Slightly more involved
This project uses docker containers to build. As this was originally written to monitor an Intel Arc B580 (Battlemage), it requires a kernel that supports that hardware, such as the one documented at [Intel Graphics Preview](https://github.com/canonical/intel-graphics-preview), which runs in Ubuntu Oracular (24.10). It will monitor any Level Zero device, even those using the i915 driver.
NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/)
```
git clone https://github.com/jketreno/ze-monitor.git
cd ze-monitor
docker compose build
sudo apt install libze1 libncurses6
version=$(cat src/version.txt)
docker compose run --remove-orphans --rm \
ze-monitor \
cp /opt/ze-monitor-static/build/ze-monitor-${version}_amd64.deb \
/opt/ze-monitor/build
sudo dpkg -i build/ze-monitor-${version}_amd64.deb
```
# Security
For ze-monitor to read the performance monitoring unit (PMU) data exposed by the Linux kernel, it needs elevated permissions. The easiest way is to install the .deb package and add the user to the ze-monitor group, or run it under sudo (e.g., `sudo ze-monitor ...`).
The specific capabilities required to monitor the GPU are documented in [Perf Security](https://www.kernel.org/doc/html/v5.1/admin-guide/perf-security.html) and [man capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html). These include:
| Capability | Reason |
|:--------------------|:-----------------------------------------------------|
| CAP_DAC_READ_SEARCH | Bypass all filesystem read access checks |
| CAP_PERFMON | Access to perf_events (vs. overloaded CAP_SYS_ADMIN) |
| CAP_SYS_PTRACE | PTRACE_MODE_READ_REALCREDS ptrace access mode check |
To configure ze-monitor to run with those privileges, you can use `setcap` to set the correct capabilities on ze-monitor. You can further secure your system by creating a user group specifically for running the utility and restrict running of that command to users in that group. That is what the .deb package does.
If you install the .deb package from a [Release](https://github.com/jketreno/ze-monitor/releases) or by building it, that package will set the appropriate permissions for ze-monitor on installation and set it executable only to those in the 'ze-monitor' group.
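Roughly, the packaged setup looks like the following sketch (the actual maintainer scripts and install path may differ; `/usr/bin/ze-monitor` is assumed here):
```bash
# Create a dedicated group and limit execution to its members
sudo groupadd --system ze-monitor
sudo chgrp ze-monitor /usr/bin/ze-monitor
sudo chmod 750 /usr/bin/ze-monitor
# Grant only the capabilities the utility needs
sudo setcap "cap_perfmon,cap_dac_read_search,cap_sys_ptrace=ep" /usr/bin/ze-monitor
# Add a user to the group
sudo usermod -a -G ze-monitor $(whoami)
```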
## Anyone can run ze-monitor
If you build from source and want to set the capabilities:
```bash
sudo setcap "cap_perfmon,cap_dac_read_search,cap_sys_ptrace=ep" build/ze-monitor
getcap build/ze-monitor
```
Any user can then run `build/ze-monitor` and monitor the GPU.
# Build outside container
## Prerequisites
If you would like to build outside of docker, you need the following packages installed:
```
sudo apt-get install -y \
build-essential \
libfmt-dev \
libncurses-dev
```
In addition, you need the Intel drivers installed, which are available from the `kobuk-team/intel-graphics` PPA:
```
sudo apt-get install -y \
software-properties-common \
&& sudo add-apt-repository -y ppa:kobuk-team/intel-graphics \
&& sudo apt-get update \
&& sudo apt-get install -y \
libze-intel-gpu1 \
libze1 \
libze-dev
```
## Building
```
cd build
cmake ..
make
```
## Running
```
build/ze-monitor
```
## Build and install .deb
In order to build the .deb package, you need the following packages installed:
```bash
sudo apt-get install -y \
debhelper \
devscripts \
rpm \
rpm2cpio
```
You can then build the .deb:
```bash
if [ -d build ]; then
cd build
fi
version=$(cat ../src/version.txt)
cpack
sudo dpkg -i build/packages/ze-monitor_${version}_amd64.deb
```
You can then run ze-monitor from your path:
```bash
ze-monitor
```
# Developing
To run the built binary without building a full .deb package, you can compile it in the container and run the resulting binary on the host:
```
docker compose run --rm ze-monitor build.sh
build/ze-monitor
```
The build.sh script will build the binary in /opt/ze-monitor/build, which is volume mounted to the host's build directory.
NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities.
# Running
NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities.
If running within a docker container, the container does not have access to the host's `/proc/fd`, which is needed to identify processes outside the current container that are using the GPU. As a result, only processes running in the same container as ze-monitor will be listed as using the GPU.
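If you do need host-wide process information from inside a container, one option is to share the host PID namespace (this weakens container isolation, so use with care). A rough sketch, assuming an image named `ze-monitor` with the binary on its path:
```bash
docker run --rm -it \
  --pid=host \
  --device /dev/dri \
  ze-monitor ze-monitor --device 1
```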
## List available devices
```
ze-monitor
```
Example output:
```bash
$ ze-monitor
Device 1: 8086:E20B (Intel(R) Graphics [0xe20b])
Device 2: 8086:A780 (Intel(R) UHD Graphics 770)
```
## Show details for a given device
```
sudo ze-monitor --info --device ( PCIID | # | BDF | UUID | /dev/dri/render*)
```
Example output:
```bash
$ sudo ze-monitor --device 2 --info
Device: 8086:A780 (Intel(R) UHD Graphics 770)
UUID: 868080A7-0400-0000-0002-000000000000
BDF: 0000:0000:0002:0000
PCI ID: 8086:A780
Subdevices: 0
Serial Number: unknown
Board Number: unknown
Brand Name: unknown
Model Name: Intel(R) UHD Graphics 770
Vendor Name: Intel(R) Corporation
Driver Version: 0CB7EFCAD5695B7EC5C8CE6
Type: GPU
Is integrated with host: Yes
Is a sub-device: No
Supports error correcting memory: No
Supports on-demand page-faulting: No
Engines: 7
Engine 1: ZES_ENGINE_GROUP_RENDER_SINGLE
Engine 2: ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE
Engine 3: ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE
Engine 4: ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE
Engine 5: ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE
Engine 6: ZES_ENGINE_GROUP_COPY_SINGLE
Engine 7: ZES_ENGINE_GROUP_MEDIA_ENHANCEMENT_SINGLE
Temperature Sensors: 0
```
NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities.
## Monitor a given device
```
sudo ze-monitor --device ( PCIID | # | BDF | UUID | /dev/dri/render* ) \
--interval ms
```
NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities.
Output:
```bash
$ sudo ze-monitor --device 2 --interval 500
Device: 8086:E20B (Intel(R) Graphics [0xe20b])
Total Memory: 12809404416
Free memory: [# 55% ############################ ]
Power usage: 165.0W
------------------------------------------------------------------------------------------
PID COMMAND-LINE
USED MEMORY SHARED MEMORY ENGINE FLAGS
------------------------------------------------------------------------------------------
1 /sbin/init splash
MEM: 106102784 SHR: 100663296 FLAGS: RENDER COMPUTE
1606 /usr/lib/systemd/systemd-logind
MEM: 106102784 SHR: 100663296 FLAGS: RENDER COMPUTE
5164 /usr/bin/gnome-shell
MEM: 530513920 SHR: 503316480 FLAGS: RENDER COMPUTE
5237 /usr/bin/Xwayland :1024 -rootless -nores...isplayfd 6 -initfd 7 -byteswappedclients
MEM: 0 SHR: 0 FLAGS:
40480 python chat.py
MEM: 5544226816 SHR: 0 FLAGS: DMA COMPUTE
```
If you pass `--one-shot`, statistics will be gathered, displayed, and then ze-monitor will exit.
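For example, to gather and print a single snapshot of device 2 and then exit:
```bash
sudo ze-monitor --device 2 --one-shot
```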

View File

@ -1,44 +0,0 @@
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import torch
from transformers import AutoTokenizer
import argparse
current_dir = os.path.dirname(os.path.realpath(__file__))
common_util_path = os.path.join(current_dir, '..')
import sys
sys.path.append(common_util_path)
from common.utils import merge_adapter
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Merge the adapter into the original model for Llama2 model')
parser.add_argument('--repo-id-or-model-path', type=str, default="meta-llama/Llama-2-7b-hf",
help='The huggingface repo id for the Llama2 (e.g. `meta-llama/Llama-2-7b-hf` and `meta-llama/Llama-2-13b-chat-hf`) to be downloaded'
', or the path to the huggingface checkpoint folder')
parser.add_argument('--adapter_path', type=str,)
parser.add_argument('--output_path', type=str,)
args = parser.parse_args()
base_model = model_path = args.repo_id_or_model_path
adapter_path = args.adapter_path
output_path = args.output_path
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
merge_adapter(base_model, tokenizer, adapter_path, output_path)
    print(f'Finished merging the adapter into the original model; the merged model can be found in {output_path}.')

View File

@ -1,202 +0,0 @@
---
base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
library_name: peft
---
# Model Card for Model ID
<!-- Provide a quick summary of what the model is/does. -->
## Model Details
### Model Description
<!-- Provide a longer summary of what this model is. -->
- **Developed by:** [More Information Needed]
- **Funded by [optional]:** [More Information Needed]
- **Shared by [optional]:** [More Information Needed]
- **Model type:** [More Information Needed]
- **Language(s) (NLP):** [More Information Needed]
- **License:** [More Information Needed]
- **Finetuned from model [optional]:** [More Information Needed]
### Model Sources [optional]
<!-- Provide the basic links for the model. -->
- **Repository:** [More Information Needed]
- **Paper [optional]:** [More Information Needed]
- **Demo [optional]:** [More Information Needed]
## Uses
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
### Direct Use
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
[More Information Needed]
### Downstream Use [optional]
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
[More Information Needed]
### Out-of-Scope Use
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
[More Information Needed]
## Bias, Risks, and Limitations
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
[More Information Needed]
### Recommendations
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
## How to Get Started with the Model
Use the code below to get started with the model.
[More Information Needed]
## Training Details
### Training Data
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
[More Information Needed]
### Training Procedure
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
#### Preprocessing [optional]
[More Information Needed]
#### Training Hyperparameters
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
#### Speeds, Sizes, Times [optional]
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
[More Information Needed]
## Evaluation
<!-- This section describes the evaluation protocols and provides the results. -->
### Testing Data, Factors & Metrics
#### Testing Data
<!-- This should link to a Dataset Card if possible. -->
[More Information Needed]
#### Factors
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
[More Information Needed]
#### Metrics
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
[More Information Needed]
### Results
[More Information Needed]
#### Summary
## Model Examination [optional]
<!-- Relevant interpretability work for the model goes here -->
[More Information Needed]
## Environmental Impact
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
- **Hardware Type:** [More Information Needed]
- **Hours used:** [More Information Needed]
- **Cloud Provider:** [More Information Needed]
- **Compute Region:** [More Information Needed]
- **Carbon Emitted:** [More Information Needed]
## Technical Specifications [optional]
### Model Architecture and Objective
[More Information Needed]
### Compute Infrastructure
[More Information Needed]
#### Hardware
[More Information Needed]
#### Software
[More Information Needed]
## Citation [optional]
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
**BibTeX:**
[More Information Needed]
**APA:**
[More Information Needed]
## Glossary [optional]
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
[More Information Needed]
## More Information [optional]
[More Information Needed]
## Model Card Authors [optional]
[More Information Needed]
## Model Card Contact
[More Information Needed]
### Framework versions
- PEFT 0.14.0

View File

@ -1,37 +0,0 @@
{
"alpha_pattern": {},
"auto_mapping": null,
"base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
"bias": "none",
"eva_config": null,
"exclude_modules": null,
"fan_in_fan_out": false,
"inference_mode": true,
"init_lora_weights": true,
"layer_replication": null,
"layers_pattern": null,
"layers_to_transform": null,
"loftq_config": {},
"lora_alpha": 16,
"lora_bias": false,
"lora_dropout": 0.05,
"megatron_config": null,
"megatron_core": "megatron.core",
"modules_to_save": null,
"peft_type": "LORA",
"r": 8,
"rank_pattern": {},
"revision": null,
"target_modules": [
"o_proj",
"q_proj",
"gate_proj",
"k_proj",
"down_proj",
"up_proj",
"v_proj"
],
"task_type": "CAUSAL_LM",
"use_dora": false,
"use_rslora": false
}

View File

@ -1,32 +0,0 @@
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 100,
"global_step": 10,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [],
"logging_steps": 100,
"max_steps": 10,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 763304356085760.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}

View File

@ -1,202 +0,0 @@
---
base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
library_name: peft
---
# Model Card for Model ID
<!-- Provide a quick summary of what the model is/does. -->
## Model Details
### Model Description
<!-- Provide a longer summary of what this model is. -->
- **Developed by:** [More Information Needed]
- **Funded by [optional]:** [More Information Needed]
- **Shared by [optional]:** [More Information Needed]
- **Model type:** [More Information Needed]
- **Language(s) (NLP):** [More Information Needed]
- **License:** [More Information Needed]
- **Finetuned from model [optional]:** [More Information Needed]
### Model Sources [optional]
<!-- Provide the basic links for the model. -->
- **Repository:** [More Information Needed]
- **Paper [optional]:** [More Information Needed]
- **Demo [optional]:** [More Information Needed]
## Uses
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
### Direct Use
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
[More Information Needed]
### Downstream Use [optional]
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
[More Information Needed]
### Out-of-Scope Use
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
[More Information Needed]
## Bias, Risks, and Limitations
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
[More Information Needed]
### Recommendations
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
## How to Get Started with the Model
Use the code below to get started with the model.
[More Information Needed]
## Training Details
### Training Data
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
[More Information Needed]
### Training Procedure
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
#### Preprocessing [optional]
[More Information Needed]
#### Training Hyperparameters
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
#### Speeds, Sizes, Times [optional]
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
[More Information Needed]
## Evaluation
<!-- This section describes the evaluation protocols and provides the results. -->
### Testing Data, Factors & Metrics
#### Testing Data
<!-- This should link to a Dataset Card if possible. -->
[More Information Needed]
#### Factors
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
[More Information Needed]
#### Metrics
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
[More Information Needed]
### Results
[More Information Needed]
#### Summary
## Model Examination [optional]
<!-- Relevant interpretability work for the model goes here -->
[More Information Needed]
## Environmental Impact
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
- **Hardware Type:** [More Information Needed]
- **Hours used:** [More Information Needed]
- **Cloud Provider:** [More Information Needed]
- **Compute Region:** [More Information Needed]
- **Carbon Emitted:** [More Information Needed]
## Technical Specifications [optional]
### Model Architecture and Objective
[More Information Needed]
### Compute Infrastructure
[More Information Needed]
#### Hardware
[More Information Needed]
#### Software
[More Information Needed]
## Citation [optional]
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
**BibTeX:**
[More Information Needed]
**APA:**
[More Information Needed]
## Glossary [optional]
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
[More Information Needed]
## More Information [optional]
[More Information Needed]
## Model Card Authors [optional]
[More Information Needed]
## Model Card Contact
[More Information Needed]
### Framework versions
- PEFT 0.14.0

View File

@@ -1,37 +0,0 @@
{
"alpha_pattern": {},
"auto_mapping": null,
"base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
"bias": "none",
"eva_config": null,
"exclude_modules": null,
"fan_in_fan_out": false,
"inference_mode": true,
"init_lora_weights": true,
"layer_replication": null,
"layers_pattern": null,
"layers_to_transform": null,
"loftq_config": {},
"lora_alpha": 16,
"lora_bias": false,
"lora_dropout": 0.05,
"megatron_config": null,
"megatron_core": "megatron.core",
"modules_to_save": null,
"peft_type": "LORA",
"r": 8,
"rank_pattern": {},
"revision": null,
"target_modules": [
"v_proj",
"o_proj",
"gate_proj",
"down_proj",
"q_proj",
"up_proj",
"k_proj"
],
"task_type": "CAUSAL_LM",
"use_dora": false,
"use_rslora": false
}

View File

@@ -1,32 +0,0 @@
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 100,
"global_step": 3,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [],
"logging_steps": 100,
"max_steps": 3,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 228991306825728.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}

View File

@@ -1,202 +0,0 @@
---
base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
library_name: peft
---
# Model Card for Model ID
<!-- Provide a quick summary of what the model is/does. -->
## Model Details
### Model Description
<!-- Provide a longer summary of what this model is. -->
- **Developed by:** [More Information Needed]
- **Funded by [optional]:** [More Information Needed]
- **Shared by [optional]:** [More Information Needed]
- **Model type:** [More Information Needed]
- **Language(s) (NLP):** [More Information Needed]
- **License:** [More Information Needed]
- **Finetuned from model [optional]:** [More Information Needed]
### Model Sources [optional]
<!-- Provide the basic links for the model. -->
- **Repository:** [More Information Needed]
- **Paper [optional]:** [More Information Needed]
- **Demo [optional]:** [More Information Needed]
## Uses
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
### Direct Use
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
[More Information Needed]
### Downstream Use [optional]
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
[More Information Needed]
### Out-of-Scope Use
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
[More Information Needed]
## Bias, Risks, and Limitations
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
[More Information Needed]
### Recommendations
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
## How to Get Started with the Model
Use the code below to get started with the model.
[More Information Needed]
## Training Details
### Training Data
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
[More Information Needed]
### Training Procedure
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
#### Preprocessing [optional]
[More Information Needed]
#### Training Hyperparameters
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
#### Speeds, Sizes, Times [optional]
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
[More Information Needed]
## Evaluation
<!-- This section describes the evaluation protocols and provides the results. -->
### Testing Data, Factors & Metrics
#### Testing Data
<!-- This should link to a Dataset Card if possible. -->
[More Information Needed]
#### Factors
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
[More Information Needed]
#### Metrics
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
[More Information Needed]
### Results
[More Information Needed]
#### Summary
## Model Examination [optional]
<!-- Relevant interpretability work for the model goes here -->
[More Information Needed]
## Environmental Impact
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
- **Hardware Type:** [More Information Needed]
- **Hours used:** [More Information Needed]
- **Cloud Provider:** [More Information Needed]
- **Compute Region:** [More Information Needed]
- **Carbon Emitted:** [More Information Needed]
## Technical Specifications [optional]
### Model Architecture and Objective
[More Information Needed]
### Compute Infrastructure
[More Information Needed]
#### Hardware
[More Information Needed]
#### Software
[More Information Needed]
## Citation [optional]
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
**BibTeX:**
[More Information Needed]
**APA:**
[More Information Needed]
## Glossary [optional]
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
[More Information Needed]
## More Information [optional]
[More Information Needed]
## Model Card Authors [optional]
[More Information Needed]
## Model Card Contact
[More Information Needed]
### Framework versions
- PEFT 0.14.0

View File

@@ -1,37 +0,0 @@
{
"alpha_pattern": {},
"auto_mapping": null,
"base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
"bias": "none",
"eva_config": null,
"exclude_modules": null,
"fan_in_fan_out": false,
"inference_mode": true,
"init_lora_weights": true,
"layer_replication": null,
"layers_pattern": null,
"layers_to_transform": null,
"loftq_config": {},
"lora_alpha": 16,
"lora_bias": false,
"lora_dropout": 0.05,
"megatron_config": null,
"megatron_core": "megatron.core",
"modules_to_save": null,
"peft_type": "LORA",
"r": 8,
"rank_pattern": {},
"revision": null,
"target_modules": [
"up_proj",
"k_proj",
"o_proj",
"down_proj",
"v_proj",
"gate_proj",
"q_proj"
],
"task_type": "CAUSAL_LM",
"use_dora": false,
"use_rslora": false
}

View File

@@ -1,32 +0,0 @@
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 8.592592592592592,
"eval_steps": 100,
"global_step": 60,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [],
"logging_steps": 100,
"max_steps": 60,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4427165265297408.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}

View File

@@ -1,202 +0,0 @@
---
base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
library_name: peft
---
# Model Card for Model ID
<!-- Provide a quick summary of what the model is/does. -->
## Model Details
### Model Description
<!-- Provide a longer summary of what this model is. -->
- **Developed by:** [More Information Needed]
- **Funded by [optional]:** [More Information Needed]
- **Shared by [optional]:** [More Information Needed]
- **Model type:** [More Information Needed]
- **Language(s) (NLP):** [More Information Needed]
- **License:** [More Information Needed]
- **Finetuned from model [optional]:** [More Information Needed]
### Model Sources [optional]
<!-- Provide the basic links for the model. -->
- **Repository:** [More Information Needed]
- **Paper [optional]:** [More Information Needed]
- **Demo [optional]:** [More Information Needed]
## Uses
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
### Direct Use
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
[More Information Needed]
### Downstream Use [optional]
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
[More Information Needed]
### Out-of-Scope Use
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
[More Information Needed]
## Bias, Risks, and Limitations
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
[More Information Needed]
### Recommendations
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
## How to Get Started with the Model
Use the code below to get started with the model.
[More Information Needed]
## Training Details
### Training Data
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
[More Information Needed]
### Training Procedure
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
#### Preprocessing [optional]
[More Information Needed]
#### Training Hyperparameters
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
#### Speeds, Sizes, Times [optional]
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
[More Information Needed]
## Evaluation
<!-- This section describes the evaluation protocols and provides the results. -->
### Testing Data, Factors & Metrics
#### Testing Data
<!-- This should link to a Dataset Card if possible. -->
[More Information Needed]
#### Factors
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
[More Information Needed]
#### Metrics
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
[More Information Needed]
### Results
[More Information Needed]
#### Summary
## Model Examination [optional]
<!-- Relevant interpretability work for the model goes here -->
[More Information Needed]
## Environmental Impact
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
- **Hardware Type:** [More Information Needed]
- **Hours used:** [More Information Needed]
- **Cloud Provider:** [More Information Needed]
- **Compute Region:** [More Information Needed]
- **Carbon Emitted:** [More Information Needed]
## Technical Specifications [optional]
### Model Architecture and Objective
[More Information Needed]
### Compute Infrastructure
[More Information Needed]
#### Hardware
[More Information Needed]
#### Software
[More Information Needed]
## Citation [optional]
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
**BibTeX:**
[More Information Needed]
**APA:**
[More Information Needed]
## Glossary [optional]
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
[More Information Needed]
## More Information [optional]
[More Information Needed]
## Model Card Authors [optional]
[More Information Needed]
## Model Card Contact
[More Information Needed]
### Framework versions
- PEFT 0.14.0

View File

@@ -1,37 +0,0 @@
{
"alpha_pattern": {},
"auto_mapping": null,
"base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
"bias": "none",
"eva_config": null,
"exclude_modules": null,
"fan_in_fan_out": false,
"inference_mode": true,
"init_lora_weights": true,
"layer_replication": null,
"layers_pattern": null,
"layers_to_transform": null,
"loftq_config": {},
"lora_alpha": 16,
"lora_bias": false,
"lora_dropout": 0.05,
"megatron_config": null,
"megatron_core": "megatron.core",
"modules_to_save": null,
"peft_type": "LORA",
"r": 8,
"rank_pattern": {},
"revision": null,
"target_modules": [
"up_proj",
"k_proj",
"o_proj",
"down_proj",
"v_proj",
"gate_proj",
"q_proj"
],
"task_type": "CAUSAL_LM",
"use_dora": false,
"use_rslora": false
}

View File

@@ -1,23 +0,0 @@
{
"bos_token": {
"content": "<begin▁of▁sentence>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<end▁of▁sentence>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<end▁of▁sentence>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

File diff suppressed because it is too large.

View File

@@ -1,195 +0,0 @@
{
"add_bos_token": true,
"add_eos_token": false,
"add_prefix_space": null,
"added_tokens_decoder": {
"151643": {
"content": "<end▁of▁sentence>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151644": {
"content": "<User>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151645": {
"content": "<Assistant>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151646": {
"content": "<begin▁of▁sentence>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151647": {
"content": "<|EOT|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151648": {
"content": "<think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151649": {
"content": "</think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151650": {
"content": "<|quad_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151651": {
"content": "<|quad_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151652": {
"content": "<|vision_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151653": {
"content": "<|vision_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151654": {
"content": "<|vision_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151655": {
"content": "<|image_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151656": {
"content": "<|video_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151657": {
"content": "<tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151658": {
"content": "</tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151659": {
"content": "<|fim_prefix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151660": {
"content": "<|fim_middle|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151661": {
"content": "<|fim_suffix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151662": {
"content": "<|fim_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151663": {
"content": "<|repo_name|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151664": {
"content": "<|file_sep|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
}
},
"bos_token": "<begin▁of▁sentence>",
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<User>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<Assistant><tool▁calls▁begin><tool▁call▁begin>' + tool['type'] + '<tool▁sep>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<tool▁call▁end>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<tool▁call▁begin>' + tool['type'] + '<tool▁sep>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<tool▁call▁end>'}}{{'<tool▁calls▁end><end▁of▁sentence>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<tool▁outputs▁end>' + message['content'] + '<end▁of▁sentence>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<Assistant>' + content + '<end▁of▁sentence>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<tool▁outputs▁begin><tool▁output▁begin>' + message['content'] + '<tool▁output▁end>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<tool▁output▁begin>' + message['content'] + '<tool▁output▁end>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<tool▁outputs▁end>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<Assistant><think>\\n'}}{% endif %}",
"clean_up_tokenization_spaces": false,
"eos_token": "<end▁of▁sentence>",
"extra_special_tokens": {},
"legacy": true,
"model_max_length": 16384,
"pad_token": "<end▁of▁sentence>",
"sp_model_kwargs": {},
"tokenizer_class": "LlamaTokenizerFast",
"unk_token": null,
"use_default_system_prompt": false
}
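
For orientation (this note and example are not part of the commit): the chat_template string in this tokenizer_config.json is the DeepSeek-R1 conversation format. It prepends the BOS token and any system prompt, wraps turns in the <User> and <Assistant> markers defined above, drops everything before </think> from prior assistant turns, and, when add_generation_prompt is set, ends with an <Assistant><think> prefix so the model begins by reasoning. A minimal sketch of rendering it through the standard transformers tokenizer API, with made-up example messages:

```python
# Sketch: render the chat template defined in the tokenizer_config.json above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarize what this repository does."},
]

# add_generation_prompt=True appends the assistant/think prefix from the template.
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
```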

View File

@@ -1,258 +0,0 @@
import os
import json
import torch
from pathlib import Path
from datetime import datetime
from tqdm import tqdm
from ipex_llm.transformers import AutoModelForCausalLM
import transformers
import logging as log
import time
import re
# Function to recursively walk a directory and process text files
def process_directory(directory_path, model, output_path="results", file_extensions=(".txt",)):
"""
Recursively walks a directory, processes text files, and stores results.
Args:
directory_path (str): Path to directory containing text files
model: The LLM model to use for queries
output_path (str): Path to store results
file_extensions (tuple): File extensions to process
"""
# Create output directory if it doesn't exist
os.makedirs(output_path, exist_ok=True)
# Use tqdm for progress tracking
file_paths = []
for root, _, files in os.walk(directory_path):
for file in files:
if file.lower().endswith(file_extensions):
file_paths.append(os.path.join(root, file))
for file_path in tqdm(file_paths, desc="Processing files"):
process_file(file_path, model, output_path)
print(f"Processing complete. Results stored in {output_path}")
# Function to process a single text file
def process_file(file_path, model, output_path):
"""
Process a single text file by querying the model for questions and answers.
Args:
file_path (str): Path to the text file
model: The LLM model to use for queries
output_path (str): Path to store results
"""
# Read the file content
try:
with open(file_path, 'r', encoding='utf-8') as file:
content = file.read()
except Exception as e:
print(f"Error reading {file_path}: {e}")
return
# Skip empty files
if not content.strip():
print(f"Skipping empty file: {file_path}")
return
# Generate a relative output path that maintains the directory structure
relative_path = os.path.relpath(file_path, start=os.path.dirname(output_path))
sidecar_file = os.path.join(output_path, relative_path + ".json")
sidecar_qa_file = os.path.join(output_path, relative_path + ".qa.json")
os.makedirs(os.path.dirname(sidecar_file), exist_ok=True)
print(f"Generating questions for {file_path}")
# Query model for questions
questions, think = get_questions_from_model(content, model)
# Get answers for each question
results = {
"file_path": file_path,
"processed_at": datetime.now().isoformat(),
"context": content[:1000] + "..." if len(content) > 1000 else content, # Truncated context for JSON storage
"think": think,
"qa_pairs": len(questions)
}
with open(sidecar_file, 'w', encoding='utf-8') as f:
json.dump(results, f, ensure_ascii=False, indent=2)
# Process each question
results = []
for i, question in enumerate(questions):
print(f"Generating answer for {i+1}/{len(questions)}: {question}")
answer, think = get_answer_from_model(content, question, model)
results.append({
"think": think,
"question": question,
"answer": answer
})
# Save results -- do this in loop so we can monitor the progress
with open(sidecar_qa_file, 'w', encoding='utf-8') as f:
json.dump(results, f, ensure_ascii=False, indent=2)
# Function to get questions from the model
def get_questions_from_model(context, model):
"""
Query the model to generate questions about the provided context.
Args:
context (str): The text file content
model: The LLM model to use for queries
Returns:
list: List of questions about the context
"""
# Set up the prompt for generating questions
prompt = f"""<task>
You are an expert at generating insightful questions about text content.
Please read the following text and generate 10 thoughtful questions that would help someone
understand the key information, concepts, or insights contained in it.
TEXT:
{context}
Generate a list of questions, one per line.
</task>
"""
# Use context manager to avoid CUDA memory leaks
with torch.no_grad():
# This is where we would query the model - implementation depends on model type
response, think, duration = model.generate(prompt)
# Parse questions from the response
# This parsing logic may need adjustment based on model output format
questions = [q.strip() for q in response.strip().split('\n') if q.strip()]
# Filter out any non-questions
questions = [q for q in questions if q.endswith('?')]
# Remove any ^\s*{0-9}+.\s* from each entry
questions = [re.sub(r"^\s*\d+\.\s*", "", q) for q in questions]
return questions, think
# Function to get answers from the model
def get_answer_from_model(context, question, model):
"""
Query the model to answer a question about the provided context.
Args:
context (str): The text file content
question (str): The question to answer
model: The LLM model to use for queries
Returns:
str: The model's answer to the question
"""
# Set up the prompt for answering the question
prompt = f"""<task>
Please answer the following question based solely on the provided context.
CONTEXT:
{context}
QUESTION:
{question}
</task>
"""
# Use context manager to avoid CUDA memory leaks
with torch.no_grad():
# Query the model
answer, think, duration = model.generate(prompt)
return answer, think
class LLM:
def __init__(self, model_path='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', device_name='xpu'):
self.model_path = model_path
self.device_name = device_name
self.tokenizer = None
self.model = None
try:
log.info(f"Loading tokenizer from: {self.model_path}")
start = datetime.now()
self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True)
if self.tokenizer.pad_token is None:
self.tokenizer.pad_token = self.tokenizer.eos_token # Set pad_token to eos_token if needed
self.model = AutoModelForCausalLM.from_pretrained(self.model_path,
load_in_4bit=True,
optimize_model=True,
trust_remote_code=True,
use_cache=True)
self.model = self.model.half().to(device_name)
except Exception as e:
log.error(f"Loading error: {e}")
raise Exception(e)
# returns response, think, timedelta
def generate(self, prompt, max_length=4096):
if not self.tokenizer or not self.model:
raise Exception("tokenizer or model is None")
start = datetime.now()
response = None
think = None
# print(f"{'='*10} PROMPT {'='*10}\n{prompt}")
with torch.autocast(self.device_name, dtype=torch.float16):
inputs = self.tokenizer.encode_plus(
prompt,
return_tensors="pt"
).to(self.device_name)
with torch.no_grad():
generation_output = self.model.generate(
**inputs,
max_new_tokens=max_length,
do_sample=True,
temperature=0.7,
top_p=0.9,
return_dict_in_generate=True,
output_scores=False,
pad_token_id=self.tokenizer.eos_token_id
)
# Get the full output token IDs
output_token_ids = generation_output.sequences[0]
# Decode the full generated text (including the prompt)
full_output = self.tokenizer.decode(output_token_ids, skip_special_tokens=True)
# Extract only the response part by removing the prompt part
response = full_output[len(self.tokenizer.decode(inputs.input_ids[0], skip_special_tokens=True)):]
matches = re.match(r'^(<think>)?(.*?)</think>(.*)$', response, flags=re.DOTALL)
if matches:
think = matches[2]
response = matches[3]
else:
think = None
response = response
# print(f"{'='*10} THINK {'='*10}\n{think}")
# print(f"{'='*10} RESPONSE {'='*10}\n{response}")
end = datetime.now()
return response, think, end - start
# Example usage
if __name__ == "__main__":
# Create a dummy model instance - would be replaced by actual model in real usage
model = LLM()
# Process the directory
process_directory(
directory_path="./doc", # Replace with your directory path
model=model,
output_path="./results",
file_extensions=(".txt", ".md") # Process both txt and md files
)
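
To recap the deleted script above: it walks a directory, asks the model to generate questions about each text file, answers them, and writes a `<file>.json` summary plus a `<file>.qa.json` list of {think, question, answer} records per input. A hypothetical follow-up step, not present in the commit, that gathers those sidecar files into a flat question/answer list (for example, to assemble a fine-tuning set) could look like this:

```python
# Hypothetical helper: collect the *.qa.json sidecars written by process_file()
# into a flat list of question/answer pairs. The path mirrors the default
# output directory used in the script above ("./results").
import json
from pathlib import Path

def load_qa_pairs(results_dir="./results"):
    pairs = []
    for qa_file in Path(results_dir).rglob("*.qa.json"):
        with open(qa_file, encoding="utf-8") as f:
            for item in json.load(f):
                pairs.append({"question": item["question"], "answer": item["answer"]})
    return pairs

if __name__ == "__main__":
    pairs = load_qa_pairs()
    print(f"Loaded {len(pairs)} question/answer pairs")
```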

View File

@@ -1,144 +0,0 @@
import intel_extension_for_pytorch as ipex
import inspect
def inspect_module(module, prefix=""):
"""Recursively inspect a module and list all its attributes and functions."""
results = {
"functions": [],
"attributes": [],
"classes": [],
"modules": []
}
for name in dir(module):
# Skip private/hidden attributes
if name.startswith("_") and name != "__version__":
continue
full_name = f"{prefix}.{name}" if prefix else name
try:
attr = getattr(module, name)
# Check if it's a function
if inspect.isfunction(attr) or inspect.isbuiltin(attr) or inspect.ismethod(attr):
results["functions"].append((full_name, str(inspect.signature(attr)) if not inspect.isbuiltin(attr) else ""))
# Check if it's a class
elif inspect.isclass(attr):
results["classes"].append(full_name)
# Check if it's a module
elif inspect.ismodule(attr):
results["modules"].append(full_name)
# Otherwise it's a regular attribute
else:
try:
attr_value = str(attr)
if len(attr_value) > 100:
attr_value = attr_value[:100] + "..."
results["attributes"].append((full_name, attr_value))
except:
results["attributes"].append((full_name, "Unable to get value"))
except Exception as e:
results["attributes"].append((full_name, f"Error: {str(e)}"))
return results
# Inspect ipex
ipex_info = inspect_module(ipex)
# Print information in a readable format
print("INTEL PYTORCH EXTENSION (IPEX) API\n")
print("=== VERSION ===")
if "__version__" in dir(ipex):
print(f"Version: {ipex.__version__}")
else:
print("Version information not available")
print()
print("=== FUNCTIONS ===")
for func, sig in sorted(ipex_info["functions"]):
print(f"{func}{sig}")
print()
print("=== ATTRIBUTES ===")
for attr, value in sorted(ipex_info["attributes"]):
print(f"{attr} = {value}")
print()
print("=== CLASSES ===")
for cls in sorted(ipex_info["classes"]):
print(cls)
print()
print("=== MODULES ===")
for mod in sorted(ipex_info["modules"]):
print(mod)
print()
# Deep inspection of nn module which is likely important for quantization
if hasattr(ipex, "nn"):
print("=== DIVING INTO ipex.nn MODULE ===")
nn_info = inspect_module(ipex.nn, "ipex.nn")
print("--- ipex.nn FUNCTIONS ---")
for func, sig in sorted(nn_info["functions"]):
print(f"{func}{sig}")
print()
print("--- ipex.nn CLASSES ---")
for cls in sorted(nn_info["classes"]):
print(cls)
print()
# Dive into utils if available
if hasattr(ipex.nn, "utils"):
print("=== DIVING INTO ipex.nn.utils MODULE ===")
utils_info = inspect_module(ipex.nn.utils, "ipex.nn.utils")
print("--- ipex.nn.utils FUNCTIONS ---")
for func, sig in sorted(utils_info["functions"]):
print(f"{func}{sig}")
print()
print("--- ipex.nn.utils CLASSES ---")
for cls in sorted(utils_info["classes"]):
print(cls)
print()
# Look for parameter wrapper specifically
if "_parameter_wrapper" in dir(ipex.nn.utils):
print("=== DIVING INTO ipex.nn.utils._parameter_wrapper MODULE ===")
pw_info = inspect_module(ipex.nn.utils._parameter_wrapper, "ipex.nn.utils._parameter_wrapper")
print("--- ipex.nn.utils._parameter_wrapper FUNCTIONS ---")
for func, sig in sorted(pw_info["functions"]):
print(f"{func}{sig}")
print()
print("--- ipex.nn.utils._parameter_wrapper CLASSES ---")
for cls in sorted(pw_info["classes"]):
print(cls)
print()
# Specifically look for functions related to quantization
print("=== FUNCTIONS RELATED TO QUANTIZATION ===")
quant_funcs = []
for module_name in dir(ipex):
if not module_name.startswith("_"):
module = getattr(ipex, module_name)
if inspect.ismodule(module):
for func_name in dir(module):
if any(term in func_name.lower() for term in ["quant", "bit", "int8", "int4", "precision"]):
try:
func = getattr(module, func_name)
if inspect.isfunction(func) or inspect.isbuiltin(func):
quant_funcs.append(f"ipex.{module_name}.{func_name}")
except:
pass
for func in sorted(quant_funcs):
print(func)

View File

@@ -1,23 +0,0 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# dependencies
/node_modules
/.pnp
.pnp.js
# testing
/coverage
# production
/build
# misc
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local
npm-debug.log*
yarn-debug.log*
yarn-error.log*

View File

@@ -1,46 +0,0 @@
# Getting Started with Create React App
This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app).
## Available Scripts
In the project directory, you can run:
### `npm start`
Runs the app in the development mode.\
Open [http://localhost:3000](http://localhost:3000) to view it in the browser.
The page will reload if you make edits.\
You will also see any lint errors in the console.
### `npm test`
Launches the test runner in the interactive watch mode.\
See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information.
### `npm run build`
Builds the app for production to the `build` folder.\
It correctly bundles React in production mode and optimizes the build for the best performance.
The build is minified and the filenames include the hashes.\
Your app is ready to be deployed!
See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information.
### `npm run eject`
**Note: this is a one-way operation. Once you `eject`, you can't go back!**
If you aren't satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project.
Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you're on your own.
You don't have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn't feel obligated to use this feature. However we understand that this tool wouldn't be useful if you couldn't customize it when you are ready for it.
## Learn More
You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started).
To learn React, check out the [React documentation](https://reactjs.org/).

Binary file not shown (before: 318 B).

Binary file not shown (before: 4.6 KiB).

File diff suppressed because it is too large.

View File

@@ -1,56 +0,0 @@
{
"name": "ketr-chat",
"version": "0.1.0",
"private": true,
"dependencies": {
"@emotion/react": "^11.14.0",
"@emotion/styled": "^11.14.0",
"@fontsource/roboto": "^5.2.5",
"@mui/icons-material": "^7.0.1",
"@mui/material": "^7.0.1",
"@testing-library/dom": "^10.4.0",
"@testing-library/jest-dom": "^6.6.3",
"@testing-library/react": "^16.2.0",
"@testing-library/user-event": "^13.5.0",
"@types/jest": "^27.5.2",
"@types/node": "^16.18.126",
"@types/react": "^19.0.12",
"@types/react-dom": "^19.0.4",
"mui-markdown": "^1.2.6",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"react-markdown": "^10.1.0",
"react-plotly.js": "^2.6.0",
"react-scripts": "5.0.1",
"react-spinners": "^0.15.0",
"rehype-katex": "^7.0.1",
"remark-gfm": "^4.0.1",
"remark-math": "^6.0.0",
"typescript": "^4.9.5",
"web-vitals": "^2.1.4"
},
"scripts": {
"start": "react-scripts start",
"build": "react-scripts build",
"test": "react-scripts test",
"eject": "react-scripts eject"
},
"eslintConfig": {
"extends": [
"react-app",
"react-app/jest"
]
},
"browserslist": {
"production": [
">0.2%",
"not dead",
"not op_mini all"
],
"development": [
"last 1 chrome version",
"last 1 firefox version",
"last 1 safari version"
]
}
}

Binary file not shown (before: 17 KiB).

Binary file not shown (before: 4.9 KiB).

Binary file not shown (before: 318 B).

Binary file not shown (before: 4.6 KiB).

Binary file not shown (before: 22 KiB).

View File

@@ -1,43 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="theme-color" content="#000000" />
<meta
name="description"
content="Interactive chat with an enhanced LLM."
/>
<link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
<!--
manifest.json provides metadata used when your web app is installed on a
user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
-->
<link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
<!--
Notice the use of %PUBLIC_URL% in the tags above.
It will be replaced with the URL of the `public` folder during the build.
Only files inside the `public` folder can be referenced from the HTML.
Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
work correctly both with client-side routing and a non-root public URL.
Learn how to configure a non-root public URL by running `npm run build`.
-->
<title>Ketr-Chat</title>
</head>
<body>
<noscript>You need to enable JavaScript to run this app.</noscript>
<div id="root"></div>
<!--
This HTML file is a template.
If you open it directly in the browser, you will see an empty page.
You can add webfonts, meta tags, or analytics to this file.
The build step will place the bundled scripts into the <body> tag.
To begin the development, run `npm start` or `yarn start`.
To create a production bundle, use `npm run build` or `yarn build`.
-->
</body>
</html>

Binary file not shown (before: 2.6 KiB).

Binary file not shown (before: 7.5 KiB).

View File

@@ -1,25 +0,0 @@
{
"short_name": "Ketr-Chat",
"name": "Ketrenos AI Chat",
"icons": [
{
"src": "favicon.ico",
"sizes": "64x64 32x32 24x24 16x16",
"type": "image/x-icon"
},
{
"src": "logo192.png",
"type": "image/png",
"sizes": "192x192"
},
{
"src": "logo512.png",
"type": "image/png",
"sizes": "512x512"
}
],
"start_url": ".",
"display": "standalone",
"theme_color": "#000000",
"background_color": "#ffffff"
}

Binary file not shown (before: 5.0 KiB).

Binary file not shown (before: 6.2 KiB).

Binary file not shown (before: 14 KiB).

Binary file not shown (before: 21 KiB).

View File

@@ -1,3 +0,0 @@
# https://www.robotstxt.org/robotstxt.html
User-agent: *
Disallow:

Binary file not shown (before: 4.7 KiB).

Binary file not shown (before: 17 KiB).

View File

@@ -55,7 +55,7 @@ import '@fontsource/roboto/700.css';
const use_mui_markdown = true
const welcomeMarkdown = `
# Welcome to Ketr-Chat
# Welcome to AIRC
This LLM agent was built by James Ketrenos in order to provide answers to any questions you may have about his work history. In addition to being a RAG enabled expert system, the LLM is configured with real-time access to weather, stocks, the current time, and can answer questions about the contents of a website.
@@ -1171,7 +1171,7 @@ const App = () => {
}}
>
<Toolbar>
<Tooltip title="Chat Settings">
<Tooltip title="LLM Settings">
<IconButton
color="inherit"
aria-label="open drawer"
@@ -1195,7 +1195,7 @@ const App = () => {
</IconButton>
</Tooltip>
<Typography variant="h6" noWrap component="div">
Ketr-Chat
ai.ketrenos.com
</Typography>
{

View File

@@ -1,15 +0,0 @@
body {
margin: 0;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
sans-serif;
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
padding: 0;
height: 100dvh;
}
code {
font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New',
monospace;
}

View File

@@ -1,19 +0,0 @@
import React from 'react';
import ReactDOM from 'react-dom/client';
import './index.css';
import App from './App';
import reportWebVitals from './reportWebVitals';
const root = ReactDOM.createRoot(
document.getElementById('root') as HTMLElement
);
root.render(
<React.StrictMode>
<App />
</React.StrictMode>
);
// If you want to start measuring performance in your app, pass a function
// to log results (for example: reportWebVitals(console.log))
// or send to an analytics endpoint. Learn more: https://bit.ly/CRA-vitals
reportWebVitals();

View File

@@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 841.9 595.3"><g fill="#61DAFB"><path d="M666.3 296.5c0-32.5-40.7-63.3-103.1-82.4 14.4-63.6 8-114.2-20.2-130.4-6.5-3.8-14.1-5.6-22.4-5.6v22.3c4.6 0 8.3.9 11.4 2.6 13.6 7.8 19.5 37.5 14.9 75.7-1.1 9.4-2.9 19.3-5.1 29.4-19.6-4.8-41-8.5-63.5-10.9-13.5-18.5-27.5-35.3-41.6-50 32.6-30.3 63.2-46.9 84-46.9V78c-27.5 0-63.5 19.6-99.9 53.6-36.4-33.8-72.4-53.2-99.9-53.2v22.3c20.7 0 51.4 16.5 84 46.6-14 14.7-28 31.4-41.3 49.9-22.6 2.4-44 6.1-63.6 11-2.3-10-4-19.7-5.2-29-4.7-38.2 1.1-67.9 14.6-75.8 3-1.8 6.9-2.6 11.5-2.6V78.5c-8.4 0-16 1.8-22.6 5.6-28.1 16.2-34.4 66.7-19.9 130.1-62.2 19.2-102.7 49.9-102.7 82.3 0 32.5 40.7 63.3 103.1 82.4-14.4 63.6-8 114.2 20.2 130.4 6.5 3.8 14.1 5.6 22.5 5.6 27.5 0 63.5-19.6 99.9-53.6 36.4 33.8 72.4 53.2 99.9 53.2 8.4 0 16-1.8 22.6-5.6 28.1-16.2 34.4-66.7 19.9-130.1 62-19.1 102.5-49.9 102.5-82.3zm-130.2-66.7c-3.7 12.9-8.3 26.2-13.5 39.5-4.1-8-8.4-16-13.1-24-4.6-8-9.5-15.8-14.4-23.4 14.2 2.1 27.9 4.7 41 7.9zm-45.8 106.5c-7.8 13.5-15.8 26.3-24.1 38.2-14.9 1.3-30 2-45.2 2-15.1 0-30.2-.7-45-1.9-8.3-11.9-16.4-24.6-24.2-38-7.6-13.1-14.5-26.4-20.8-39.8 6.2-13.4 13.2-26.8 20.7-39.9 7.8-13.5 15.8-26.3 24.1-38.2 14.9-1.3 30-2 45.2-2 15.1 0 30.2.7 45 1.9 8.3 11.9 16.4 24.6 24.2 38 7.6 13.1 14.5 26.4 20.8 39.8-6.3 13.4-13.2 26.8-20.7 39.9zm32.3-13c5.4 13.4 10 26.8 13.8 39.8-13.1 3.2-26.9 5.9-41.2 8 4.9-7.7 9.8-15.6 14.4-23.7 4.6-8 8.9-16.1 13-24.1zM421.2 430c-9.3-9.6-18.6-20.3-27.8-32 9 .4 18.2.7 27.5.7 9.4 0 18.7-.2 27.8-.7-9 11.7-18.3 22.4-27.5 32zm-74.4-58.9c-14.2-2.1-27.9-4.7-41-7.9 3.7-12.9 8.3-26.2 13.5-39.5 4.1 8 8.4 16 13.1 24 4.7 8 9.5 15.8 14.4 23.4zM420.7 163c9.3 9.6 18.6 20.3 27.8 32-9-.4-18.2-.7-27.5-.7-9.4 0-18.7.2-27.8.7 9-11.7 18.3-22.4 27.5-32zm-74 58.9c-4.9 7.7-9.8 15.6-14.4 23.7-4.6 8-8.9 16-13 24-5.4-13.4-10-26.8-13.8-39.8 13.1-3.1 26.9-5.8 41.2-7.9zm-90.5 125.2c-35.4-15.1-58.3-34.9-58.3-50.6 0-15.7 22.9-35.6 58.3-50.6 8.6-3.7 18-7 27.7-10.1 5.7 19.6 13.2 40 22.5 60.9-9.2 20.8-16.6 41.1-22.2 60.6-9.9-3.1-19.3-6.5-28-10.2zM310 490c-13.6-7.8-19.5-37.5-14.9-75.7 1.1-9.4 2.9-19.3 5.1-29.4 19.6 4.8 41 8.5 63.5 10.9 13.5 18.5 27.5 35.3 41.6 50-32.6 30.3-63.2 46.9-84 46.9-4.5-.1-8.3-1-11.3-2.7zm237.2-76.2c4.7 38.2-1.1 67.9-14.6 75.8-3 1.8-6.9 2.6-11.5 2.6-20.7 0-51.4-16.5-84-46.6 14-14.7 28-31.4 41.3-49.9 22.6-2.4 44-6.1 63.6-11 2.3 10.1 4.1 19.8 5.2 29.1zm38.5-66.7c-8.6 3.7-18 7-27.7 10.1-5.7-19.6-13.2-40-22.5-60.9 9.2-20.8 16.6-41.1 22.2-60.6 9.9 3.1 19.3 6.5 28.1 10.2 35.4 15.1 58.3 34.9 58.3 50.6-.1 15.7-23 35.6-58.4 50.6zM320.8 78.4z"/><circle cx="420.9" cy="296.5" r="45.7"/><path d="M520.5 78.1z"/></g></svg>

Some files were not shown because too many files have changed in this diff.