Working?
@ -216,15 +216,6 @@ RUN pip install 'https://github.com/bitsandbytes-foundation/bitsandbytes/release
# Install ollama python module
RUN pip install ollama langchain-ollama

# pydle does not work with newer asyncio due to coroutine
# being deprecated. Patch to work.
COPY /src/pydle.patch /opt/pydle.patch

RUN pip install pydle \
    && patch -d /opt/airc/venv/lib/python3*/site-packages/pydle \
       -p1 < /opt/pydle.patch \
    && rm /opt/pydle.patch

RUN pip install setuptools --upgrade
RUN pip install ollama langchain-ollama
RUN pip install feedparser bs4 chromadb
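For context on the pydle patch above (the contents of /src/pydle.patch are not included in this diff): older pydle releases still use the generator-based coroutine style that CPython removed in Python 3.11, which is presumably what the patch rewrites. A hedged, illustrative Python sketch of that migration, not pydle's actual source:

import asyncio

# Pre-3.11 style that now fails ("module 'asyncio' has no attribute 'coroutine'"):
#
#     @asyncio.coroutine
#     def poll(self):
#         result = yield from self._poll()
#
# The native replacement a patch like this would apply:
async def poll():
    await asyncio.sleep(0)  # `await` replaces `yield from`
    return "polled"

asyncio.run(poll())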
@ -1,444 +0,0 @@
FROM ubuntu:oracular AS pytorch-build

SHELL [ "/bin/bash", "-c" ]

# Instructions Dockerfied from:
#
# https://github.com/pytorch/pytorch
#
# and
#
# https://pytorch.org/docs/stable/notes/get_start_xpu.html
# https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-6.html
#
#
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gpg \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

# ipex only supports python 3.11, so use 3.11 instead of latest oracular (3.12)

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        ca-certificates \
        ccache \
        cmake \
        curl \
        git \
        gpg-agent \
        less \
        libbz2-dev \
        libffi-dev \
        libjpeg-dev \
        libpng-dev \
        libreadline-dev \
        libssl-dev \
        libsqlite3-dev \
        llvm \
        nano \
        wget \
        zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

# python3 \
# python3-pip \
# python3-venv \
# python3-dev \

RUN /usr/sbin/update-ccache-symlinks
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache

# Build Python in /opt/..., install it locally, then remove the build environment
# collapsed to a single docker layer.
WORKDIR /opt
ENV PYTHON_VERSION=3.11.9

RUN wget -q -O - https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz | tar -xz \
    && cd Python-${PYTHON_VERSION} \
    && ./configure --prefix=/opt/python --enable-optimizations \
    && make -j$(nproc) \
    && make install \
    && cd /opt \
    && rm -rf Python-${PYTHON_VERSION}

WORKDIR /opt/pytorch

FROM ubuntu:oracular AS ze-monitor
# From https://github.com/jketreno/ze-monitor
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        debhelper \
        devscripts \
        cmake \
        git \
        libfmt-dev \
        libncurses-dev \
        rpm \
        rpm2cpio \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN apt-get install -y \
        software-properties-common \
    && add-apt-repository -y ppa:kobuk-team/intel-graphics \
    && apt-get update \
    && apt-get install -y \
        libze-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN git clone --depth 1 --branch v0.3.0-1 https://github.com/jketreno/ze-monitor /opt/ze-monitor
WORKDIR /opt/ze-monitor/build
RUN cmake .. \
    && make \
    && cpack

FROM pytorch-build AS pytorch

COPY --from=pytorch-build /opt/pytorch /opt/pytorch

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:kobuk-team/intel-graphics \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libze-intel-gpu1 \
        libze1 \
        intel-ocloc \
        intel-opencl-icd \
        xpu-smi \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2

# When cache is enabled SYCL runtime will try to cache and reuse JIT-compiled binaries.
ENV SYCL_CACHE_PERSISTENT=1

WORKDIR /opt/pytorch

RUN { \
        echo '#!/bin/bash' ; \
        update-alternatives --set python3 /opt/python/bin/python3.11 ; \
        echo 'source /opt/pytorch/venv/bin/activate' ; \
        echo 'bash -c "${@}"' ; \
    } > /opt/pytorch/shell ; \
    chmod +x /opt/pytorch/shell

RUN python3 -m venv --system-site-packages /opt/pytorch/venv

SHELL [ "/opt/pytorch/shell" ]

RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu
RUN pip3 freeze > /opt/pytorch/requirements.txt

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash' ; \
        echo 'echo "Container: pytorch"' ; \
        echo 'set -e' ; \
        echo 'echo "Setting pip environment to /opt/pytorch"' ; \
        echo 'source /opt/pytorch/venv/bin/activate'; \
        echo 'if [[ "${1}" == "" ]] || [[ "${1}" == "shell" ]]; then' ; \
        echo ' echo "Dropping to shell"' ; \
        echo ' /bin/bash -c "source /opt/pytorch/venv/bin/activate ; /bin/bash"' ; \
        echo 'else' ; \
        echo ' exec "${@}"' ; \
        echo 'fi' ; \
    } > /entrypoint.sh \
    && chmod +x /entrypoint.sh

ENTRYPOINT [ "/entrypoint.sh" ]

FROM pytorch AS ipex-llm-src

# Build ipex-llm from source

RUN git clone --branch main --depth 1 https://github.com/intel/ipex-llm.git /opt/ipex-llm \
    && cd /opt/ipex-llm \
    && git fetch --depth 1 origin cb3c4b26ad058c156591816aa37eec4acfcbf765 \
    && git checkout cb3c4b26ad058c156591816aa37eec4acfcbf765

WORKDIR /opt/ipex-llm

RUN python3 -m venv --system-site-packages /opt/ipex-llm/venv
RUN { \
        echo '#!/bin/bash' ; \
        update-alternatives --set python3 /opt/python/bin/python3.11 ; \
        echo 'source /opt/ipex-llm/venv/bin/activate' ; \
        echo 'bash -c "${@}"' ; \
    } > /opt/ipex-llm/shell ; \
    chmod +x /opt/ipex-llm/shell

SHELL [ "/opt/ipex-llm/shell" ]

RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu

WORKDIR /opt/ipex-llm/python/llm
RUN pip install requests wheel
RUN python setup.py clean --all bdist_wheel --linux

FROM airc AS jupyter

SHELL [ "/opt/airc/shell" ]

# BEGIN setup Jupyter
RUN pip install jupyter \
        jupyterlab==4.3.0a0 \
        jupyterhub==5.0.0 \
        notebook==7.3.0a0 \
        "jupyter-server-proxy>=4.1.2"
# END setup Jupyter

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash' ; \
        echo 'echo "Container: airc jupyter"' ; \
        echo 'if [[ ! -e "/root/.cache/hub/token" ]]; then' ; \
        echo ' if [[ "${HF_ACCESS_TOKEN}" == "" ]]; then' ; \
        echo ' echo "Set your HF access token in .env as: HF_ACCESS_TOKEN=<token>" >&2' ; \
        echo ' exit 1' ; \
        echo ' else' ; \
        echo ' if [[ ! -d '/root/.cache/hub' ]]; then mkdir -p /root/.cache/hub; fi' ; \
        echo ' echo "${HF_ACCESS_TOKEN}" > /root/.cache/hub/token' ; \
        echo ' fi' ; \
        echo 'fi' ; \
        echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
        echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
        echo 'source /opt/airc/venv/bin/activate' ; \
        echo 'if [[ "${1}" == "shell" ]]; then echo "Dropping to shell"; /bin/bash; exit $?; fi' ; \
        echo 'while true; do' ; \
        echo ' echo "Launching jupyter notebook"' ; \
        echo ' jupyter notebook \' ; \
        echo ' --notebook-dir=/opt/jupyter \' ; \
        echo ' --port 8888 \' ; \
        echo ' --ip 0.0.0.0 \' ; \
        echo ' --no-browser \' ; \
        echo ' --allow-root \' ; \
        echo ' --ServerApp.token= \' ; \
        echo ' --ServerApp.password= \' ; \
        echo ' --ServerApp.allow_origin=* \' ; \
        echo ' --ServerApp.base_url="/jupyter" \' ; \
        echo ' "${@}" \' ; \
        echo ' 2>&1 | tee -a "/root/.cache/jupyter.log"' ; \
        echo ' echo "jupyter notebook died ($?). Restarting."' ; \
        echo ' sleep 5' ; \
        echo 'done' ; \
    } > /entrypoint-jupyter.sh \
    && chmod +x /entrypoint-jupyter.sh

ENTRYPOINT [ "/entrypoint-jupyter.sh" ]

FROM pytorch AS airc

RUN python3 -m venv --system-site-packages /opt/airc/venv

# Don't install the full oneapi essentials; just the ones that we seem to need
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
    | gpg --dearmor -o /usr/share/keyrings/oneapi-archive-keyring.gpg \
    && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
    | tee /etc/apt/sources.list.d/oneAPI.list \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        intel-oneapi-mkl-sycl-2025.0 \
        intel-oneapi-dnnl-2025.0 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN { \
        echo '#!/bin/bash' ; \
        echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
        echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
        echo 'source /opt/airc/venv/bin/activate' ; \
        echo 'if [[ "$1" == "" ]]; then bash -c; else bash -c "${@}"; fi' ; \
    } > /opt/airc/shell ; \
    chmod +x /opt/airc/shell

SHELL [ "/opt/airc/shell" ]

RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu
# Install ipex-llm built in ipex-llm-src
COPY --from=ipex-llm-src /opt/ipex-llm/python/llm/dist/*.whl /opt/wheels/
RUN for pkg in /opt/wheels/ipex_llm*.whl; do pip install $pkg; done

COPY src/ /opt/airc/src/

# pydle does not work with newer asyncio due to coroutine
# being deprecated. Patch to work.
RUN pip3 install pydle transformers sentencepiece accelerate \
    && patch -d /opt/airc/venv/lib/python3*/site-packages/pydle \
       -p1 < /opt/airc/src/pydle.patch

# mistral fails with cache_position errors with transformers>4.40 (or at least it fails with the latest)
# as well as MistralSpda* things missing
RUN pip install "sentence_transformers<3.4.1" "transformers==4.40.0"

# To get xe_linear and other Xe methods
RUN pip3 install 'bigdl-core-xe-all>=2.6.0b'

# trl.core doesn't have what is needed with the default 'pip install trl' version
RUN pip install git+https://github.com/huggingface/trl.git@7630f877f91c556d9e5a3baa4b6e2894d90ff84c

# Needed by src/model-server.py
RUN pip install flask

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash' ; \
        echo 'set -e' ; \
        echo 'if [[ ! -e "/root/.cache/hub/token" ]]; then' ; \
        echo ' if [[ "${HF_ACCESS_TOKEN}" == "" ]]; then' ; \
        echo ' echo "Set your HF access token in .env as: HF_ACCESS_TOKEN=<token>" >&2' ; \
        echo ' exit 1' ; \
        echo ' else' ; \
        echo ' if [[ ! -d '/root/.cache/hub' ]]; then mkdir -p /root/.cache/hub; fi' ; \
        echo ' echo "${HF_ACCESS_TOKEN}" > /root/.cache/hub/token' ; \
        echo ' fi' ; \
        echo 'fi' ; \
        echo 'echo "Container: airc"' ; \
        echo 'echo "Setting pip environment to /opt/airc"' ; \
        echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
        echo 'source /opt/airc/venv/bin/activate'; \
        echo 'if [[ "${1}" == "shell" ]] || [[ "${1}" == "/bin/bash" ]]; then' ; \
        echo ' echo "Dropping to shell"' ; \
        echo ' /bin/bash -c "source /opt/airc/venv/bin/activate ; /bin/bash"' ; \
        echo ' exit $?' ; \
        echo 'else' ; \
        echo ' while true; do' ; \
        echo ' echo "Launching model-server"' ; \
        echo ' python src/model-server.py \' ; \
        echo ' 2>&1 | tee -a "/root/.cache/model-server.log"'; \
        echo ' echo "model-server died ($?). Restarting."' ; \
        echo ' sleep 5' ; \
        echo ' done &' ; \
        echo ' while true; do' ; \
        echo ' echo "Launching airc"' ; \
        echo ' python src/airc.py "${@}" \' ; \
        echo ' 2>&1 | tee -a "/root/.cache/airc.log"' ; \
        echo ' echo "airc died ($?). Restarting."' ; \
        echo ' sleep 5' ; \
        echo ' done' ; \
        echo 'fi' ; \
    } > /entrypoint-airc.sh \
    && chmod +x /entrypoint-airc.sh

COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
RUN dpkg -i /opt/ze-monitor-*deb

WORKDIR /opt/airc

SHELL [ "/opt/airc/shell" ]

# Needed by src/model-server.py
#RUN pip install faiss-cpu sentence_transformers feedparser bs4
RUN pip install ollama feedparser bs4

SHELL [ "/bin/bash", "-c" ]

ENTRYPOINT [ "/entrypoint-airc.sh" ]

FROM ubuntu:oracular AS ollama

# Get a couple prerequisites
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gpg \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

# Install Intel graphics runtimes
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:kobuk-team/intel-graphics \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libze-intel-gpu1 \
        libze1 \
        intel-ocloc \
        intel-opencl-icd \
        xpu-smi \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

WORKDIR /opt/ollama

# Download the nightly ollama release from ipex-llm
RUN wget -qO - https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250226-ubuntu.tgz | \
    tar --strip-components=1 -C . -xzv

# Install Python from Oracular (ollama works with 3.12)
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gpg \
        python3 \
        python3-pip \
        python3-venv \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

# Setup the ollama python virtual environment
RUN python3 -m venv --system-site-packages /opt/ollama/venv

# Setup the docker pip shell
RUN { \
        echo '#!/bin/bash' ; \
        update-alternatives --set python3 /opt/python/bin/python3.11 ; \
        echo 'source /opt/ollama/venv/bin/activate' ; \
        echo 'bash -c "${@}"' ; \
    } > /opt/ollama/shell ; \
    chmod +x /opt/ollama/shell

# Activate the pip environment on all shell calls
SHELL [ "/opt/ollama/shell" ]

# Install ollama python module
RUN pip3 install ollama

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash'; \
        echo 'echo "Container: ollama"'; \
        echo 'set -e'; \
        echo 'echo "Setting pip environment to /opt/ollama"'; \
        echo 'source /opt/ollama/venv/bin/activate'; \
        echo 'export OLLAMA_NUM_GPU=999'; \
        echo 'export ZES_ENABLE_SYSMAN=1'; \
        echo 'export SYCL_CACHE_PERSISTENT=1'; \
        echo 'export OLLAMA_KEEP_ALIVE=-1'; \
        echo 'export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1'; \
        echo ''; \
        echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama/)?shell$ ]]; then'; \
        echo ' echo "Dropping to shell"'; \
        echo ' exec /bin/bash'; \
        echo 'else'; \
        echo ' echo "Launching Ollama server..."'; \
        echo ' exec ./ollama serve'; \
        echo 'fi'; \
    } > /entrypoint.sh \
    && chmod +x /entrypoint.sh

RUN { \
        echo '#!/bin/bash'; \
        echo 'echo "Container: ollama"'; \
        echo 'set -e'; \
        echo 'echo "Setting pip environment to /opt/ollama"'; \
        echo 'source /opt/ollama/venv/bin/activate'; \
        echo './ollama pull mxbai-embed-large' ; \
        echo './ollama pull deepseek-r1:7b' ; \
    } > /fetch-models.sh \
    && chmod +x /fetch-models.sh

VOLUME [" /root/.ollama" ]
|
||||
ENTRYPOINT [ "/entrypoint.sh" ]
|
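The ollama stage above serves the stock Ollama HTTP API on port 11434 and pre-pulls mxbai-embed-large and deepseek-r1:7b via /fetch-models.sh. A minimal client-side liveness check (not part of this diff), assuming the docker-compose service name `ollama` used elsewhere in this change:

import ollama

# Assumes the compose service name "ollama" and Ollama's default port.
client = ollama.Client(host="http://ollama:11434")

# Should list the models /fetch-models.sh pulled (mxbai-embed-large, deepseek-r1:7b).
print(client.list())

# Small generation as a sanity check against the XPU-backed server.
reply = client.generate(model="deepseek-r1:7b", prompt="Reply with one word.", stream=False)
print(reply["response"])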
@ -21,6 +21,7 @@ services:
      - ./src:/opt/airc/src:rw
      - ./doc:/opt/airc/doc:ro
      - ./results:/opt/airc/results:rw
      - ./ketr.chat:/opt/airc/ketr-chat:rw # Live mount src
    cap_add: # used for running ze-monitor within container
      - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
      - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
@ -76,6 +77,7 @@ services:
      - ./jupyter:/opt/jupyter:rw
      - ./cache:/root/.cache
      - ./src:/opt/airc/src:rw # Live mount src
      - ./ketr.chat:/opt/airc/ketr-chat:rw # Live mount src
    deploy:
      resources:
        limits:
1001  jupyter/irc.py
368   jupyter/tools.py
@ -1,368 +0,0 @@
# %%
# Imports [standard]
# Standard library modules (no try-except needed)
import argparse
import asyncio
import anyio
import json
import logging
import os
import queue
import re
import time
from datetime import datetime

def try_import(module_name, pip_name=None):
    try:
        __import__(module_name)
    except ImportError:
        print(f"Module '{module_name}' not found. Install it using:")
        print(f" pip install {pip_name or module_name}")

# Third-party modules with import checks
try_import('gradio')
try_import('ollama')
try_import('openai')
try_import('pydle')
try_import('pytz')
try_import('requests')
try_import('yfinance', 'yfinance')
try_import('dotenv', 'python-dotenv')
try_import('geopy', 'geopy')

from dotenv import load_dotenv
from geopy.geocoders import Nominatim
import gradio as gr
import ollama
import openai
import pydle
import pytz
import requests
import yfinance as yf

# %%
def get_weather_by_location(city, state, country="USA"):
    """
    Get weather information from weather.gov based on city, state, and country.

    Args:
        city (str): City name
        state (str): State name or abbreviation
        country (str): Country name (defaults to "USA" as weather.gov is for US locations)

    Returns:
        dict: Weather forecast information
    """
    # Step 1: Get coordinates for the location using geocoding
    location = f"{city}, {state}, {country}"
    coordinates = get_coordinates(location)

    if not coordinates:
        return {"error": f"Could not find coordinates for {location}"}

    # Step 2: Get the forecast grid endpoint for the coordinates
    grid_endpoint = get_grid_endpoint(coordinates)

    if not grid_endpoint:
        return {"error": f"Could not find weather grid for coordinates {coordinates}"}

    # Step 3: Get the forecast data from the grid endpoint
    forecast = get_forecast(grid_endpoint)

    if not forecast['location']:
        forecast['location'] = location

    return forecast

def get_coordinates(location):
    """Convert a location string to latitude and longitude using Nominatim geocoder."""
    try:
        # Create a geocoder with a meaningful user agent
        geolocator = Nominatim(user_agent="weather_app_example")

        # Get the location
        location_data = geolocator.geocode(location)

        if location_data:
            return {
                "latitude": location_data.latitude,
                "longitude": location_data.longitude
            }
        else:
            print(f"Location not found: {location}")
            return None
    except Exception as e:
        print(f"Error getting coordinates: {e}")
        return None

def get_grid_endpoint(coordinates):
    """Get the grid endpoint from weather.gov based on coordinates."""
    try:
        lat = coordinates["latitude"]
        lon = coordinates["longitude"]

        # Define headers for the API request
        headers = {
            "User-Agent": "WeatherAppExample/1.0 (your_email@example.com)",
            "Accept": "application/geo+json"
        }

        # Make the request to get the grid endpoint
        url = f"https://api.weather.gov/points/{lat},{lon}"
        response = requests.get(url, headers=headers)

        if response.status_code == 200:
            data = response.json()
            return data["properties"]["forecast"]
        else:
            print(f"Error getting grid: {response.status_code} - {response.text}")
            return None
    except Exception as e:
        print(f"Error in get_grid_endpoint: {e}")
        return None

# Weather related function

def get_forecast(grid_endpoint):
    """Get the forecast data from the grid endpoint."""
    try:
        # Define headers for the API request
        headers = {
            "User-Agent": "WeatherAppExample/1.0 (your_email@example.com)",
            "Accept": "application/geo+json"
        }

        # Make the request to get the forecast
        response = requests.get(grid_endpoint, headers=headers)

        if response.status_code == 200:
            data = response.json()

            # Extract the relevant forecast information
            periods = data["properties"]["periods"]

            # Process the forecast data into a simpler format
            forecast = {
                "location": data["properties"].get("relativeLocation", {}).get("properties", {}),
                "updated": data["properties"].get("updated", ""),
                "periods": []
            }

            for period in periods:
                forecast["periods"].append({
                    "name": period.get("name", ""),
                    "temperature": period.get("temperature", ""),
                    "temperatureUnit": period.get("temperatureUnit", ""),
                    "windSpeed": period.get("windSpeed", ""),
                    "windDirection": period.get("windDirection", ""),
                    "shortForecast": period.get("shortForecast", ""),
                    "detailedForecast": period.get("detailedForecast", "")
                })

            return forecast
        else:
            print(f"Error getting forecast: {response.status_code} - {response.text}")
            return {"error": f"API Error: {response.status_code}"}
    except Exception as e:
        print(f"Error in get_forecast: {e}")
        return {"error": f"Exception: {str(e)}"}

# Example usage
def do_weather():
    city = input("Enter city: ")
    state = input("Enter state: ")
    country = input("Enter country (default USA): ") or "USA"

    print(f"Getting weather for {city}, {state}, {country}...")
    weather_data = get_weather_by_location(city, state, country)

    if "error" in weather_data:
        print(f"Error: {weather_data['error']}")
    else:
        print("\nWeather Forecast:")
        print(f"Location: {weather_data.get('location', {}).get('city', city)}, {weather_data.get('location', {}).get('state', state)}")
        print(f"Last Updated: {weather_data.get('updated', 'N/A')}")
        print("\nForecast Periods:")

        for period in weather_data.get("periods", []):
            print(f"\n{period['name']}:")
            print(f" Temperature: {period['temperature']}{period['temperatureUnit']}")
            print(f" Wind: {period['windSpeed']} {period['windDirection']}")
            print(f" Forecast: {period['shortForecast']}")
            print(f" Details: {period['detailedForecast']}")

# %%

# Stock related function
def get_ticker_price(ticker_symbols):
    """
    Look up the current price of a stock using its ticker symbol.

    Args:
        ticker_symbol (str): The stock ticker symbol (e.g., 'AAPL' for Apple)

    Returns:
        dict: Current stock information including price
    """
    results = []
    print(f"get_ticker_price('{ticker_symbols}')")
    for ticker_symbol in ticker_symbols.split(','):
        ticker_symbol = ticker_symbol.strip()
        if ticker_symbol == "":
            continue
        # Create a Ticker object
        try:
            ticker = yf.Ticker(ticker_symbol)
            print(ticker)
            # Get the latest market data
            ticker_data = ticker.history(period="1d")

            if ticker_data.empty:
                results.append({"error": f"No data found for ticker {ticker_symbol}"})
                continue

            # Get the latest closing price
            latest_price = ticker_data['Close'].iloc[-1]

            # Get some additional info
            info = ticker.info
            results.append({ 'symbol': ticker_symbol, 'price': latest_price })

        except Exception as e:
            results.append({"error": f"Error fetching data for {ticker_symbol}: {str(e)}"})

    return results[0] if len(results) == 1 else results
    #{
    #    "symbol": ticker_symbol,
    #    "price": latest_price,
    #    "currency": info.get("currency", "Unknown"),
    #    "company_name": info.get("shortName", "Unknown"),
    #    "previous_close": info.get("previousClose", "Unknown"),
    #    "market_cap": info.get("marketCap", "Unknown"),
    #}


# %%
def get_current_datetime(timezone="America/Los_Angeles"):
    """
    Returns the current date and time in the specified timezone in ISO 8601 format.

    Args:
        timezone (str): Timezone name (e.g., "UTC", "America/New_York", "Europe/London")
                        Default is "America/Los_Angeles"

    Returns:
        str: Current date and time with timezone in the format YYYY-MM-DDTHH:MM:SS+HH:MM
    """
    try:
        if timezone == 'system' or timezone == '' or not timezone:
            timezone = 'America/Los_Angeles'
        # Get current UTC time (timezone-aware)
        local_tz = pytz.timezone("America/Los_Angeles")
        local_now = datetime.now(tz=local_tz)

        # Convert to target timezone
        target_tz = pytz.timezone(timezone)
        target_time = local_now.astimezone(target_tz)

        return target_time.isoformat()
    except Exception as e:
        return {'error': f"Invalid timezone {timezone}: {str(e)}"}


# %%
tools = [ {
    "type": "function",
    "function": {
        "name": "get_ticker_price",
        "description": "Get the current stock price of one or more ticker symbols. Returns an array of objects with 'symbol' and 'price' fields. Call this whenever you need to know the latest value of stock ticker symbols, for example when a user asks 'How much is Intel trading at?' or 'What are the prices of AAPL and MSFT?'",
        "parameters": {
            "type": "object",
            "properties": {
                "ticker": {
                    "type": "string",
                    "description": "The company stock ticker symbol. For multiple tickers, provide a comma-separated list (e.g., 'AAPL,MSFT,GOOGL').",
                },
            },
            "required": ["ticker"],
            "additionalProperties": False
        }
    }
}, {
    "type": "function",
    "function": {
        "name": "summarize_site",
        "description": "Requests a second LLM agent to download the requested site and answer a question about the site. For example if the user says 'What are the top headlines on cnn.com?' you would use summarize_site to get the answer.",
        "parameters": {
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The website URL to download and process",
                },
                "question": {
                    "type": "string",
                    "description": "The question to ask the second LLM about the content",
                },
            },
            "required": ["url", "question"],
            "additionalProperties": False
        },
        "returns": {
            "type": "object",
            "properties": {
                "source": {
                    "type": "string",
                    "description": "Identifier for the source LLM"
                },
                "content": {
                    "type": "string",
                    "description": "The complete response from the second LLM"
                },
                "metadata": {
                    "type": "object",
                    "description": "Additional information about the response"
                }
            }
        }
    }
}, {
    "type": "function",
    "function": {
        "name": "get_current_datetime",
        "description": "Get the current date and time in a specified timezone",
        "parameters": {
            "type": "object",
            "properties": {
                "timezone": {
                    "type": "string",
                    "description": "Timezone name (e.g., 'UTC', 'America/New_York', 'Europe/London', 'America/Los_Angeles'). Default is 'America/Los_Angeles'."
                }
            },
            "required": []
        }
    }
}, {
    "type": "function",
    "function": {
        "name": "get_weather_by_location",
        "description": "Get the full weather forecast as structured data for a given CITY and STATE location in the United States. For example, if the user asks 'What is the weather in Portland?' or 'What is the forecast for tomorrow?' use the provided data to answer the question.",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string",
                    "description": "City to find the weather forecast (e.g., 'Portland', 'Seattle')."
                },
                "state": {
                    "type": "string",
                    "description": "State to find the weather forecast (e.g., 'OR', 'WA')."
                }
            },
            "required": [ "city", "state" ],
            "additionalProperties": False
        }
    }
}]

__all__ = [ 'tools', 'get_current_datetime', 'get_weather_by_location', 'get_ticker_price' ]
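The removed tools.py only defined the tool schemas and their Python implementations; the dispatch side lived elsewhere (presumably jupyter/irc.py, whose diff is not loaded above). As a hedged sketch of how a list like `tools` is typically wired to Ollama's chat API and routed back to the functions defined above — the model name below is a placeholder, since tool calling requires a tool-capable model:

import json
import ollama

client = ollama.Client(host="http://ollama:11434")

# Map schema names to the implementations from tools.py above.
available = {
    "get_ticker_price": get_ticker_price,
    "get_current_datetime": get_current_datetime,
    "get_weather_by_location": get_weather_by_location,
}

messages = [{"role": "user", "content": "What is INTC trading at?"}]
response = client.chat(model="qwen2.5:7b", messages=messages, tools=tools)
messages.append(response["message"])

# Run each requested tool and hand its result back to the model.
for call in response["message"].get("tool_calls") or []:
    fn = available.get(call["function"]["name"])
    if fn:
        result = fn(**call["function"]["arguments"])
        messages.append({"role": "tool", "content": json.dumps(result)})

final = client.chat(model="qwen2.5:7b", messages=messages)
print(final["message"]["content"])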
@ -1,57 +0,0 @@
{
  "file_path": "../doc/resume/generic.txt",
  "processed_at": "2025-03-12T19:22:00.311002",
  "context": "# JAMES KETRENOS\nsoftware architect, designer, developer, and team lead\nBeaverton, OR 97003\n\njames@ketrenos.com\n(503) 501 8281\n\nSeeking an opportunity to contribute to the advancement of energy efficient AI solutions, James is a driven problem solver, solution creator, technical leader, and skilled software developer focused on rapid, high-quality results, with an eye toward bringing solutions to the market.\n\n## SUMMARY\n\nProblem-solving: Trusted resource for executive leadership, able to identify opportunities to bridge technical gaps, adopt new technologies, and improve efficiency and quality for internal and external customers.\n\nProficient: Adept in compiled and interpreted languages, the software frameworks built around them, and front- and backend infrastructure. Leveraging deep and varied experience to quickly find solutions. Rapidly familiarizes and puts to use new and emerging technologies.\n\nExperienced: 20+ years of experience as an end-to-end Linux software architect, team lea...",
  "qa_pairs": [
    {
      "question": "What is James' primary role in the software architecture field?",
      "answer": "James' primary role in the software architecture field is focused on developing and maintaining the Linux kernel, ensuring compatibility with Intel wireless drivers. He also contributes to hardware design, particularly in the development of the Intel Intercast technology.",
      "validation": "**Assessment:**\n\n1. **ACCURACY:** \n The answer accurately mentions James' primary role in the software architecture field, focusing on kernel development for Intel wireless and contributing to hardware design, including Intel Intercast. However, it omits details about other contributions, such as kernel development for other hardware projects.\n\n2. **COMPLETENESS:** \n The answer is incomplete as it does not provide information on James' work in hardware design beyond Intel Intercast, such as kernel development for other hardware.\n\n3. **RELEVANCE:** \n The answer is relevant, covering James' kernel development for Intel wireless and his contribution to hardware design.\n\n4. **NATIVITY:** \n The answer is natural and conversational.\n\n5. **DIVERSITY:** \n The answer is somewhat diverse, covering different areas of software architecture.\n\n6. **IMPROVEMENTS:** \n The answer could include more details on James' contributions to hardware design, such as his kernel development for Intel wireless or other hardware projects.\n\n**Decision:** \nREJECT"
    },
    {
      "question": "Can you explain the development of the 'ze-monitor' application?",
      "answer": "The 'ze-monitor' application was developed as a lightweight C++ Linux application that leverages Level Zero Sysman APIs to provide 'top' like device monitoring for Intel GPUs. This project was part of James' experience working with Linux and Level Zero Sysman APIs, focusing on leveraging new technologies and ensuring seamless integration with existing systems. The development process involved identifying opportunities to bridge technical gaps, adopt new technologies, and improve efficiency and quality for internal and external customers. This led to the creation of a lightweight application that integrates Level Zero Sysman APIs, enabling device monitoring for Intel GPUs. The detailed development process involved collaboration with the team, ensuring that the application met the technical requirements and worked seamlessly with existing systems.",
      "validation": "**Answer:**\n\nThe 'ze-monitor' application was developed as a lightweight C++ Linux application that leverages Level Zero Sysman APIs to provide device monitoring for Intel GPUs. This project was part of James' experience working with Linux and Level Zero Sysman APIs, focusing on leveraging new technologies and ensuring seamless integration with existing systems. The development process involved identifying opportunities to bridge technical gaps, adopt new technologies, and improve efficiency and quality for internal and external customers. The application was created to integrate Level Zero Sysman APIs, enabling device monitoring for Intel GPUs, while ensuring seamless integration with existing systems.\n\n**Assessment:**\n\n- **VERDICT:** [ACCEPT]\n\n- **REASONING:** The answer is accurate, comprehensive, relevant, and natural. It explains the technical aspects of the application and its development, which is useful for understanding the context provided. It also mentions how the answer was generated and its contribution to the overall knowledge base, making it somewhat complete.\n\n- **IMPROVEMENTS:** To add diversity, the answer could mention other areas where similar applications were developed, or highlight the broader impact of these technologies. For example, mentioning other device monitoring applications developed by James or other developers could add more diversity. Also, including how these technologies contribute to broader areas like cybersecurity or efficiency could enhance the answer's breadth and relevance."
    },
    {
      "question": "How did the 'airc' project utilize James' resume data?",
      "answer": "James' resume data was utilized in the development of the 'airc' project, which is a command line client, interactive query system, and IRC bot integration. This project leverages his extensive experience in software architecture and development, particularly in areas such as Linux release infrastructure and system architecture.",
      "validation": "- VERDICT: [ACCEPT]\n- REASONING: The answer is accurate, complete, relevant, and natural. It provides specific details and adds some diversity by focusing on a single project and experience area. No further improvements are needed.\n- IMPROVEMENTS: None needed, as the answer meets all criteria effectively.\n\nAnswer:\nJames' resume data was utilized in the development of the 'airc' project, which includes a command line client, interactive query system, and IRC bot integration. This project leverages his extensive experience in software architecture and development, particularly in areas such as Linux release infrastructure and system architecture."
    },
    {
      "question": "What is James' experience with Linux kernel driver integration?",
      "answer": "James' experience with Linux kernel driver integration includes working on the Intel Wireless drivers, where he contributed to the development of kernel driver integration, utilizing tools like Linux packaging and managing CI/CD pipelines for release infrastructure.",
      "validation": "**Answer:**\n\nJames' experience with Linux kernel driver integration includes his contributions to the development of kernel driver integration for Intel's Wireless drivers and his role in managing CI/CD pipelines for projects like Open Source at Intel.\n\n**Evaluation:**\n\n- **VERDICT:** [ACCEPT]\n- **REASONING:** The answer accurately describes James' experience with kernel driver integration, providing specific examples and context. It is relevant, natural, and adds diversity to his expertise.\n- **IMPROVEMENTS:** If possible, enhancing the answer with more details about the tools or processes used could further enhance naturalness. However, as provided, it is sufficiently detailed.\n- **DIVERSITY:** The answer naturally incorporates James' work in kernel driver integration, which is a natural addition to his role and provides a new angle for understanding his expertise.\n\nThis answer is appropriate and suitable for training purposes."
    },
    {
      "question": "How did James contribute to the development of Intel RealSense hardware?",
      "answer": "James contributed to the development of Intel RealSense hardware through the development of a command line client and an IRC bot integration for the 'airc' project.",
      "validation": "**Answer:**\n\nJames contributed to the development of Intel RealSense hardware through the creation of a command line client and an IRC bot integration for the 'airc' project. This involved integrating AI capabilities into the 'ze-monitor' project, allowing for device monitoring of Intel GPUs, which was part of the broader effort to advance energy-efficient AI solutions. However, the answer does not directly address how James contributed to Intel RealSense hardware, focusing instead on his other projects. This could be an area for improvement to better align with the context provided.\n\n**Assessment (VERDICT):** \nThe answer is accurate and provides specific information about James' contributions but lacks essential details about his role in Intel RealSense hardware development. It is a bit verbose but could be more concise. The response is appropriate but could be enhanced to better meet the quality standards by providing a clearer link to Intel RealSense.\n\n**Reasoning:** \nThe answer accurately describes James' contributions but does not address the specific impact on Intel RealSense hardware. It is a sufficient answer for the given question, but it could be improved for more clarity and conciseness.\n\n**IMPROVEMENTS:** \n1. Clarify how James contributed to Intel RealSense hardware, linking his other projects to RealSense development.\n2. Provide a more direct explanation of his role in the 'airc' project and its relation to Intel RealSense.\n3. Offer a brief overview of Intel RealSense hardware to better place James' contributions within the context."
    },
    {
      "question": "What is the current state of James' open-source contributions?",
      "answer": "James Ketrenos is actively engaged in several open-source projects, contributing to the development and enhancement of various tools and technologies. His contributions include:\n\n1. **Ze-Monitor**: A lightweight application that leverages Level Zero Sysman APIs for device monitoring of Intel GPUs. This tool is part of his contributions to the Linux community.\n\n2. **Airc**: A LLM pipeline that facilitates interactive queries about James' resume, showcasing his proficiency in using LLMs to assist with resume optimization and internal communication.\n\n3. **Rebound**: A React web UI that integrates various components such as a command line client, an IRC bot, and an IRC bot integration, demonstrating his ability to create user-friendly interfaces for complex systems.\n\n4. **Rebound-WebUI**: A full-stack React web UI that includes a command line client, an IRC bot, and an IRC bot integration, highlighting his expertise in developing integrated web applications.\n\n5. **Rebound-React-UI**: A React web UI that includes a command line client and an IRC bot integration, showcasing his skill in creating web interfaces for technical solutions.\n\n6. **Rebound-React-UI-IRC**: An integrated React web UI with a command line client, an IRC bot, and an IRC bot integration, further illustrating his ability to create effective and user-friendly interfaces.\n\nThese contributions reflect James' active participation in the open-source community and his dedication to advancing technology through innovative solutions.",
      "validation": "** Assessment: [ACCEPT]**\n\n**Reasoning:**\n1. **ACCURACY:** The answer accurately describes James Ketrenos's open-source contributions, mentioning specific tools and projects.\n2. **COMPLETENESS:** The answer fully answers the question by detailing James's involvement in multiple projects.\n3. **RELEVANCE:** The question is meaningful and relevant to the context, focusing on James's role in the Linux community.\n4. **NATURALNESS:** The language is natural and conversational, avoiding forced or unnatural phrasing.\n5. **DIVERSITY:** The pair adds naturalness to the dataset, enhancing its overall quality.\n\n**Improvements:**\n- To further enhance naturalness, examples could include using \"partner\" instead of \"contribution,\" making the answer more relatable.\n- Additional context about James's contributions could be added, such as his role in specific projects or his impact on the community."
    },
    {
      "question": "How has James' expertise in kernel driver development impacted the CI/CD pipeline?",
      "answer": "James' expertise in kernel driver development has significantly impacted the CI/CD pipeline. Specifically, his work on Intel graphics software has enabled the efficient deployment of Intel graphics products within the CI/CD pipeline. By optimizing kernel drivers, James ensured that the Linux kernel could handle the CI/CD processes, such as deployments and releases, thereby enhancing the software's performance and reliability. This integration has allowed for better utilization of Intel graphics, improving the overall efficiency and customer satisfaction of the products.",
      "validation": "**Answer:**\n\nThe provided answer is well-received, with a naturalness score of 0.8 due to minor suggestion of making it more conversational. It is accurate, complete, and relevant, with a naturalness score of 0.8 due to minor suggestion of making it more conversational.\n\n**Evaluation:**\n\n- **ACCURACY:** Yes. The answer correctly links kernel driver expertise to CI/CD pipeline efficiency.\n- **COMPLETENESS:** Yes. The answer provides a clear and detailed link between kernel driver development and CI/CD processes.\n- **RELEVANCE:** Yes. The answer connects kernel driver expertise to CI/CD processes, which are relevant to software development and efficiency.\n- **NATURALNESS:** Good. The answer is written in a professional context, but could be more conversational.\n- **DIVERSITY:** Good. The answer focuses on specific technologies and their impact, but doesn't add much diversity.\n\n**Overall Assessment:** The task is suitable for training, with minor areas for improvement in naturalness. The answer is accurate, complete, and relevant, making it effective for quality assurance.\n\n**Answer:** VERDICT: [ACCEPT] \nREASONING: The answer accurately connects James' kernel driver expertise to CI/CD impact, demonstrating relevance and completeness. \nIMPROVEMENTS: The answer could be made more conversational by phrases like, \" James' work on Intel graphics software has enabled...\" \n- VERDICT: ACCEPT \n- REASONING: The answer is accurate, complete, and relevant. It effectively addresses the impact of kernel driver development on CI/CD. \n- IMPROVEMENTS: Suggesting more conversational phrasing to enhance naturalness."
    },
    {
      "question": "What is the impact of James' work on the development of the Intel GPU driver?",
      "answer": "James Keta renos's work has significantly impacted the development of the Intel GPU driver. Specifically, he was involved in the design and implementation of the Intel GPU driver, which led to the release of the Intel GPU driver version 3.3.1. Additionally, his contributions to the Linux kernel and development of tools like 'ze-monitor' and 'airc' have enhanced the efficiency of kernel-based graphics drivers. These advancements have improved the performance and reliability of the Intel GPU, contributing to the broader development of Linux kernel-based graphics drivers and related technologies.",
      "validation": "**Answer:**\n\n**VERDICT:** ACCEPVED\n\n**REASONING:** The question is clear and the answer is thorough, addressing the impact of James Keta renos's work on the Intel GPU driver. However, the answer is somewhat technical and lacks a broader context and naturalness.\n\n**IMPROVEMENTS:** The answer could be made more conversational and broader to explain how James' work has directly impacted the development of the Intel GPU driver. It should also provide more context on how his contributions have influenced other areas of Linux or kernel-based software.\n\n**NOTICE:** The answer is comprehensive but lacks a more natural and broader context. It could be improved by including more conversational and contextual elements to better explain the impact and its broader significance."
    },
    {
      "question": "How has James' experience with robotic vision applications influenced hardware design?",
      "answer": "James' experience with robotic vision applications has significantly influenced hardware design by enabling the development of efficient and scalable computing solutions. By integrating robotic vision into hardware, he contributed to the design of hardware that can handle complex tasks requiring visual recognition, leading to improved performance in areas such as image processing and robotics. This has translated into hardware designs that enhance computational capabilities and efficiency, directly impacting the development of systems requiring high-performance computing.",
      "validation": "- VERDICT: REJECT\n- REASONING: The answer is technically accurate and relevant, but it lacks specific examples and context, making it less natural and less detailed than necessary.\n- IMPROVEMENTS: The answer should include concrete hardware design examples influenced by robotic vision, tie back to James' experience as a software architect, and provide more natural and detailed examples to make it more understandable."
    },
    {
      "question": "What is James' role in the development of the Intel Intercast software technology?",
      "answer": "James' role in the development of the Intel Intercast software technology is as a team leader and technical leader, responsible for driving the integration of the Intel Intercast technology into various solutions and ensuring its successful adoption.",
      "validation": "**Answer:**\n\nJames' role in the development of the Intel Intercast software technology is as a team leader and technical leader, responsible for driving the integration of the Intel Intercast technology into various solutions and ensuring its successful adoption.\n\n**Evaluation:**\n\n1. **VERDICT: [ACCEPT]**\n - **REASONING:** The answer accurately describes James' role and contributes to the context about his work.\n\n2. **COMPLETENESS:** [REMAIN] The answer fully addresses the question by explaining James' role, so it's complete.\n\n3. **RELEVANCE:** [REMAIN] The answer clearly ties James' role to the Intel Intercast project, making it relevant.\n\n4. ** NATURALNESS:** [REMAIN] The answer uses professional language and is concise, which sounds natural.\n\n5. **DIVERSITY:** [REMAIN] The answer adds variety by mentioning multiple areas of James' work, which is important for training.\n\nOverall, this pair meets all the quality standards. It's accurate, complete, relevant, natural, and adds diversity, which is important for training."
    }
  ]
}
2     src/.gitignore (vendored)
@ -1,2 +0,0 @@
sessions

264   src/airc.py
@ -1,264 +0,0 @@
import asyncio
import argparse
import pydle
import logging
import os
import re
import time
import datetime
import asyncio
import json
import ollama
from typing import Dict, Any
import ollama
import chromadb
import feedparser
from bs4 import BeautifulSoup

OLLAMA_API_URL = "http://ollama:11434" # Default Ollama local endpoint
MODEL_NAME = "deepseek-r1:7b"

def parse_args():
    parser = argparse.ArgumentParser(description="AI is Really Cool")
    parser.add_argument("--server", type=str, default="irc.libera.chat", help="IRC server address")
    parser.add_argument("--port", type=int, default=6667, help="IRC server port")
    parser.add_argument("--nickname", type=str, default="airc", help="Bot nickname")
    parser.add_argument("--channel", type=str, default="#airc-test", help="Channel to join")
    parser.add_argument("--ai-server", type=str, default="http://localhost:5000", help="OpenAI API endpoint")
    parser.add_argument('--level', type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
                        default='INFO', help='Set the logging level.')
    return parser.parse_args()

def setup_logging(level):
    numeric_level = getattr(logging, level.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError(f"Invalid log level: {level}")

    logging.basicConfig(level=numeric_level, format='%(asctime)s - %(levelname)s - %(message)s')
    logging.info(f"Logging is set to {level} level.")


client = ollama.Client(host=OLLAMA_API_URL)

def extract_text_from_html_or_xml(content, is_xml=False):
    # Parse the content
    if is_xml:
        soup = BeautifulSoup(content, 'xml') # Use 'xml' parser for XML content
    else:
        soup = BeautifulSoup(content, 'html.parser') # Default to 'html.parser' for HTML content

    # Extract and return just the text
    return soup.get_text()

class Feed():
    def __init__(self, name, url, poll_limit_min = 30, max_articles=5):
        self.name = name
        self.url = url
        self.poll_limit_min = datetime.timedelta(minutes=poll_limit_min)
        self.last_poll = None
        self.articles = []
        self.max_articles = max_articles
        self.update()

    def update(self):
        now = datetime.datetime.now()
        if self.last_poll is None or (now - self.last_poll) >= self.poll_limit_min:
            logging.info(f"Updating {self.name}")
            feed = feedparser.parse(self.url)
            self.articles = []
            self.last_poll = now

            content = ""
            if len(feed.entries) > 0:
                content += f"Source: {self.name}\n"
                for entry in feed.entries[:self.max_articles]:
                    title = entry.get("title")
                    if title:
                        content += f"Title: {title}\n"
                    link = entry.get("link")
                    if link:
                        content += f"Link: {link}\n"
                    summary = entry.get("summary")
                    if summary:
                        summary = extract_text_from_html_or_xml(summary, False)
                        content += f"Summary: {summary}\n"
                    published = entry.get("published")
                    if published:
                        content += f"Published: {published}\n"
                    content += "\n"

                self.articles.append(content)
        else:
            logging.info(f"Not updating {self.name} -- {self.poll_limit_min - (now - self.last_poll)}s remain to refresh.")
        return self.articles


# News RSS Feeds
rss_feeds = [
    Feed(name="BBC World", url="http://feeds.bbci.co.uk/news/world/rss.xml"),
    Feed(name="Reuters World", url="http://feeds.reuters.com/Reuters/worldNews"),
    Feed(name="Al Jazeera", url="https://www.aljazeera.com/xml/rss/all.xml"),
    Feed(name="CNN World", url="http://rss.cnn.com/rss/edition_world.rss"),
    Feed(name="Time", url="https://time.com/feed/"),
    Feed(name="Euronews", url="https://www.euronews.com/rss"),
    Feed(name="FeedX", url="https://feedx.net/rss/ap.xml")
]

documents = [
    "Llamas like to eat penguins",
    "Llamas are not vegetarians and have very efficient digestive systems",
    "Llamas live to be about 120 years old, though some only live for 15 years and others live to be 90 years old",
]

import chromadb

# Initialize ChromaDB Client
db = chromadb.PersistentClient(path="/root/.cache/chroma.db")

# We want to save the collection to disk to analyze it offline, but we don't
# want to re-use it
collection = db.get_or_create_collection("docs")

# store each document in a vector embedding database
for i, feed in enumerate(rss_feeds):
    # Use the client instance instead of the global ollama module
    for j, article in enumerate(feed.articles):
        response = client.embeddings(model="mxbai-embed-large", prompt=article)
        embeddings = response["embedding"] # Note: it's "embedding", not "embeddings"
        collection.add(
            ids=[str(i)+str(j)],
            embeddings=embeddings,
            documents=[article]
        )

class AIRC(pydle.Client):
|
||||
def __init__(self, nick, channel, client, burst_limit = 5, rate_limit = 1.0, burst_reset_timeout = 10.0):
|
||||
super().__init__(nick)
|
||||
self.nick = nick
|
||||
self.channel = channel
|
||||
self.burst_limit = burst_limit
|
||||
self.sent_burst = 0
|
||||
self.rate_limit = rate_limit
|
||||
self.burst_reset_timeout = burst_reset_timeout
|
||||
self.sent_burst = 0 # Track messages sent in burst
|
||||
self.last_message_time = None # Track last message time
|
||||
self.system_input = "You are a critical assistant. Give concise and accurate answers in less than 120 characters."
|
||||
self._message_queue = asyncio.Queue()
|
||||
self._task = asyncio.create_task(self._send_from_queue())
|
||||
self.client = client
|
||||
self.queries = 0
|
||||
self.processing = datetime.timedelta(minutes=0)
|
||||
|
||||
async def _send_from_queue(self):
|
||||
"""Background task that sends queued messages with burst + rate limiting."""
|
||||
while True:
|
||||
target, message = await self._message_queue.get()
|
||||
|
||||
# If burst is still available, send immediately
|
||||
if self.sent_burst < self.burst_limit:
|
||||
self.sent_burst += 1
|
||||
else:
|
||||
await asyncio.sleep(self.rate_limit) # Apply rate limit
|
||||
|
||||
await super().message(target, message) # Send message
|
||||
self.last_message_time = asyncio.get_event_loop().time() # Update last message timestamp
|
||||
|
||||
# Start burst reset countdown after each message
|
||||
asyncio.create_task(self._reset_burst_after_inactivity())
|
||||
|
||||
async def _reset_burst_after_inactivity(self):
|
||||
"""Resets burst counter only if no new messages are sent within timeout."""
|
||||
last_time = self.last_message_time
|
||||
await asyncio.sleep(self.burst_reset_timeout) # Wait for inactivity period
|
||||
|
||||
# Only reset if no new messages were sent during the wait
|
||||
if self.last_message_time == last_time:
|
||||
self.sent_burst = 0
|
||||
logging.info("Burst limit reset due to inactivity.")
|
||||
|
||||
async def message(self, target, message):
|
||||
"""Splits a multi-line message and sends each line separately. If more than 10 lines, truncate and add a message."""
|
||||
lines = message.splitlines() # Splits on both '\n' and '\r\n'
|
||||
|
||||
# Process the first 10 lines
|
||||
for line in lines[:10]:
|
||||
if line.strip(): # Ignore empty lines
|
||||
await self._message_queue.put((target, line))
|
||||
|
||||
# If there are more than 10 lines, add the truncation message
|
||||
if len(lines) > 10:
|
||||
await self._message_queue.put((target, "[additional content truncated]"))
|
||||
|
||||
async def on_connect(self):
|
||||
logging.debug('on_connect')
|
||||
await self.join(self.channel)
|
||||
|
||||
def remove_substring(self, string, substring):
|
||||
return string.replace(substring, "")
|
||||
|
||||
def extract_nick_message(self, input_string):
|
||||
# Pattern with capturing groups for nick and message
|
||||
pattern = r"^\s*([^\s:]+?)\s*:\s*(.+?)$"
|
||||
|
||||
match = re.match(pattern, input_string)
|
||||
if match:
|
||||
nick = match.group(1) # First capturing group
|
||||
message = match.group(2) # Second capturing group
|
||||
return nick, message
|
||||
return None, None # Return None for both if no match
|
||||
|
||||
async def on_message(self, target, source, message):
|
||||
if source == self.nick:
|
||||
return
|
||||
nick, body = self.extract_nick_message(message)
|
||||
if nick == self.nick:
|
||||
content = None
|
||||
if body == "stats":
|
||||
content = f"{self.queries} queries handled in {self.processing}s"
|
||||
else:
|
||||
self.queries += 1
|
||||
start = datetime.datetime.now()
|
||||
query_text = body
|
||||
query_response = self.client.embeddings(model="mxbai-embed-large", prompt=query_text)
|
||||
query_embedding = query_response["embedding"] # Note: singular "embedding", not plural
|
||||
|
||||
# Then run the query with the correct structure
|
||||
results = collection.query(
|
||||
query_embeddings=[query_embedding], # Make sure this is a list containing the embedding
|
||||
n_results=3
|
||||
)
|
||||
data = results['documents'][0][0]
|
||||
logging.info(f"Data for {query_text}: {data}")
|
||||
logging.info(f"From {results}")
|
||||
output = self.client.generate(
|
||||
model=MODEL_NAME,
|
||||
system=f"Your are {self.nick}. In your response, make reference to this data if appropriate: {data}",
|
||||
prompt=f"Respond to this prompt: {query_text}",
|
||||
stream=False
|
||||
)
|
||||
end = datetime.datetime.now()
|
||||
self.processing = self.processing + end - start
|
||||
|
||||
# Prune off the <think>...</think>
|
||||
content = re.sub(r'^<think>.*?</think>', '', output['response'], flags=re.DOTALL).strip()
|
||||
|
||||
if content:
|
||||
logging.info(f'Sending: {content}')
|
||||
await self.message(target, f"{content}")
|
||||
|
||||
def remove_substring(string, substring):
|
||||
return string.replace(substring, "")
|
||||
|
||||
async def main():
|
||||
# Parse command-line arguments
|
||||
args = parse_args()
|
||||
|
||||
# Setup logging based on the provided level
|
||||
setup_logging(args.level)
|
||||
|
||||
bot = AIRC(args.nickname, args.channel, client)
|
||||
await bot.connect(args.server, args.port, tls=False)
|
||||
await bot.handle_forever()
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
@ -1,293 +0,0 @@
|
||||
#
|
||||
# Copyright 2016 The BigDL Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# Some parts of this file is adapted from
|
||||
# https://github.com/tloen/alpaca-lora/blob/main/finetune.py
|
||||
#
|
||||
# Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
import torch
|
||||
import transformers
|
||||
from datasets import load_dataset
|
||||
import accelerate
|
||||
from typing import Union
|
||||
from datetime import datetime
|
||||
|
||||
from transformers import AutoTokenizer
|
||||
from peft import (
|
||||
get_peft_model_state_dict,
|
||||
set_peft_model_state_dict,
|
||||
)
|
||||
|
||||
from ipex_llm.transformers import AutoModelForCausalLM
|
||||
# import them from ipex_llm.transformers.qlora to get a IPEX-LLM compatible Peft model
|
||||
from ipex_llm.transformers.qlora import get_peft_model, prepare_model_for_kbit_training,\
|
||||
LoraConfig
|
||||
import json
|
||||
|
||||
local_rank = 0
|
||||
world_size = 1
|
||||
|
||||
class Prompter(object):
|
||||
__slots__ = ("data")
|
||||
|
||||
def __init__(self, data_path: str = ""):
|
||||
self.data = None
|
||||
if not os.path.exists(data_path):
|
||||
raise Exception(f"Can't read {data_path}")
|
||||
with open(data_path) as fp:
|
||||
self.data = json.load(fp)
|
||||
|
||||
def generate_prompt(
|
||||
self,
|
||||
instruction: str,
|
||||
input: Union[None, str]=None,
|
||||
label: Union[None, str]=None,
|
||||
) -> str:
|
||||
# returns the full prompt from instruction and optional input
|
||||
# if a label (=response, =output) is provided, it's also appended.
|
||||
if input:
|
||||
ret = f"### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
|
||||
else:
|
||||
ret = f"### Instruction:\n{instruction}\n\n### Response:\n"
|
||||
if label:
|
||||
ret = f"{ret}{label}"
|
||||
return ret
|
||||
|
||||
def get_response(self, output: str) -> str:
|
||||
return output.split("### Response:")[1].strip()
|
||||
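# Example (hypothetical values): Prompter("qa.json").generate_prompt("What is X?", label="X is Y")
# yields "### Instruction:\nWhat is X?\n\n### Response:\nX is Y", and get_response() returns everything after "### Response:"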
|
||||
"""Data processing to get train data and val data"""
|
||||
def tokenize_qa(prompt, tokenizer, max_length=1, add_eos_token=True):
|
||||
# there's probably a way to do this with the tokenizer settings
|
||||
# but again, gotta move fast
|
||||
result = tokenizer(
|
||||
prompt,
|
||||
truncation=True,
|
||||
max_length=max_length,
|
||||
padding=False,
|
||||
return_tensors=None,
|
||||
)
|
||||
if (
|
||||
result["input_ids"][-1] != tokenizer.eos_token_id
|
||||
and len(result["input_ids"]) < max_length
|
||||
and add_eos_token
|
||||
):
|
||||
result["input_ids"].append(tokenizer.eos_token_id)
|
||||
result["attention_mask"].append(1)
|
||||
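# for causal LM training the labels mirror input_ids; the prompt portion may later be masked with -100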
result["labels"] = result["input_ids"].copy()
|
||||
return result
|
||||
|
||||
def generate_and_tokenize_prompt(entry, prompter, tokenizer, max_length, add_eos_token, train_on_inputs):
|
||||
full_prompt = prompter.generate_prompt(
|
||||
entry["question"],
|
||||
None,
|
||||
entry["answer"],
|
||||
)
|
||||
print({'full_prompt': full_prompt})
|
||||
tokenized_full_prompt = tokenize_qa(full_prompt, tokenizer, max_length=max_length)
|
||||
if not train_on_inputs:
|
||||
user_prompt = prompter.generate_prompt(
|
||||
entry["question"], None
|
||||
)
|
||||
tokenized_user_prompt = tokenize_qa(
|
||||
user_prompt, tokenizer, add_eos_token=add_eos_token, max_length=max_length
|
||||
)
|
||||
user_prompt_len = len(tokenized_user_prompt["input_ids"])
|
||||
if add_eos_token:
|
||||
user_prompt_len -= 1
|
||||
# Mask out the input from the labels
|
||||
tokenized_full_prompt["labels"] = [
|
||||
-100
|
||||
] * user_prompt_len + tokenized_full_prompt["labels"][
|
||||
user_prompt_len:
|
||||
] # could be sped up, probably
|
||||
return tokenized_full_prompt
|
||||
|
||||
|
||||
def train(
|
||||
# model/data params
|
||||
base_model: str = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
||||
data_path: str = "./results/doc/resume/generic.txt.qa.json",
|
||||
output_dir: str = "./bigdl-qlora-alpaca",
|
||||
# training hyperparams
|
||||
bf16: bool = True, # default to bf16
|
||||
batch_size: int = 128,
|
||||
micro_batch_size: int = 2, # default to be 2, limited by GPU memory
|
||||
num_epochs: int = 3,
|
||||
learning_rate: float = 3e-5, # default to be 3e-5 to avoid divergence
|
||||
cutoff_len: int = 256,
|
||||
val_set_size: int = 2000,
|
||||
# lora hyperparams
|
||||
lora_r: int = 8,
|
||||
lora_alpha: int = 16,
|
||||
lora_dropout: float = 0.05,
|
||||
lora_target_modules: List[str] = [
|
||||
"q_proj",
|
||||
"v_proj",
|
||||
"k_proj",
|
||||
"o_proj",
|
||||
"up_proj",
|
||||
"down_proj",
|
||||
"gate_proj"
|
||||
],
|
||||
# llm hyperparams
|
||||
train_on_inputs: bool = True, # if False, masks out inputs in loss
|
||||
add_eos_token: bool = False,
|
||||
group_by_length: bool = False, # faster, but produces an odd training loss curve
|
||||
resume_from_checkpoint: str = "", # either training checkpoint or final adapter
|
||||
prompt_template_name: str = "alpaca", # The prompt template to use, will default to alpaca.
|
||||
gradient_checkpointing: bool = False,
|
||||
training_mode: str = "lora",
|
||||
save_checkpoint: bool = True,
|
||||
):
|
||||
if training_mode != "lora":
|
||||
raise Exception(f"This example is for lora training mode, but got training_mode={training_mode}.")
|
||||
|
||||
print(
|
||||
f"Training Alpaca-LoRA model with params:\n"
|
||||
f"base_model: {base_model}\n"
|
||||
f"data_path: {data_path}\n"
|
||||
f"output_dir: {output_dir}\n"
|
||||
f"batch_size: {batch_size}\n"
|
||||
f"micro_batch_size: {micro_batch_size}\n"
|
||||
f"num_epochs: {num_epochs}\n"
|
||||
f"learning_rate: {learning_rate}\n"
|
||||
f"cutoff_len: {cutoff_len}\n"
|
||||
f"val_set_size: {val_set_size}\n"
|
||||
f"lora_r: {lora_r}\n"
|
||||
f"lora_alpha: {lora_alpha}\n"
|
||||
f"lora_dropout: {lora_dropout}\n"
|
||||
f"lora_target_modules: {lora_target_modules}\n"
|
||||
f"train_on_inputs: {train_on_inputs}\n"
|
||||
f"add_eos_token: {add_eos_token}\n"
|
||||
f"group_by_length: {group_by_length}\n"
|
||||
f"resume_from_checkpoint: {resume_from_checkpoint or False}\n"
|
||||
f"prompt template: {prompt_template_name}\n"
|
||||
f"training_mode: {training_mode}\n"
|
||||
f"save_checkpoint: {save_checkpoint}\n"
|
||||
)
|
||||
|
||||
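# effective per-device batch size = micro_batch_size * gradient_accumulation_steps (assumes batch_size is a multiple of micro_batch_size)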
gradient_accumulation_steps = batch_size // micro_batch_size
|
||||
|
||||
prompter = Prompter(data_path)
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
base_model,
|
||||
# load_in_4bit=True,
|
||||
load_in_low_bit="bf16",
|
||||
optimize_model=False,
|
||||
# optimize_model=True,
|
||||
torch_dtype=torch.bfloat16,
|
||||
modules_to_not_convert=["lm_head"],
|
||||
trust_remote_code=True,
|
||||
)
|
||||
|
||||
# model = model.half()
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
|
||||
|
||||
if tokenizer.pad_token is None:
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
|
||||
# Prepare a IPEX-LLM compatible Peft model
|
||||
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=gradient_checkpointing)
|
||||
|
||||
config = LoraConfig(
|
||||
r=lora_r,
|
||||
lora_alpha=lora_alpha,
|
||||
target_modules=lora_target_modules,
|
||||
lora_dropout=lora_dropout,
|
||||
bias="none",
|
||||
task_type="CAUSAL_LM",
|
||||
training_mode=training_mode,
|
||||
)
|
||||
print(f"Lora Config: {config}")
|
||||
model = get_peft_model(model, config)
|
||||
|
||||
dataset = load_dataset("json", data_files=data_path)
|
||||
tokenized_dataset = dataset.map(generate_and_tokenize_prompt, fn_kwargs={
|
||||
"prompter": prompter,
|
||||
"tokenizer": tokenizer,
|
||||
"max_length": cutoff_len,
|
||||
"add_eos_token": add_eos_token,
|
||||
"train_on_inputs": train_on_inputs
|
||||
})  # generate_and_tokenize_prompt handles one entry at a time, so map over single examples
|
||||
model.print_trainable_parameters() # Be more transparent about the % of trainable params.
|
||||
|
||||
split = tokenized_dataset["train"].train_test_split(test_size=0.2)  # load_dataset("json") returns a DatasetDict keyed by "train"
|
||||
train_dataset = split["train"]
|
||||
eval_dataset = split["test"]
|
||||
|
||||
trainer = transformers.Trainer(
|
||||
model=model,
|
||||
train_dataset=train_dataset,
|
||||
eval_dataset=eval_dataset,
|
||||
args=transformers.TrainingArguments(
|
||||
per_device_train_batch_size=micro_batch_size,
|
||||
gradient_accumulation_steps=gradient_accumulation_steps,
|
||||
# warmup_ratio=0.03,
|
||||
# warmup_steps=100,
|
||||
max_grad_norm=0.3,
|
||||
num_train_epochs=num_epochs,
|
||||
learning_rate=learning_rate,
|
||||
lr_scheduler_type="cosine",
|
||||
bf16=True, # bf16 keeps training numerically stable
|
||||
logging_steps=1,
|
||||
optim="adamw_torch",
|
||||
evaluation_strategy="steps" if val_set_size > 0 else "no",
|
||||
save_strategy="steps" if save_checkpoint else "no",
|
||||
eval_steps=100 if val_set_size > 0 else None,
|
||||
save_steps=100,
|
||||
output_dir=output_dir,
|
||||
save_total_limit=100,
|
||||
load_best_model_at_end=True if val_set_size > 0 and save_checkpoint else False,
|
||||
group_by_length=group_by_length,
|
||||
gradient_checkpointing=gradient_checkpointing,
|
||||
ddp_backend="ccl",
|
||||
save_safetensors=False,
|
||||
),
|
||||
data_collator=transformers.DataCollatorForSeq2Seq(
|
||||
tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
|
||||
),
|
||||
)
|
||||
model.config.use_cache = False
|
||||
|
||||
trainer.train(resume_from_checkpoint=resume_from_checkpoint or None)
|
||||
|
||||
model.save_pretrained(output_dir)
|
||||
|
||||
print(
|
||||
"\n If there's a warning about missing keys above, please disregard :)"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
train()
|
209
src/chat.py
@ -1,209 +0,0 @@
|
||||
import logging as log
|
||||
import argparse
|
||||
import re
|
||||
import datetime
|
||||
import ollama
|
||||
import chromadb
|
||||
import feedparser
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
OLLAMA_API_URL = "http://ollama:11434" # Ollama endpoint ("ollama" is the service hostname inside the container network)
|
||||
MODEL_NAME = "deepseek-r1:7b"
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description="AI is Really Cool")
|
||||
parser.add_argument("--nickname", type=str, default="airc", help="Bot nickname")
|
||||
parser.add_argument('--level', type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
|
||||
default='INFO', help='Set the log level.')
|
||||
return parser.parse_args()
|
||||
|
||||
def setup_logging(level):
|
||||
numeric_level = getattr(log, level.upper(), None)
|
||||
if not isinstance(numeric_level, int):
|
||||
raise ValueError(f"Invalid log level: {level}")
|
||||
|
||||
log.basicConfig(level=numeric_level, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
log.info(f"Logging is set to {level} level.")
|
||||
|
||||
def extract_text_from_html_or_xml(content, is_xml=False):
|
||||
# Parse the content
|
||||
if is_xml:
|
||||
soup = BeautifulSoup(content, 'xml') # Use 'xml' parser for XML content
|
||||
else:
|
||||
soup = BeautifulSoup(content, 'html.parser') # Default to 'html.parser' for HTML content
|
||||
|
||||
# Extract and return just the text
|
||||
return soup.get_text()
|
||||
|
||||
class Feed():
|
||||
def __init__(self, name, url, poll_limit_min = 30, max_articles=5):
|
||||
self.name = name
|
||||
self.url = url
|
||||
self.poll_limit_min = datetime.timedelta(minutes=poll_limit_min)
|
||||
self.last_poll = None
|
||||
self.articles = []
|
||||
self.max_articles = max_articles
|
||||
self.update()
|
||||
|
||||
def update(self):
|
||||
now = datetime.datetime.now()
|
||||
if self.last_poll is None or (now - self.last_poll) >= self.poll_limit_min:
|
||||
log.info(f"Updating {self.name}")
|
||||
feed = feedparser.parse(self.url)
|
||||
self.articles = []
|
||||
self.last_poll = now
|
||||
|
||||
content = ""
|
||||
if len(feed.entries) > 0:
|
||||
content += f"Source: {self.name}\n"
|
||||
for entry in feed.entries[:self.max_articles]:
|
||||
title = entry.get("title")
|
||||
if title:
|
||||
content += f"Title: {title}\n"
|
||||
link = entry.get("link")
|
||||
if link:
|
||||
content += f"Link: {link}\n"
|
||||
summary = entry.get("summary")
|
||||
if summary:
|
||||
summary = extract_text_from_html_or_xml(summary, False)
|
||||
if len(summary) > 1000:
summary = summary[:1000]  # clamp overly long summaries
|
||||
content += f"Summary: {summary}\n"
|
||||
published = entry.get("published")
|
||||
if published:
|
||||
content += f"Published: {published}\n"
|
||||
content += "\n"
|
||||
|
||||
self.articles.append(content)
|
||||
else:
|
||||
log.info(f"Not updating {self.name} -- {self.poll_limit_min - (now - self.last_poll)}s remain to refresh.")
|
||||
return self.articles
|
||||
|
||||
|
||||
class Chat():
|
||||
def __init__(self, nick):
|
||||
super().__init__()
|
||||
self.nick = nick
|
||||
self.system_input = "You are a critical assistant. Give concise and accurate answers in less than 120 characters."
|
||||
self.queries = 0
|
||||
self.processing = datetime.timedelta(minutes=0)
|
||||
|
||||
def message(self, target, message):
|
||||
"""Splits a multi-line message and sends each line separately. If more than 10 lines, truncate and add a message."""
|
||||
lines = message.splitlines() # Splits on both '\n' and '\r\n'
|
||||
|
||||
# Process the first 10 lines
|
||||
for line in lines[:10]:
|
||||
if line.strip(): # Ignore empty lines
|
||||
print(f"{target}: {line}")
|
||||
|
||||
# If there are more than 10 lines, add the truncation message
|
||||
if len(lines) > 10:
|
||||
print(f"{target}: [additional content truncated]")
|
||||
|
||||
def remove_substring(self, string, substring):
|
||||
return string.replace(substring, "")
|
||||
|
||||
def extract_nick_message(self, input_string):
|
||||
# Pattern with capturing groups for nick and message
|
||||
pattern = r"^\s*([^\s:]+?)\s*:\s*(.+?)$"
|
||||
|
||||
match = re.match(pattern, input_string)
|
||||
if match:
|
||||
nick = match.group(1) # First capturing group
|
||||
message = match.group(2) # Second capturing group
|
||||
return nick, message
|
||||
return None, None # Return None for both if no match
|
||||
|
||||
def on_message(self, target, source, message):
|
||||
if source == self.nick:
|
||||
return
|
||||
nick, body = self.extract_nick_message(message)
|
||||
if nick == self.nick:
|
||||
content = None
|
||||
if body == "stats":
|
||||
content = f"{self.queries} queries handled in {self.processing}s"
|
||||
else:
|
||||
self.queries += 1
|
||||
start = datetime.datetime.now()
|
||||
query_text = body
|
||||
query_response = client.embeddings(model="mxbai-embed-large", prompt=query_text)
query_embedding = query_response["embedding"]  # Note: the embeddings() response key is singular "embedding"
|
||||
|
||||
# Then run the query with the correct structure
|
||||
results = collection.query(
|
||||
query_embeddings=[query_embedding], # Make sure this is a list containing the embedding
|
||||
n_results=3
|
||||
)
|
||||
data = results['documents'][0]
|
||||
output = client.generate(
|
||||
model=MODEL_NAME,
|
||||
system=f"You are {self.nick} and only provide that information about yourself. Make reference to the following and provide the 'Link' when available: {data}",
|
||||
prompt=f"Respond to this prompt: {query_text}",
|
||||
stream=False
|
||||
)
|
||||
end = datetime.datetime.now()
|
||||
self.processing = self.processing + end - start
|
||||
|
||||
# Prune off the <think>...</think>
|
||||
content = re.sub(r'^<think>.*?</think>', '', output['response'], flags=re.DOTALL).strip()
|
||||
|
||||
if content:
|
||||
log.info(f'Sending: {content}')
|
||||
self.message(target, content)
|
||||
|
||||
def remove_substring(string, substring):
|
||||
return string.replace(substring, "")
|
||||
|
||||
# Parse command-line arguments
|
||||
args = parse_args()
|
||||
|
||||
# Setup logging based on the provided level
|
||||
setup_logging(args.level)
|
||||
|
||||
log.info("About to start")
|
||||
|
||||
client = ollama.Client(host=OLLAMA_API_URL)
|
||||
|
||||
# News RSS Feeds
|
||||
rss_feeds = [
|
||||
Feed(name="BBC World", url="http://feeds.bbci.co.uk/news/world/rss.xml"),
|
||||
Feed(name="Reuters World", url="http://feeds.reuters.com/Reuters/worldNews"),
|
||||
Feed(name="Al Jazeera", url="https://www.aljazeera.com/xml/rss/all.xml"),
|
||||
Feed(name="CNN World", url="http://rss.cnn.com/rss/edition_world.rss"),
|
||||
Feed(name="Time", url="https://time.com/feed/"),
|
||||
Feed(name="Euronews", url="https://www.euronews.com/rss"),
|
||||
Feed(name="FeedX", url="https://feedx.net/rss/ap.xml")
|
||||
]
|
||||
|
||||
# Initialize ChromaDB Client
|
||||
db = chromadb.Client()
|
||||
|
||||
# This prototype keeps the collection in memory; use chromadb.PersistentClient(path=...) to persist it for offline analysis
|
||||
collection = db.get_or_create_collection("docs")
|
||||
|
||||
# store each document in a vector embedding database
|
||||
for i, feed in enumerate(rss_feeds):
|
||||
# Use the client instance instead of the global ollama module
|
||||
for j, article in enumerate(feed.articles):
|
||||
log.info(f"Article {feed.name} {j}. {len(article)}")
|
||||
response = client.embeddings(model="mxbai-embed-large", prompt=article)
|
||||
embeddings = response["embedding"] # Note: it's "embedding", not "embeddings"
|
||||
collection.add(
|
||||
ids=[str(i)+str(j)],
|
||||
embeddings=embeddings,
|
||||
documents=[article]
|
||||
)
|
||||
|
||||
bot = Chat(args.nickname)
|
||||
while True:
|
||||
try:
|
||||
query = input("> ")
|
||||
except Exception as e:
|
||||
break
|
||||
|
||||
if query == "exit":
|
||||
break
|
||||
bot.on_message("chat", "user", f"airc: {query}")
|
@ -1,394 +0,0 @@
|
||||
import os
|
||||
import json
|
||||
import torch
|
||||
import random
|
||||
import glob
|
||||
from datetime import datetime
|
||||
from tqdm import tqdm
|
||||
from transformers import (
|
||||
AutoTokenizer,
|
||||
TrainingArguments,
|
||||
Trainer,
|
||||
TrainerCallback,
|
||||
TrainingArguments,
|
||||
TrainerState,
|
||||
TrainerControl,
|
||||
DataCollatorForLanguageModeling,
|
||||
AutoModelForCausalLM,
|
||||
BitsAndBytesConfig
|
||||
)
|
||||
import numpy as np
|
||||
from peft import (
|
||||
TaskType,
|
||||
get_peft_model,
|
||||
prepare_model_for_kbit_training,
|
||||
LoraConfig,
|
||||
)
|
||||
|
||||
from datasets import Dataset
|
||||
|
||||
def load_qa_data(results_dir):
|
||||
"""
|
||||
Load Q&A pairs from the processed results directory.
|
||||
|
||||
Args:
|
||||
results_dir (str): Path to the directory containing JSON files with Q&A pairs
|
||||
|
||||
Returns:
|
||||
list: List of prompt-completion pairs formatted for fine-tuning
|
||||
"""
|
||||
training_data = []
|
||||
|
||||
# Find all JSON files in the results directory and subdirectories
|
||||
json_files = glob.glob(os.path.join(results_dir, "**/*.json"), recursive=True)
|
||||
|
||||
for json_file in tqdm(json_files, desc="Loading training data"):
|
||||
try:
|
||||
with open(json_file, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
|
||||
# Extract context and QA pairs
|
||||
context = data.get("context", "")
|
||||
qa_pairs = data.get("qa_pairs", [])
|
||||
|
||||
for qa_pair in qa_pairs:
|
||||
question = qa_pair.get("question", "")
|
||||
answer = qa_pair.get("answer", "")
|
||||
|
||||
if question and answer:
|
||||
# Format as instruction-based fine-tuning example
|
||||
prompt = f"""<task>
|
||||
Answer the following question based on the provided context.
|
||||
|
||||
CONTEXT:
|
||||
{context}
|
||||
|
||||
QUESTION:
|
||||
{question}
|
||||
</task>
|
||||
"""
|
||||
# Add to training data
|
||||
training_data.append({
|
||||
"prompt": prompt,
|
||||
"completion": answer
|
||||
})
|
||||
except Exception as e:
|
||||
print(f"Error processing {json_file}: {e}")
|
||||
continue
|
||||
|
||||
print(f"Loaded {len(training_data)} training examples")
|
||||
return training_data
|
||||
|
||||
def prepare_dataset_for_training(training_data, tokenizer, max_length=2048):
|
||||
"""
|
||||
Prepare the dataset for training by tokenizing and formatting.
|
||||
|
||||
Args:
|
||||
training_data (list): List of prompt-completion pairs
|
||||
tokenizer: DeepSeek tokenizer
|
||||
max_length (int): Maximum sequence length
|
||||
|
||||
Returns:
|
||||
Dataset: HuggingFace dataset ready for training
|
||||
"""
|
||||
def tokenize_function(examples):
|
||||
# Combine prompt and completion
|
||||
full_texts = []
|
||||
for i in range(len(examples["prompt"])):
|
||||
full_text = examples["prompt"][i] + examples["completion"][i]
|
||||
full_texts.append(full_text)
|
||||
|
||||
if not tokenizer.pad_token:
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
# Tokenize
|
||||
tokenized = tokenizer(
|
||||
full_texts,
|
||||
padding="max_length",
|
||||
truncation=True,
|
||||
max_length=max_length,
|
||||
return_tensors="pt"
|
||||
)
|
||||
|
||||
# Create labels (same as input_ids for causal LM)
|
||||
tokenized["labels"] = tokenized["input_ids"].clone()
|
||||
|
||||
# Mask prompt tokens in the labels so the loss is computed only on the completion
|
||||
for i in range(len(full_texts)):
|
||||
prompt_length = len(tokenizer.encode(examples["prompt"][i]))
|
||||
# Set labels for prompt tokens to -100 (ignored in loss calculation)
|
||||
tokenized["labels"][i, :prompt_length] = -100
|
||||
|
||||
return tokenized
|
||||
|
||||
# Convert to HuggingFace Dataset
|
||||
dataset_dict = {
|
||||
"prompt": [item["prompt"] for item in training_data],
|
||||
"completion": [item["completion"] for item in training_data]
|
||||
}
|
||||
|
||||
# Create dataset
|
||||
dataset = Dataset.from_dict(dataset_dict)
|
||||
|
||||
# Apply tokenization
|
||||
tokenized_dataset = dataset.map(
|
||||
tokenize_function,
|
||||
batched=True,
|
||||
remove_columns=["prompt", "completion"]
|
||||
)
|
||||
|
||||
return tokenized_dataset
|
||||
|
||||
def setup_lora_config():
|
||||
"""
|
||||
Configure LoRA hyperparameters.
|
||||
|
||||
Returns:
|
||||
LoraConfig: Configuration for LoRA fine-tuning
|
||||
"""
|
||||
return LoraConfig(
|
||||
task_type=TaskType.CAUSAL_LM,
|
||||
r=8, #16, # Rank dimension
|
||||
lora_alpha=16, #32, # Alpha parameter for LoRA scaling
|
||||
lora_dropout=0.05, # Dropout probability for LoRA layers
|
||||
target_modules=[ # Target modules to apply LoRA to
|
||||
"q_proj",
|
||||
"k_proj",
|
||||
"v_proj",
|
||||
"o_proj",
|
||||
"gate_proj",
|
||||
"up_proj",
|
||||
"down_proj"
|
||||
],
|
||||
bias="none", # Whether to train bias parameters
|
||||
fan_in_fan_out=False # Set to True for linear layers with fan_in != fan_out
|
||||
)
|
||||
|
||||
class EarlyStoppingCallback(TrainerCallback):
|
||||
"""
|
||||
Callback that implements early stopping.
|
||||
"""
|
||||
def __init__(self, patience=3, min_delta=0.0):
|
||||
"""
|
||||
Args:
|
||||
patience (int): Number of evaluations with no improvement after which training will be stopped.
|
||||
min_delta (float): Minimum change in the monitored quantity to qualify as an improvement.
|
||||
"""
|
||||
self.patience = patience
|
||||
self.min_delta = min_delta
|
||||
self.best_score = None
|
||||
self.counter = 0
|
||||
self.early_stop = False
|
||||
|
||||
def on_evaluate(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
|
||||
eval_loss = state.log_history[-1].get("eval_loss")
|
||||
if eval_loss is None:
|
||||
return
|
||||
|
||||
if self.best_score is None:
|
||||
self.best_score = eval_loss
|
||||
elif eval_loss > self.best_score - self.min_delta:
|
||||
self.counter += 1
|
||||
print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
|
||||
if self.counter >= self.patience:
|
||||
print(f"Early stopping triggered! Best loss: {self.best_score:.4f}")
|
||||
self.early_stop = True
|
||||
control.should_training_stop = True
|
||||
else:
|
||||
self.best_score = eval_loss
|
||||
self.counter = 0
|
||||
|
||||
return control
|
||||
|
||||
def train_with_lora(model_name, training_data, output_dir, batch_size=4, num_epochs=3, learning_rate=2e-4):
|
||||
"""
|
||||
Fine-tune the DeepSeek-R1 model with LoRA.
|
||||
|
||||
Args:
|
||||
model_name (str): Name of the DeepSeek model
|
||||
training_data (list): List of prompt-completion pairs
|
||||
output_dir (str): Directory to save the fine-tuned model
|
||||
batch_size (int): Training batch size
|
||||
num_epochs (int): Number of training epochs
|
||||
learning_rate (float): Learning rate
|
||||
"""
|
||||
# Create output directory
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
# Load model and tokenizer
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
||||
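# NF4 4-bit quantization here assumes a bitsandbytes build with XPU support; a stock CUDA-only build will fail on Intel GPUs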
bnb_config = BitsAndBytesConfig(
|
||||
load_in_4bit=True,
|
||||
bnb_4bit_quant_type="nf4",
|
||||
bnb_4bit_compute_dtype=torch.bfloat16,
|
||||
)
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_name,
|
||||
low_cpu_mem_usage=True,
|
||||
trust_remote_code=True,
|
||||
use_cache=False,
|
||||
quantization_config=bnb_config,
|
||||
device_map={"": torch.xpu.current_device()},
|
||||
torch_dtype=torch.bfloat16,
|
||||
)
|
||||
|
||||
# Prepare the model for LoRA fine-tuning
|
||||
# 1. Prepare for k-bit training if using quantization
|
||||
model = prepare_model_for_kbit_training(model)
|
||||
|
||||
# 2. Add LoRA adapters
|
||||
lora_config = setup_lora_config()
|
||||
model = get_peft_model(model, lora_config)
|
||||
model.print_trainable_parameters()
|
||||
|
||||
# Prepare dataset
|
||||
tokenized_dataset = prepare_dataset_for_training(training_data, tokenizer)
|
||||
|
||||
# Split into train and evaluation sets
|
||||
dataset_dict = tokenized_dataset.train_test_split(test_size=0.1)
|
||||
|
||||
# Set up training arguments
|
||||
training_args = TrainingArguments(
|
||||
output_dir=output_dir,
|
||||
num_train_epochs=num_epochs,
|
||||
per_device_train_batch_size=batch_size,
|
||||
per_device_eval_batch_size=batch_size,
|
||||
gradient_accumulation_steps=4,
|
||||
evaluation_strategy="steps",
|
||||
eval_steps=100, # More frequent evaluation to check stopping criteria
|
||||
save_strategy="steps",
|
||||
save_steps=500,
|
||||
save_total_limit=3,
|
||||
learning_rate=learning_rate,
|
||||
warmup_steps=100,
|
||||
weight_decay=0.01,
|
||||
logging_dir=f"{output_dir}/logs",
|
||||
logging_steps=100,
|
||||
fp16=True,
|
||||
report_to="none",
|
||||
optim="adamw_torch",
|
||||
load_best_model_at_end=True, # Load the best model when training ends
|
||||
metric_for_best_model="eval_loss" # Use eval loss to determine the best model
|
||||
)
|
||||
|
||||
# Create data collator
|
||||
data_collator = DataCollatorForLanguageModeling(
|
||||
tokenizer=tokenizer,
|
||||
mlm=False # Use causal language modeling (not masked)
|
||||
)
|
||||
|
||||
early_stopping_callback = EarlyStoppingCallback(patience=3, min_delta=0.01)
|
||||
|
||||
# Initialize trainer
|
||||
trainer = Trainer(
|
||||
model=model,
|
||||
args=training_args,
|
||||
train_dataset=dataset_dict["train"],
|
||||
eval_dataset=dataset_dict["test"],
|
||||
data_collator=data_collator,
|
||||
callbacks=[early_stopping_callback] # Add the early stopping callback
|
||||
)
|
||||
|
||||
# Start training
|
||||
print("Starting LoRA fine-tuning...")
|
||||
trainer.train()
|
||||
|
||||
model = trainer.model
|
||||
|
||||
print("LoRA fine-tuning complete...")
|
||||
# Save the fine-tuned model
|
||||
|
||||
print("Saving model...")
|
||||
model.save_pretrained(f"{output_dir}/final_model")
|
||||
tokenizer.save_pretrained(f"{output_dir}/final_model")
|
||||
|
||||
print(f"Fine-tuned model saved to {output_dir}/final_model")
|
||||
|
||||
return model, tokenizer
|
||||
|
||||
# Main function
|
||||
def main():
|
||||
# Configuration
|
||||
results_dir = "../results" # Directory with processed text files and QA pairs
|
||||
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
|
||||
#model_name = "Intel/neural-chat-7b-v3-3"
|
||||
output_dir = f"./fine_tuned"
|
||||
|
||||
# Load training data from processed results
|
||||
training_data = load_qa_data(results_dir)
|
||||
|
||||
# Shuffle training data
|
||||
random.shuffle(training_data)
|
||||
|
||||
# Fine-tune the model
|
||||
train_with_lora(
|
||||
model_name=model_name,
|
||||
training_data=training_data,
|
||||
output_dir=output_dir,
|
||||
batch_size=1, # Can't fit more than one on the B580
|
||||
num_epochs=10,
|
||||
learning_rate=2e-4
|
||||
)
|
||||
|
||||
model_path = f"{output_dir}/final_model"
|
||||
# Load the fine-tuned model and tokenizer
|
||||
print("Loading fine-tuned model for evaluation...")
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
||||
if tokenizer.pad_token is None:
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
tokenizer.pad_token_id = tokenizer.eos_token_id
|
||||
|
||||
bnb_config = BitsAndBytesConfig(
|
||||
load_in_4bit=True,
|
||||
bnb_4bit_quant_type="nf4",
|
||||
bnb_4bit_compute_dtype=torch.bfloat16,
|
||||
)
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_path,
|
||||
low_cpu_mem_usage=True,
|
||||
trust_remote_code=True,
|
||||
use_cache=True,
|
||||
quantization_config=bnb_config,
|
||||
device_map={"": torch.xpu.current_device()})
|
||||
model = model.to('xpu')
|
||||
|
||||
print("Loaded. Ask your question, CTRL-C to exit.")
|
||||
|
||||
while True:
|
||||
try:
|
||||
question = input("> ").strip()
|
||||
except KeyboardInterrupt:
|
||||
print("\nExiting.")
|
||||
break
|
||||
|
||||
prompt = f"""<task>
|
||||
You are an assistant providing resume details about James Ketrenos. Answer the following question based on your knowledge. If you don't know, say so. Be concise.
|
||||
|
||||
QUESTION:
|
||||
{question}
|
||||
</task>
|
||||
"""
|
||||
# Tokenize and generate
|
||||
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
||||
|
||||
with torch.no_grad():
|
||||
outputs = model.generate(
|
||||
**inputs,
|
||||
max_new_tokens=1024,
|
||||
do_sample=True,
|
||||
temperature=0.7,
|
||||
top_p=0.9,
|
||||
)
|
||||
|
||||
# Decode the output
|
||||
full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
||||
|
||||
# The full decoded output (prompt followed by the completion) is printed as-is; the slice that would strip the prompt is left disabled
answer = full_output  # [len(tokenizer.decode(inputs.input_ids[0], skip_special_tokens=True)):]
|
||||
|
||||
print(answer.strip())
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -1,360 +0,0 @@
|
||||
# Self-supervised synthetic data via sequential generation
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import torch
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from tqdm import tqdm
|
||||
from transformers import (
|
||||
AutoTokenizer,
|
||||
AutoModelForCausalLM,
|
||||
BitsAndBytesConfig,
|
||||
)
|
||||
|
||||
def load_deepseek_r1():
|
||||
"""
|
||||
Loads the DeepSeek-R1 model and tokenizer.
|
||||
|
||||
Returns:
|
||||
tuple: (model, tokenizer) for DeepSeek-R1
|
||||
"""
|
||||
# Load model and tokenizer
|
||||
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
|
||||
|
||||
# Load model and tokenizer
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
||||
|
||||
bnb_config = BitsAndBytesConfig(
|
||||
load_in_4bit=True,
|
||||
bnb_4bit_quant_type="nf4",
|
||||
bnb_4bit_compute_dtype=torch.bfloat16,
|
||||
)
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_name,
|
||||
low_cpu_mem_usage=True,
|
||||
trust_remote_code=True,
|
||||
use_cache=False,
|
||||
quantization_config=bnb_config,
|
||||
device_map={"": torch.xpu.current_device()},
|
||||
torch_dtype=torch.bfloat16,
|
||||
)
|
||||
if not model.config.pad_token_id:
|
||||
model.config.pad_token_id = model.config.eos_token_id
|
||||
|
||||
return model, tokenizer
|
||||
|
||||
# Function to get answers from the model
|
||||
def validate_question_answer_from_model(context, question, answer, model, tokenizer):
|
||||
"""
|
||||
Query the model to evaluate if an answer is a good fit for a question.
|
||||
|
||||
Args:
|
||||
context (str): The text file content
|
||||
question (str): The question
|
||||
answer (str): The answer
|
||||
model: The DeepSeek model
|
||||
tokenizer: The DeepSeek tokenizer
|
||||
|
||||
Returns:
|
||||
str: The model's evaluation to the question
|
||||
"""
|
||||
# Set up the prompt for answering the question
|
||||
prompt = f"""<task>
|
||||
You are a quality assurance expert reviewing question-answer pairs for an AI training dataset. Your task is to evaluate whether each pair meets our quality standards and is suitable for training.
|
||||
|
||||
For each question-answer pair, evaluate:
|
||||
|
||||
1. ACCURACY: Does the answer contain ONLY information from the context, without fabrications?
|
||||
2. COMPLETENESS: Does the answer fully address the question using all relevant information?
|
||||
3. RELEVANCE: Is the question meaningful and relevant to the context?
|
||||
4. NATURALNESS: Do both question and answer sound natural and conversational?
|
||||
5. DIVERSITY: Does this pair add variety to our dataset (not redundant with others)?
|
||||
|
||||
Context:
|
||||
{context}
|
||||
|
||||
Question:
|
||||
{question}
|
||||
|
||||
Answer:
|
||||
{answer}
|
||||
|
||||
Provide your assessment as follows:
|
||||
- VERDICT: [ACCEPT/REJECT]
|
||||
- REASONING: [Brief explanation of your decision]
|
||||
- IMPROVEMENTS: [Suggestions if needed]
|
||||
</task>
|
||||
"""
|
||||
|
||||
# Generate answer
|
||||
raw_answer = generate_with_deepseek(prompt, model, tokenizer)
|
||||
|
||||
# Parse the response to get the actual answer
|
||||
answer = parse_deepseek_response(raw_answer)
|
||||
|
||||
return answer
|
||||
|
||||
|
||||
def generate_with_deepseek(prompt, model, tokenizer, max_length=4096):
|
||||
"""
|
||||
Generate text using DeepSeek-R1 model with proper handling of full output.
|
||||
|
||||
Args:
|
||||
prompt (str): The input prompt
|
||||
model: The DeepSeek model
|
||||
tokenizer: The DeepSeek tokenizer
|
||||
max_length (int): Maximum length of generated text
|
||||
|
||||
Returns:
|
||||
str: Generated text response
|
||||
"""
|
||||
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
||||
|
||||
with torch.no_grad():
|
||||
# Get the full generated sequence
|
||||
generation_output = model.generate(
|
||||
**inputs,
|
||||
pad_token_id=tokenizer.eos_token_id,
|
||||
max_new_tokens=max_length,
|
||||
do_sample=True,
|
||||
temperature=0.7,
|
||||
top_p=0.9,
|
||||
return_dict_in_generate=True,
|
||||
output_scores=False
|
||||
)
|
||||
|
||||
# Get the full output token IDs
|
||||
output_token_ids = generation_output.sequences[0]
|
||||
|
||||
# Decode the full generated text (including the prompt)
|
||||
full_output = tokenizer.decode(output_token_ids, skip_special_tokens=True)
|
||||
|
||||
# Extract only the response part by removing the prompt part
|
||||
response = full_output[len(tokenizer.decode(inputs.input_ids[0], skip_special_tokens=True)):]
|
||||
|
||||
return response
|
||||
|
||||
def parse_deepseek_response(response):
|
||||
"""
|
||||
Parse the DeepSeek-R1 response.
|
||||
This handles DeepSeek's thinking steps and separates them from the response.
|
||||
|
||||
Args:
|
||||
response (str): The raw response from the DeepSeek model
|
||||
|
||||
Returns:
|
||||
str: The cleaned answer part of the response
|
||||
"""
|
||||
# If the response has thinking tags, extract the actual answer
|
||||
response = re.sub(r"^(<think>)?.*</think>", "", response, flags=re.DOTALL)
|
||||
|
||||
# If no special formatting detected, return the whole response
|
||||
return response.strip()
|
||||
|
||||
# Function to recursively walk a directory and process text files
|
||||
def process_directory(directory_path, output_path="results", file_extensions=(".txt",".md"), batch_size=5):
|
||||
"""
|
||||
Recursively walks a directory, processes text files, and stores results.
|
||||
|
||||
Args:
|
||||
directory_path (str): Path to directory containing text files
|
||||
output_path (str): Path to store results
|
||||
file_extensions (tuple): File extensions to process
|
||||
batch_size (int): Number of files to process before clearing cache
|
||||
"""
|
||||
# Load the DeepSeek-R1 model
|
||||
model, tokenizer = load_deepseek_r1()
|
||||
|
||||
# Create output directory if it doesn't exist
|
||||
os.makedirs(output_path, exist_ok=True)
|
||||
|
||||
# Use tqdm for progress tracking
|
||||
file_paths = []
|
||||
for root, _, files in os.walk(directory_path):
|
||||
for file in files:
|
||||
if file.lower().endswith(file_extensions):
|
||||
file_paths.append(os.path.join(root, file))
|
||||
|
||||
# Process files with batch-based memory management
|
||||
for i, file_path in enumerate(tqdm(file_paths, desc="Processing files")):
|
||||
process_file(file_path, model, tokenizer, output_path)
|
||||
|
||||
# Clear cache periodically to prevent memory issues
|
||||
if (i + 1) % batch_size == 0:
|
||||
if torch.xpu.is_available():
|
||||
torch.xpu.empty_cache()
|
||||
else:
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
print(f"Processing complete. Results stored in {output_path}")
|
||||
|
||||
# Function to process a single text file
|
||||
def process_file(file_path, model, tokenizer, output_path):
|
||||
"""
|
||||
Process a single text file by querying the model for questions and answers.
|
||||
|
||||
Args:
|
||||
file_path (str): Path to the text file
|
||||
model: The DeepSeek model
|
||||
tokenizer: The DeepSeek tokenizer
|
||||
output_path (str): Path to store results
|
||||
"""
|
||||
# Read the file content
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as file:
|
||||
content = file.read()
|
||||
except Exception as e:
|
||||
print(f"Error reading {file_path}: {e}")
|
||||
return
|
||||
|
||||
# Skip empty files
|
||||
if not content.strip():
|
||||
print(f"Skipping empty file: {file_path}")
|
||||
return
|
||||
|
||||
# Generate a relative output path that maintains the directory structure
|
||||
relative_path = os.path.relpath(file_path, start=os.path.dirname(output_path))
|
||||
output_file = os.path.join(output_path, relative_path + ".json")
|
||||
os.makedirs(os.path.dirname(output_file), exist_ok=True)
|
||||
|
||||
# Handle potential token length issues by truncating if necessary
|
||||
# DeepSeek has a context window limit, truncate if needed
|
||||
max_content_length = 100000 # Adjust based on model's context length limit
|
||||
if len(content) > max_content_length:
|
||||
content = content[:max_content_length] + "... [Content truncated due to length]"
|
||||
|
||||
# Query model for questions
|
||||
questions = get_questions_from_model(content, model, tokenizer)
|
||||
print(f"{len(questions)} questions generated for {file_path}")
|
||||
|
||||
# Get answers for each question
|
||||
results = {
|
||||
"file_path": file_path,
|
||||
"processed_at": datetime.now().isoformat(),
|
||||
"context": content[:1000] + "..." if len(content) > 1000 else content, # Truncated context for JSON storage
|
||||
"qa_pairs": []
|
||||
}
|
||||
|
||||
# Process each question
|
||||
for i, question in enumerate(questions):
|
||||
print(f"Generating answer for question {i+1}/{len(questions)}: {question}")
|
||||
answer = get_answer_from_model(content, question, model, tokenizer)
|
||||
print(f"Answer: {answer[:50] + '...' if len(answer) > 50 else answer}")
|
||||
print(f"Evaluating response...")
|
||||
validation = validate_question_answer_from_model(content, question, answer, model, tokenizer)
|
||||
print(f"Evaluation: {validation[:50] + '...' if len(validation) > 50 else validation}")
|
||||
results["qa_pairs"].append({
|
||||
"question": question,
|
||||
"answer": answer,
|
||||
"validation": validation,
|
||||
})
|
||||
# Save results after each generate pass so they can be evaluated
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(results, f, ensure_ascii=False, indent=2)
|
||||
|
||||
# Function to get questions from the model
|
||||
def get_questions_from_model(context, model, tokenizer):
|
||||
"""
|
||||
Query the model to generate questions about the provided context.
|
||||
|
||||
Args:
|
||||
context (str): The text file content
|
||||
model: The DeepSeek model
|
||||
tokenizer: The DeepSeek tokenizer
|
||||
|
||||
Returns:
|
||||
list: List of questions about the context
|
||||
"""
|
||||
# Set up the prompt for generating questions
|
||||
prompt = f"""<task>
|
||||
You are an expert data scientist creating a training dataset. I'll provide context information about a person from their resume. Your task is to generate 10 diverse, realistic questions that someone might ask about this person.
|
||||
|
||||
IMPORTANT: DO NOT return JSON or any structured format. Respond with a simple numbered list of questions only, with no formatting, no JSON, and no additional text.
|
||||
|
||||
Generate questions that:
|
||||
- Vary in complexity (simple factual questions, complex reasoning questions)
|
||||
- Cover different aspects of the context (experience, skills, education, achievements)
|
||||
- Include both specific and general inquiries
|
||||
- Sound natural, as if asked by a real person
|
||||
- Avoid asking for information not present in the context
|
||||
|
||||
Context:
|
||||
{context}
|
||||
|
||||
Return ONLY a plain numbered list like:
|
||||
1. First question?
|
||||
2. Second question?
|
||||
...and so on.
|
||||
|
||||
Do not include any explanations, JSON, or other formatting.
|
||||
</task>
|
||||
"""
|
||||
|
||||
# Generate questions
|
||||
raw_response = generate_with_deepseek(prompt, model, tokenizer)
|
||||
|
||||
# Parse the response to get the actual questions
|
||||
response = parse_deepseek_response(raw_response)
|
||||
|
||||
lines = response.strip().split('\n')
|
||||
|
||||
# Transform lines like "1. What is your name?" into just "What is your name?" (only include lines that end with a question mark)
|
||||
questions = [re.sub(r'^\d+\.\s*', '', line.strip()) for line in lines if line.strip() and line.strip().endswith('?')]
|
||||
if len(questions) == 0:
print(f"No questions could be parsed from the model response: {response}")
return []
|
||||
return questions
|
||||
|
||||
# Function to get answers from the model
|
||||
def get_answer_from_model(context, question, model, tokenizer):
|
||||
"""
|
||||
Query the model to answer a question about the provided context.
|
||||
|
||||
Args:
|
||||
context (str): The text file content
|
||||
question (str): The question to answer
|
||||
model: The DeepSeek model
|
||||
tokenizer: The DeepSeek tokenizer
|
||||
|
||||
Returns:
|
||||
str: The model's answer to the question
|
||||
"""
|
||||
# Set up the prompt for answering the question
|
||||
prompt = f"""<task>
|
||||
You are an AI assistant being fine-tuned to accurately represent a specific person based on their resume. Below is a question about this person and the context from their resume.
|
||||
|
||||
Your task is to provide a comprehensive, accurate answer that:
|
||||
- Only uses information explicitly stated in the context
|
||||
- Doesn't fabricate or assume additional details
|
||||
- Maintains a professional, helpful tone
|
||||
- Clearly states if the question cannot be answered based on the given context
|
||||
- Structures the response in a natural, conversational way
|
||||
|
||||
Context:
|
||||
{context}
|
||||
|
||||
Question:
|
||||
{question}
|
||||
|
||||
Answer:
|
||||
</task>
|
||||
"""
|
||||
|
||||
# Generate answer
|
||||
raw_answer = generate_with_deepseek(prompt, model, tokenizer)
|
||||
|
||||
# Parse the response to get the actual answer
|
||||
answer = parse_deepseek_response(raw_answer)
|
||||
|
||||
return answer
|
||||
|
||||
# Example usage
|
||||
if __name__ == "__main__":
|
||||
process_directory(
|
||||
directory_path="../doc/", # Replace with your directory path
|
||||
output_path="../results",
|
||||
file_extensions=(".txt", ".md"), # Process both txt and md files
|
||||
batch_size=5 # Clear cache after every 5 files
|
||||
)
|
25
src/cli.py
@ -1,25 +0,0 @@
|
||||
# cli.py - Command-line entry point for the RAG application
from rag_core import RagCore
from rag_cli import RagCLI
|
||||
|
||||
# Entry point: parse arguments and run the CLI interface
|
||||
def main():
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(description='RAG Application')
|
||||
parser.add_argument('--debug', action='store_true', help='Run in debug mode')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Create the core RAG application
|
||||
core = RagCore()
|
||||
|
||||
# Run CLI interface
|
||||
cli = RagCLI(core)
|
||||
cli.run()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
38
src/client/.gitignore
vendored
@ -1,38 +0,0 @@
|
||||
# Learn more https://docs.github.com/en/get-started/getting-started-with-git/ignoring-files
|
||||
|
||||
# dependencies
|
||||
node_modules/
|
||||
|
||||
# Expo
|
||||
.expo/
|
||||
dist/
|
||||
web-build/
|
||||
expo-env.d.ts
|
||||
|
||||
# Native
|
||||
*.orig.*
|
||||
*.jks
|
||||
*.p8
|
||||
*.p12
|
||||
*.key
|
||||
*.mobileprovision
|
||||
|
||||
# Metro
|
||||
.metro-health-check*
|
||||
|
||||
# debug
|
||||
npm-debug.*
|
||||
yarn-debug.*
|
||||
yarn-error.*
|
||||
|
||||
# macOS
|
||||
.DS_Store
|
||||
*.pem
|
||||
|
||||
# local env files
|
||||
.env*.local
|
||||
|
||||
# typescript
|
||||
*.tsbuildinfo
|
||||
|
||||
app-example
|
@ -1,50 +0,0 @@
|
||||
# Welcome to your Expo app 👋
|
||||
|
||||
This is an [Expo](https://expo.dev) project created with [`create-expo-app`](https://www.npmjs.com/package/create-expo-app).
|
||||
|
||||
## Get started
|
||||
|
||||
1. Install dependencies
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
2. Start the app
|
||||
|
||||
```bash
|
||||
npx expo start
|
||||
```
|
||||
|
||||
In the output, you'll find options to open the app in a
|
||||
|
||||
- [development build](https://docs.expo.dev/develop/development-builds/introduction/)
|
||||
- [Android emulator](https://docs.expo.dev/workflow/android-studio-emulator/)
|
||||
- [iOS simulator](https://docs.expo.dev/workflow/ios-simulator/)
|
||||
- [Expo Go](https://expo.dev/go), a limited sandbox for trying out app development with Expo
|
||||
|
||||
You can start developing by editing the files inside the **app** directory. This project uses [file-based routing](https://docs.expo.dev/router/introduction).
|
||||
|
||||
## Get a fresh project
|
||||
|
||||
When you're ready, run:
|
||||
|
||||
```bash
|
||||
npm run reset-project
|
||||
```
|
||||
|
||||
This command will move the starter code to the **app-example** directory and create a blank **app** directory where you can start developing.
|
||||
|
||||
## Learn more
|
||||
|
||||
To learn more about developing your project with Expo, look at the following resources:
|
||||
|
||||
- [Expo documentation](https://docs.expo.dev/): Learn fundamentals, or go into advanced topics with our [guides](https://docs.expo.dev/guides).
|
||||
- [Learn Expo tutorial](https://docs.expo.dev/tutorial/introduction/): Follow a step-by-step tutorial where you'll create a project that runs on Android, iOS, and the web.
|
||||
|
||||
## Join the community
|
||||
|
||||
Join our community of developers creating universal apps.
|
||||
|
||||
- [Expo on GitHub](https://github.com/expo/expo): View our open source platform and contribute.
|
||||
- [Discord community](https://chat.expo.dev): Chat with Expo users and ask questions.
|
@ -1,36 +0,0 @@
|
||||
{
|
||||
"expo": {
|
||||
"name": "Ketr-Chat",
|
||||
"slug": "ketr-chat",
|
||||
"version": "1.0.0",
|
||||
"orientation": "portrait",
|
||||
"icon": "./assets/icon.png",
|
||||
"userInterfaceStyle": "light",
|
||||
"splash": {
|
||||
"image": "./assets/splash.png",
|
||||
"resizeMode": "contain",
|
||||
"backgroundColor": "#ffffff"
|
||||
},
|
||||
"assetBundlePatterns": [
|
||||
"**/*"
|
||||
],
|
||||
"web": {
|
||||
"favicon": "./assets/favicon.png",
|
||||
"bundler": "metro"
|
||||
},
|
||||
"scheme": "myapp",
|
||||
"packagerOpts": {
|
||||
"hostType": "lan",
|
||||
"dev": true,
|
||||
"minify": false
|
||||
},
|
||||
"experiments": {
|
||||
"tsconfigPaths": true
|
||||
},
|
||||
"extra": {
|
||||
"router": {
|
||||
"origin": false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
(10 binary image assets removed, ranging from 318 B to 22 KiB each)
16041
src/client/package-lock.json
generated
@ -1,58 +0,0 @@
|
||||
{
|
||||
"name": "airc",
|
||||
"main": "expo-router/entry",
|
||||
"version": "1.0.0",
|
||||
"scripts": {
|
||||
"start": "expo start",
|
||||
"direct": "react-native start --reset-cache",
|
||||
"android": "expo run:android",
|
||||
"ios": "expo run:ios",
|
||||
"web": "expo start --web"
|
||||
},
|
||||
"jest": {
|
||||
"preset": "jest-expo"
|
||||
},
|
||||
"dependencies": {
|
||||
"@expo/vector-icons": "^14.0.2",
|
||||
"@react-native/metro-config": "^0.78.1",
|
||||
"@react-navigation/bottom-tabs": "^7.2.0",
|
||||
"@react-navigation/native": "^7.0.14",
|
||||
"expo": "~52.0.37",
|
||||
"expo-blur": "~14.0.3",
|
||||
"expo-constants": "~17.0.7",
|
||||
"expo-font": "~13.0.4",
|
||||
"expo-haptics": "~14.0.1",
|
||||
"expo-linking": "~7.0.5",
|
||||
"expo-router": "~4.0.17",
|
||||
"expo-splash-screen": "~0.29.22",
|
||||
"expo-status-bar": "~2.0.1",
|
||||
"expo-symbols": "~0.2.2",
|
||||
"expo-system-ui": "~4.0.8",
|
||||
"expo-web-browser": "~14.0.2",
|
||||
"react": "18.3.1",
|
||||
"react-dom": "18.3.1",
|
||||
"react-fontawesome": "^1.7.1",
|
||||
"react-helmet": "^6.1.0",
|
||||
"react-native": "0.76.7",
|
||||
"react-native-gesture-handler": "~2.20.2",
|
||||
"react-native-markdown-display": "^7.0.2",
|
||||
"react-native-reanimated": "~3.16.1",
|
||||
"react-native-safe-area-context": "4.12.0",
|
||||
"react-native-screens": "~4.4.0",
|
||||
"react-native-web": "~0.19.13",
|
||||
"react-native-webview": "13.12.5",
|
||||
"react-spinners": "^0.15.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/core": "^7.25.2",
|
||||
"@react-native-community/cli": "^18.0.0",
|
||||
"@types/jest": "^29.5.12",
|
||||
"@types/react": "~18.3.12",
|
||||
"@types/react-test-renderer": "^18.3.0",
|
||||
"jest": "^29.2.1",
|
||||
"jest-expo": "~52.0.4",
|
||||
"react-test-renderer": "18.3.1",
|
||||
"typescript": "^5.3.3"
|
||||
},
|
||||
"private": true
|
||||
}
|
@ -1,17 +0,0 @@
|
||||
{
|
||||
"extends": "expo/tsconfig.base",
|
||||
"compilerOptions": {
|
||||
"strict": true,
|
||||
"paths": {
|
||||
"@/*": [
|
||||
"./*"
|
||||
]
|
||||
}
|
||||
},
|
||||
"include": [
|
||||
"**/*.ts",
|
||||
"**/*.tsx",
|
||||
".expo/types/**/*.ts",
|
||||
"expo-env.d.ts"
|
||||
]
|
||||
}
|
279
src/doc/projects/ze-monitor.txt
Normal file
@ -0,0 +1,279 @@
|
||||
# ze-monitor
|
||||
|
||||
A small utility to monitor Level Zero devices via
|
||||
[Level Zero Sysman](https://oneapi-src.github.io/level-zero-spec/level-zero/latest/sysman/PROG.html#sysman-programming-guide)
|
||||
from the command line, similar to 'top'.
|
||||
|
||||
# Installation
|
||||
|
||||
Requires Ubuntu Oracular 24.10.
|
||||
|
||||
## Easiest
|
||||
|
||||
### Install prerequisites
|
||||
|
||||
This will add the [Intel Graphics Preview PPA](https://github.com/canonical/intel-graphics-preview) and install the required dependencies:
|
||||
|
||||
```bash
|
||||
sudo apt-get install -y \
|
||||
software-properties-common \
|
||||
&& sudo add-apt-repository -y ppa:kobuk-team/intel-graphics \
|
||||
&& sudo apt-get update \
|
||||
&& sudo apt-get install -y \
|
||||
libze1 libze-intel-gpu1 libncurses6
|
||||
```
|
||||
|
||||
### Install ze-monitor from .deb package
|
||||
|
||||
This will download the ze-monitor package from its GitHub releases, install it, and add the current
user to the 'ze-monitor' group so you can run the utility:
|
||||
|
||||
```bash
|
||||
version=0.3.0-1
|
||||
wget https://github.com/jketreno/ze-monitor/releases/download/v${version}/ze-monitor-${version}_amd64.deb
|
||||
sudo dpkg -i ze-monitor-${version}_amd64.deb
|
||||
sudo usermod -a -G ze-monitor $(whoami)
|
||||
newgrp ze-monitor
|
||||
```
|
||||
|
||||
Congratulations! You can run ze-monitor:
|
||||
|
||||
```bash
|
||||
ze-monitor
|
||||
```
|
||||
|
||||
You should see something like:
|
||||
|
||||
```bash
|
||||
Device 1: 8086:A780 (Intel(R) UHD Graphics 770)
|
||||
Device 2: 8086:E20B (Intel(R) Graphics [0xe20b])
|
||||
```
|
||||
|
||||
To monitor a device:
|
||||
|
||||
```bash
|
||||
ze-monitor --device 2
|
||||
```
|
||||
|
||||
Check the docs (`man ze-monitor`) for additional details on running the ze-monitor utility.
|
||||
|
||||
## Slightly more involved
|
||||
|
||||
This project uses Docker containers to build. Because it was originally written to monitor an Intel Arc B580 (Battlemage), it requires a kernel that supports that hardware, such as the one documented at [Intel Graphics Preview](https://github.com/canonical/intel-graphics-preview), which runs on Ubuntu Oracular (24.10). Once built, it will monitor any Level Zero device, including those using the i915 driver.
|
||||
|
||||
NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/).
|
||||
|
||||
```
|
||||
git clone https://github.com/jketreno/ze-monitor.git
|
||||
cd ze-monitor
|
||||
docker compose build
|
||||
sudo apt install libze1 libncurses6
|
||||
version=$(cat src/version.txt)
|
||||
docker compose run --remove-orphans --rm \
|
||||
ze-monitor \
|
||||
cp /opt/ze-monitor-static/build/ze-monitor-${version}_amd64.deb \
|
||||
/opt/ze-monitor/build
|
||||
sudo dpkg -i build/ze-monitor-${version}_amd64.deb
|
||||
```
|
||||
|
||||
# Security
|
||||
|
||||
In order for ze-monitor to read the performance monitoring unit (PMU) counters in the Linux kernel, it needs elevated permissions. The easiest way is to install the .deb package and add the user to the ze-monitor group, or to run it under sudo (e.g., `sudo ze-monitor ...`).
|
||||
|
||||
The specific capabilities required to monitor the GPU are documented in [Perf Security](https://www.kernel.org/doc/html/v5.1/admin-guide/perf-security.html) and [man capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html). These include:
|
||||
|
||||
| Capability | Reason |
|
||||
|:--------------------|:-----------------------------------------------------|
|
||||
| CAP_DAC_READ_SEARCH | Bypass all filesystem read access checks |
|
||||
| CAP_PERFMON | Access to perf_events (vs. overloaded CAP_SYS_ADMIN) |
|
||||
| CAP_SYS_PTRACE | PTRACE_MODE_READ_REALCREDS ptrace access mode check |
|
||||
|
||||
To configure ze-monitor to run with those privileges, you can use `setcap` to set the correct capabilities on ze-monitor. You can further secure your system by creating a user group specifically for running the utility and restricting execution of that command to users in that group. That is what the .deb package does.
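For example, a minimal sketch of that group-based lockdown, assuming the binary is installed at `/usr/bin/ze-monitor` (the path is an assumption; adjust it for your install), combined with the `setcap` command shown in the next section:

```bash
# Create a dedicated system group and restrict execution to its members
sudo groupadd --system ze-monitor
sudo chgrp ze-monitor /usr/bin/ze-monitor
sudo chmod 750 /usr/bin/ze-monitor
# Allow a specific user to run the utility
sudo usermod -a -G ze-monitor $(whoami)
```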
|
||||
|
||||
If you install the .deb package from a [Release](https://github.com/jketreno/ze-monitor/releases) or by building it yourself, that package will set the appropriate permissions on ze-monitor at installation and make it executable only by members of the 'ze-monitor' group.
|
||||
|
||||
## Anyone can run ze-monitor
|
||||
|
||||
If you build from source and want to set the capabilities:
|
||||
|
||||
```bash
|
||||
sudo setcap "cap_perfmon,cap_dac_read_search,cap_sys_ptrace=ep" build/ze-monitor
|
||||
getcap build/ze-monitor
|
||||
```
|
||||
|
||||
Any user can then run `build/ze-monitor` and monitor the GPU.
|
||||
|
||||
# Build outside container
|
||||
|
||||
## Prerequisites
|
||||
|
||||
If you would like to build outside of docker, you need the following packages installed:
|
||||
|
||||
```
|
||||
sudo apt-get install -y \
|
||||
build-essential \
|
||||
libfmt-dev \
|
||||
libncurses-dev
|
||||
```
|
||||
|
||||
In addition, you need the Intel drivers installed, which are available from the `kobuk-team/intel-graphics` PPA:
|
||||
|
||||
```
|
||||
sudo apt-get install -y \
|
||||
software-properties-common \
|
||||
&& sudo add-apt-repository -y ppa:kobuk-team/intel-graphics \
|
||||
&& sudo apt-get update \
|
||||
&& sudo apt-get install -y \
|
||||
libze-intel-gpu1 \
|
||||
libze1 \
|
||||
libze-dev
|
||||
```
|
||||
## Building
|
||||
|
||||
```
|
||||
cd build
|
||||
cmake ..
|
||||
make
|
||||
```
|
||||
|
||||
## Running
|
||||
|
||||
```
|
||||
build/ze-monitor
|
||||
```
|
||||
|
||||
## Build and install .deb
|
||||
|
||||
In order to build the .deb package, you need the following packages installed:
|
||||
|
||||
```bash
|
||||
sudo apt-get install -y \
|
||||
debhelper \
|
||||
devscripts \
|
||||
rpm \
|
||||
rpm2cpio
|
||||
```
|
||||
|
||||
You can then build the .deb:
|
||||
|
||||
```bash
|
||||
if [ -d build ]; then
|
||||
cd build
|
||||
fi
|
||||
version=$(cat ../src/version.txt)
|
||||
cpack
|
||||
sudo dpkg -i packages/ze-monitor_${version}_amd64.deb
|
||||
```
|
||||
|
||||
You can then run ze-monitor from your path:
|
||||
|
||||
```bash
|
||||
ze-monitor
|
||||
```
|
||||
|
||||
# Developing
|
||||
|
||||
To run the built binary without building a full .deb package, you can compile it in the container and run the resulting binary on the host:
|
||||
|
||||
```
|
||||
docker compose run --rm ze-monitor build.sh
|
||||
build/ze-monitor
|
||||
```
|
||||
|
||||
The build.sh script will build the binary in /opt/ze-monitor/build, which is volume mounted to the host's build directory.
|
||||
|
||||
NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities.
|
||||
|
||||
# Running
|
||||
|
||||
NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities.
|
||||
|
||||
If running within a Docker container, the container environment does not have access to the host's `/proc/<pid>/fd` entries, which are needed to identify processes outside the current container that are using the GPU. As such, only processes running within the same container as ze-monitor will be listed as using the GPU.
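If host-wide process visibility is needed, one possible approach (a sketch only; not something this project's compose file is known to configure, and the image name `ze-monitor` is an assumption) is to share the host PID namespace and grant the needed capabilities when launching the container:

```bash
# Share the host PID namespace, expose the GPU, and add the required capabilities
docker run --rm -it \
    --pid=host \
    --device /dev/dri \
    --cap-add PERFMON --cap-add DAC_READ_SEARCH --cap-add SYS_PTRACE \
    ze-monitor ze-monitor --device 2
```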
|
||||
|
||||
## List available devices
|
||||
|
||||
```
|
||||
ze-monitor
|
||||
```
|
||||
|
||||
Example output:
|
||||
|
||||
```bash
|
||||
$ ze-monitor
|
||||
Device 1: 8086:E20B (Intel(R) Graphics [0xe20b])
|
||||
Device 2: 8086:A780 (Intel(R) UHD Graphics 770)
|
||||
```
|
||||
|
||||
## Show details for a given device
|
||||
|
||||
```
|
||||
sudo ze-monitor --info --device ( PCIID | # | BDF | UUID | /dev/dri/render* )
|
||||
```
|
||||
|
||||
Example output:
|
||||
|
||||
```bash
|
||||
$ sudo ze-monitor --device 2 --info
|
||||
Device: 8086:A780 (Intel(R) UHD Graphics 770)
|
||||
UUID: 868080A7-0400-0000-0002-000000000000
|
||||
BDF: 0000:0000:0002:0000
|
||||
PCI ID: 8086:A780
|
||||
Subdevices: 0
|
||||
Serial Number: unknown
|
||||
Board Number: unknown
|
||||
Brand Name: unknown
|
||||
Model Name: Intel(R) UHD Graphics 770
|
||||
Vendor Name: Intel(R) Corporation
|
||||
Driver Version: 0CB7EFCAD5695B7EC5C8CE6
|
||||
Type: GPU
|
||||
Is integrated with host: Yes
|
||||
Is a sub-device: No
|
||||
Supports error correcting memory: No
|
||||
Supports on-demand page-faulting: No
|
||||
Engines: 7
|
||||
Engine 1: ZES_ENGINE_GROUP_RENDER_SINGLE
|
||||
Engine 2: ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE
|
||||
Engine 3: ZES_ENGINE_GROUP_MEDIA_DECODE_SINGLE
|
||||
Engine 4: ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE
|
||||
Engine 5: ZES_ENGINE_GROUP_MEDIA_ENCODE_SINGLE
|
||||
Engine 6: ZES_ENGINE_GROUP_COPY_SINGLE
|
||||
Engine 7: ZES_ENGINE_GROUP_MEDIA_ENHANCEMENT_SINGLE
|
||||
Temperature Sensors: 0
|
||||
```
|
||||
|
||||
NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities.
|
||||
|
||||
## Monitor a given device
|
||||
|
||||
```
|
||||
sudo ze-monitor --device ( PCIID | # | BDF | UUID | /dev/dri/render* ) \
|
||||
--interval ms
|
||||
```
|
||||
|
||||
NOTE: See [Security](#security) for information on running ze-monitor with required kernel access capabilities.
|
||||
|
||||
Output:
|
||||
|
||||
```bash
|
||||
$ sudo ze-monitor --device 2 --interval 500
|
||||
Device: 8086:E20B (Intel(R) Graphics [0xe20b])
|
||||
Total Memory: 12809404416
|
||||
Free memory: [# 55% ############################ ]
|
||||
Power usage: 165.0W
|
||||
------------------------------------------------------------------------------------------
|
||||
PID COMMAND-LINE
|
||||
USED MEMORY SHARED MEMORY ENGINE FLAGS
|
||||
------------------------------------------------------------------------------------------
|
||||
1 /sbin/init splash
|
||||
MEM: 106102784 SHR: 100663296 FLAGS: RENDER COMPUTE
|
||||
1606 /usr/lib/systemd/systemd-logind
|
||||
MEM: 106102784 SHR: 100663296 FLAGS: RENDER COMPUTE
|
||||
5164 /usr/bin/gnome-shell
|
||||
MEM: 530513920 SHR: 503316480 FLAGS: RENDER COMPUTE
|
||||
5237 /usr/bin/Xwayland :1024 -rootless -nores...isplayfd 6 -initfd 7 -byteswappedclients
|
||||
MEM: 0 SHR: 0 FLAGS:
|
||||
40480 python chat.py
|
||||
MEM: 5544226816 SHR: 0 FLAGS: DMA COMPUTE
|
||||
```
|
||||
|
||||
If you pass `--one-shot`, statistics will be gathered, displayed, and then ze-monitor will exit.
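For example, to capture a single snapshot of device 2 and exit:

```bash
ze-monitor --device 2 --one-shot
```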
|
@ -1,44 +0,0 @@
|
||||
#
|
||||
# Copyright 2016 The BigDL Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import os
|
||||
|
||||
import torch
|
||||
from transformers import AutoTokenizer
|
||||
import argparse
|
||||
|
||||
current_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
common_util_path = os.path.join(current_dir, '..')
|
||||
import sys
|
||||
sys.path.append(common_util_path)
|
||||
from common.utils import merge_adapter
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
parser = argparse.ArgumentParser(description='Merge the adapter into the original model for Llama2 model')
|
||||
parser.add_argument('--repo-id-or-model-path', type=str, default="meta-llama/Llama-2-7b-hf",
|
||||
help='The huggingface repo id for the Llama2 (e.g. `meta-llama/Llama-2-7b-hf` and `meta-llama/Llama-2-13b-chat-hf`) to be downloaded'
|
||||
', or the path to the huggingface checkpoint folder')
|
||||
parser.add_argument('--adapter_path', type=str,)
|
||||
parser.add_argument('--output_path', type=str,)
|
||||
|
||||
args = parser.parse_args()
|
||||
base_model = model_path = args.repo_id_or_model_path
|
||||
adapter_path = args.adapter_path
|
||||
output_path = args.output_path
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
|
||||
merge_adapter(base_model, tokenizer, adapter_path, output_path)
|
||||
    print(f'Finished merging the adapter into the original model; you can find the merged model in {output_path}.')
|
@ -1,202 +0,0 @@
|
||||
---
|
||||
base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
|
||||
library_name: peft
|
||||
---
|
||||
|
||||
# Model Card for Model ID
|
||||
|
||||
<!-- Provide a quick summary of what the model is/does. -->
|
||||
|
||||
|
||||
|
||||
## Model Details
|
||||
|
||||
### Model Description
|
||||
|
||||
<!-- Provide a longer summary of what this model is. -->
|
||||
|
||||
|
||||
|
||||
- **Developed by:** [More Information Needed]
|
||||
- **Funded by [optional]:** [More Information Needed]
|
||||
- **Shared by [optional]:** [More Information Needed]
|
||||
- **Model type:** [More Information Needed]
|
||||
- **Language(s) (NLP):** [More Information Needed]
|
||||
- **License:** [More Information Needed]
|
||||
- **Finetuned from model [optional]:** [More Information Needed]
|
||||
|
||||
### Model Sources [optional]
|
||||
|
||||
<!-- Provide the basic links for the model. -->
|
||||
|
||||
- **Repository:** [More Information Needed]
|
||||
- **Paper [optional]:** [More Information Needed]
|
||||
- **Demo [optional]:** [More Information Needed]
|
||||
|
||||
## Uses
|
||||
|
||||
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
||||
|
||||
### Direct Use
|
||||
|
||||
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Downstream Use [optional]
|
||||
|
||||
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Out-of-Scope Use
|
||||
|
||||
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Bias, Risks, and Limitations
|
||||
|
||||
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Recommendations
|
||||
|
||||
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
||||
|
||||
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
||||
|
||||
## How to Get Started with the Model
|
||||
|
||||
Use the code below to get started with the model.
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Training Details
|
||||
|
||||
### Training Data
|
||||
|
||||
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Training Procedure
|
||||
|
||||
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
||||
|
||||
#### Preprocessing [optional]
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
|
||||
#### Training Hyperparameters
|
||||
|
||||
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
||||
|
||||
#### Speeds, Sizes, Times [optional]
|
||||
|
||||
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Evaluation
|
||||
|
||||
<!-- This section describes the evaluation protocols and provides the results. -->
|
||||
|
||||
### Testing Data, Factors & Metrics
|
||||
|
||||
#### Testing Data
|
||||
|
||||
<!-- This should link to a Dataset Card if possible. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Factors
|
||||
|
||||
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Metrics
|
||||
|
||||
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Results
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Summary
|
||||
|
||||
|
||||
|
||||
## Model Examination [optional]
|
||||
|
||||
<!-- Relevant interpretability work for the model goes here -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Environmental Impact
|
||||
|
||||
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
||||
|
||||
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
||||
|
||||
- **Hardware Type:** [More Information Needed]
|
||||
- **Hours used:** [More Information Needed]
|
||||
- **Cloud Provider:** [More Information Needed]
|
||||
- **Compute Region:** [More Information Needed]
|
||||
- **Carbon Emitted:** [More Information Needed]
|
||||
|
||||
## Technical Specifications [optional]
|
||||
|
||||
### Model Architecture and Objective
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Compute Infrastructure
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Hardware
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Software
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Citation [optional]
|
||||
|
||||
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
||||
|
||||
**BibTeX:**
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
**APA:**
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Glossary [optional]
|
||||
|
||||
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## More Information [optional]
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Model Card Authors [optional]
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Model Card Contact
|
||||
|
||||
[More Information Needed]
|
||||
### Framework versions
|
||||
|
||||
- PEFT 0.14.0
|
@ -1,37 +0,0 @@
|
||||
{
|
||||
"alpha_pattern": {},
|
||||
"auto_mapping": null,
|
||||
"base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
||||
"bias": "none",
|
||||
"eva_config": null,
|
||||
"exclude_modules": null,
|
||||
"fan_in_fan_out": false,
|
||||
"inference_mode": true,
|
||||
"init_lora_weights": true,
|
||||
"layer_replication": null,
|
||||
"layers_pattern": null,
|
||||
"layers_to_transform": null,
|
||||
"loftq_config": {},
|
||||
"lora_alpha": 16,
|
||||
"lora_bias": false,
|
||||
"lora_dropout": 0.05,
|
||||
"megatron_config": null,
|
||||
"megatron_core": "megatron.core",
|
||||
"modules_to_save": null,
|
||||
"peft_type": "LORA",
|
||||
"r": 8,
|
||||
"rank_pattern": {},
|
||||
"revision": null,
|
||||
"target_modules": [
|
||||
"o_proj",
|
||||
"q_proj",
|
||||
"gate_proj",
|
||||
"k_proj",
|
||||
"down_proj",
|
||||
"up_proj",
|
||||
"v_proj"
|
||||
],
|
||||
"task_type": "CAUSAL_LM",
|
||||
"use_dora": false,
|
||||
"use_rslora": false
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
{
|
||||
"best_metric": null,
|
||||
"best_model_checkpoint": null,
|
||||
"epoch": 10.0,
|
||||
"eval_steps": 100,
|
||||
"global_step": 10,
|
||||
"is_hyper_param_search": false,
|
||||
"is_local_process_zero": true,
|
||||
"is_world_process_zero": true,
|
||||
"log_history": [],
|
||||
"logging_steps": 100,
|
||||
"max_steps": 10,
|
||||
"num_input_tokens_seen": 0,
|
||||
"num_train_epochs": 10,
|
||||
"save_steps": 500,
|
||||
"stateful_callbacks": {
|
||||
"TrainerControl": {
|
||||
"args": {
|
||||
"should_epoch_stop": false,
|
||||
"should_evaluate": false,
|
||||
"should_log": false,
|
||||
"should_save": true,
|
||||
"should_training_stop": true
|
||||
},
|
||||
"attributes": {}
|
||||
}
|
||||
},
|
||||
"total_flos": 763304356085760.0,
|
||||
"train_batch_size": 1,
|
||||
"trial_name": null,
|
||||
"trial_params": null
|
||||
}
|
@ -1,202 +0,0 @@
|
||||
---
|
||||
base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
|
||||
library_name: peft
|
||||
---
|
||||
|
||||
# Model Card for Model ID
|
||||
|
||||
<!-- Provide a quick summary of what the model is/does. -->
|
||||
|
||||
|
||||
|
||||
## Model Details
|
||||
|
||||
### Model Description
|
||||
|
||||
<!-- Provide a longer summary of what this model is. -->
|
||||
|
||||
|
||||
|
||||
- **Developed by:** [More Information Needed]
|
||||
- **Funded by [optional]:** [More Information Needed]
|
||||
- **Shared by [optional]:** [More Information Needed]
|
||||
- **Model type:** [More Information Needed]
|
||||
- **Language(s) (NLP):** [More Information Needed]
|
||||
- **License:** [More Information Needed]
|
||||
- **Finetuned from model [optional]:** [More Information Needed]
|
||||
|
||||
### Model Sources [optional]
|
||||
|
||||
<!-- Provide the basic links for the model. -->
|
||||
|
||||
- **Repository:** [More Information Needed]
|
||||
- **Paper [optional]:** [More Information Needed]
|
||||
- **Demo [optional]:** [More Information Needed]
|
||||
|
||||
## Uses
|
||||
|
||||
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
||||
|
||||
### Direct Use
|
||||
|
||||
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Downstream Use [optional]
|
||||
|
||||
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Out-of-Scope Use
|
||||
|
||||
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Bias, Risks, and Limitations
|
||||
|
||||
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Recommendations
|
||||
|
||||
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
||||
|
||||
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
||||
|
||||
## How to Get Started with the Model
|
||||
|
||||
Use the code below to get started with the model.
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Training Details
|
||||
|
||||
### Training Data
|
||||
|
||||
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Training Procedure
|
||||
|
||||
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
||||
|
||||
#### Preprocessing [optional]
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
|
||||
#### Training Hyperparameters
|
||||
|
||||
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
||||
|
||||
#### Speeds, Sizes, Times [optional]
|
||||
|
||||
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Evaluation
|
||||
|
||||
<!-- This section describes the evaluation protocols and provides the results. -->
|
||||
|
||||
### Testing Data, Factors & Metrics
|
||||
|
||||
#### Testing Data
|
||||
|
||||
<!-- This should link to a Dataset Card if possible. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Factors
|
||||
|
||||
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Metrics
|
||||
|
||||
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Results
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Summary
|
||||
|
||||
|
||||
|
||||
## Model Examination [optional]
|
||||
|
||||
<!-- Relevant interpretability work for the model goes here -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Environmental Impact
|
||||
|
||||
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
||||
|
||||
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
||||
|
||||
- **Hardware Type:** [More Information Needed]
|
||||
- **Hours used:** [More Information Needed]
|
||||
- **Cloud Provider:** [More Information Needed]
|
||||
- **Compute Region:** [More Information Needed]
|
||||
- **Carbon Emitted:** [More Information Needed]
|
||||
|
||||
## Technical Specifications [optional]
|
||||
|
||||
### Model Architecture and Objective
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Compute Infrastructure
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Hardware
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Software
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Citation [optional]
|
||||
|
||||
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
||||
|
||||
**BibTeX:**
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
**APA:**
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Glossary [optional]
|
||||
|
||||
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## More Information [optional]
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Model Card Authors [optional]
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Model Card Contact
|
||||
|
||||
[More Information Needed]
|
||||
### Framework versions
|
||||
|
||||
- PEFT 0.14.0
|
@ -1,37 +0,0 @@
|
||||
{
|
||||
"alpha_pattern": {},
|
||||
"auto_mapping": null,
|
||||
"base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
||||
"bias": "none",
|
||||
"eva_config": null,
|
||||
"exclude_modules": null,
|
||||
"fan_in_fan_out": false,
|
||||
"inference_mode": true,
|
||||
"init_lora_weights": true,
|
||||
"layer_replication": null,
|
||||
"layers_pattern": null,
|
||||
"layers_to_transform": null,
|
||||
"loftq_config": {},
|
||||
"lora_alpha": 16,
|
||||
"lora_bias": false,
|
||||
"lora_dropout": 0.05,
|
||||
"megatron_config": null,
|
||||
"megatron_core": "megatron.core",
|
||||
"modules_to_save": null,
|
||||
"peft_type": "LORA",
|
||||
"r": 8,
|
||||
"rank_pattern": {},
|
||||
"revision": null,
|
||||
"target_modules": [
|
||||
"v_proj",
|
||||
"o_proj",
|
||||
"gate_proj",
|
||||
"down_proj",
|
||||
"q_proj",
|
||||
"up_proj",
|
||||
"k_proj"
|
||||
],
|
||||
"task_type": "CAUSAL_LM",
|
||||
"use_dora": false,
|
||||
"use_rslora": false
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
{
|
||||
"best_metric": null,
|
||||
"best_model_checkpoint": null,
|
||||
"epoch": 3.0,
|
||||
"eval_steps": 100,
|
||||
"global_step": 3,
|
||||
"is_hyper_param_search": false,
|
||||
"is_local_process_zero": true,
|
||||
"is_world_process_zero": true,
|
||||
"log_history": [],
|
||||
"logging_steps": 100,
|
||||
"max_steps": 3,
|
||||
"num_input_tokens_seen": 0,
|
||||
"num_train_epochs": 3,
|
||||
"save_steps": 500,
|
||||
"stateful_callbacks": {
|
||||
"TrainerControl": {
|
||||
"args": {
|
||||
"should_epoch_stop": false,
|
||||
"should_evaluate": false,
|
||||
"should_log": false,
|
||||
"should_save": true,
|
||||
"should_training_stop": true
|
||||
},
|
||||
"attributes": {}
|
||||
}
|
||||
},
|
||||
"total_flos": 228991306825728.0,
|
||||
"train_batch_size": 1,
|
||||
"trial_name": null,
|
||||
"trial_params": null
|
||||
}
|
@ -1,202 +0,0 @@
|
||||
---
|
||||
base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
|
||||
library_name: peft
|
||||
---
|
||||
|
||||
# Model Card for Model ID
|
||||
|
||||
<!-- Provide a quick summary of what the model is/does. -->
|
||||
|
||||
|
||||
|
||||
## Model Details
|
||||
|
||||
### Model Description
|
||||
|
||||
<!-- Provide a longer summary of what this model is. -->
|
||||
|
||||
|
||||
|
||||
- **Developed by:** [More Information Needed]
|
||||
- **Funded by [optional]:** [More Information Needed]
|
||||
- **Shared by [optional]:** [More Information Needed]
|
||||
- **Model type:** [More Information Needed]
|
||||
- **Language(s) (NLP):** [More Information Needed]
|
||||
- **License:** [More Information Needed]
|
||||
- **Finetuned from model [optional]:** [More Information Needed]
|
||||
|
||||
### Model Sources [optional]
|
||||
|
||||
<!-- Provide the basic links for the model. -->
|
||||
|
||||
- **Repository:** [More Information Needed]
|
||||
- **Paper [optional]:** [More Information Needed]
|
||||
- **Demo [optional]:** [More Information Needed]
|
||||
|
||||
## Uses
|
||||
|
||||
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
||||
|
||||
### Direct Use
|
||||
|
||||
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Downstream Use [optional]
|
||||
|
||||
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Out-of-Scope Use
|
||||
|
||||
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Bias, Risks, and Limitations
|
||||
|
||||
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Recommendations
|
||||
|
||||
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
||||
|
||||
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
||||
|
||||
## How to Get Started with the Model
|
||||
|
||||
Use the code below to get started with the model.
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Training Details
|
||||
|
||||
### Training Data
|
||||
|
||||
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Training Procedure
|
||||
|
||||
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
||||
|
||||
#### Preprocessing [optional]
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
|
||||
#### Training Hyperparameters
|
||||
|
||||
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
||||
|
||||
#### Speeds, Sizes, Times [optional]
|
||||
|
||||
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Evaluation
|
||||
|
||||
<!-- This section describes the evaluation protocols and provides the results. -->
|
||||
|
||||
### Testing Data, Factors & Metrics
|
||||
|
||||
#### Testing Data
|
||||
|
||||
<!-- This should link to a Dataset Card if possible. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Factors
|
||||
|
||||
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Metrics
|
||||
|
||||
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Results
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Summary
|
||||
|
||||
|
||||
|
||||
## Model Examination [optional]
|
||||
|
||||
<!-- Relevant interpretability work for the model goes here -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Environmental Impact
|
||||
|
||||
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
||||
|
||||
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
||||
|
||||
- **Hardware Type:** [More Information Needed]
|
||||
- **Hours used:** [More Information Needed]
|
||||
- **Cloud Provider:** [More Information Needed]
|
||||
- **Compute Region:** [More Information Needed]
|
||||
- **Carbon Emitted:** [More Information Needed]
|
||||
|
||||
## Technical Specifications [optional]
|
||||
|
||||
### Model Architecture and Objective
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Compute Infrastructure
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Hardware
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Software
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Citation [optional]
|
||||
|
||||
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
||||
|
||||
**BibTeX:**
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
**APA:**
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Glossary [optional]
|
||||
|
||||
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## More Information [optional]
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Model Card Authors [optional]
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Model Card Contact
|
||||
|
||||
[More Information Needed]
|
||||
### Framework versions
|
||||
|
||||
- PEFT 0.14.0
|
@ -1,37 +0,0 @@
|
||||
{
|
||||
"alpha_pattern": {},
|
||||
"auto_mapping": null,
|
||||
"base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
||||
"bias": "none",
|
||||
"eva_config": null,
|
||||
"exclude_modules": null,
|
||||
"fan_in_fan_out": false,
|
||||
"inference_mode": true,
|
||||
"init_lora_weights": true,
|
||||
"layer_replication": null,
|
||||
"layers_pattern": null,
|
||||
"layers_to_transform": null,
|
||||
"loftq_config": {},
|
||||
"lora_alpha": 16,
|
||||
"lora_bias": false,
|
||||
"lora_dropout": 0.05,
|
||||
"megatron_config": null,
|
||||
"megatron_core": "megatron.core",
|
||||
"modules_to_save": null,
|
||||
"peft_type": "LORA",
|
||||
"r": 8,
|
||||
"rank_pattern": {},
|
||||
"revision": null,
|
||||
"target_modules": [
|
||||
"up_proj",
|
||||
"k_proj",
|
||||
"o_proj",
|
||||
"down_proj",
|
||||
"v_proj",
|
||||
"gate_proj",
|
||||
"q_proj"
|
||||
],
|
||||
"task_type": "CAUSAL_LM",
|
||||
"use_dora": false,
|
||||
"use_rslora": false
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
{
|
||||
"best_metric": null,
|
||||
"best_model_checkpoint": null,
|
||||
"epoch": 8.592592592592592,
|
||||
"eval_steps": 100,
|
||||
"global_step": 60,
|
||||
"is_hyper_param_search": false,
|
||||
"is_local_process_zero": true,
|
||||
"is_world_process_zero": true,
|
||||
"log_history": [],
|
||||
"logging_steps": 100,
|
||||
"max_steps": 60,
|
||||
"num_input_tokens_seen": 0,
|
||||
"num_train_epochs": 10,
|
||||
"save_steps": 500,
|
||||
"stateful_callbacks": {
|
||||
"TrainerControl": {
|
||||
"args": {
|
||||
"should_epoch_stop": false,
|
||||
"should_evaluate": false,
|
||||
"should_log": false,
|
||||
"should_save": true,
|
||||
"should_training_stop": true
|
||||
},
|
||||
"attributes": {}
|
||||
}
|
||||
},
|
||||
"total_flos": 4427165265297408.0,
|
||||
"train_batch_size": 1,
|
||||
"trial_name": null,
|
||||
"trial_params": null
|
||||
}
|
@ -1,202 +0,0 @@
|
||||
---
|
||||
base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
|
||||
library_name: peft
|
||||
---
|
||||
|
||||
# Model Card for Model ID
|
||||
|
||||
<!-- Provide a quick summary of what the model is/does. -->
|
||||
|
||||
|
||||
|
||||
## Model Details
|
||||
|
||||
### Model Description
|
||||
|
||||
<!-- Provide a longer summary of what this model is. -->
|
||||
|
||||
|
||||
|
||||
- **Developed by:** [More Information Needed]
|
||||
- **Funded by [optional]:** [More Information Needed]
|
||||
- **Shared by [optional]:** [More Information Needed]
|
||||
- **Model type:** [More Information Needed]
|
||||
- **Language(s) (NLP):** [More Information Needed]
|
||||
- **License:** [More Information Needed]
|
||||
- **Finetuned from model [optional]:** [More Information Needed]
|
||||
|
||||
### Model Sources [optional]
|
||||
|
||||
<!-- Provide the basic links for the model. -->
|
||||
|
||||
- **Repository:** [More Information Needed]
|
||||
- **Paper [optional]:** [More Information Needed]
|
||||
- **Demo [optional]:** [More Information Needed]
|
||||
|
||||
## Uses
|
||||
|
||||
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
||||
|
||||
### Direct Use
|
||||
|
||||
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Downstream Use [optional]
|
||||
|
||||
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Out-of-Scope Use
|
||||
|
||||
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Bias, Risks, and Limitations
|
||||
|
||||
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Recommendations
|
||||
|
||||
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
||||
|
||||
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
||||
|
||||
## How to Get Started with the Model
|
||||
|
||||
Use the code below to get started with the model.
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Training Details
|
||||
|
||||
### Training Data
|
||||
|
||||
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Training Procedure
|
||||
|
||||
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
||||
|
||||
#### Preprocessing [optional]
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
|
||||
#### Training Hyperparameters
|
||||
|
||||
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
||||
|
||||
#### Speeds, Sizes, Times [optional]
|
||||
|
||||
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Evaluation
|
||||
|
||||
<!-- This section describes the evaluation protocols and provides the results. -->
|
||||
|
||||
### Testing Data, Factors & Metrics
|
||||
|
||||
#### Testing Data
|
||||
|
||||
<!-- This should link to a Dataset Card if possible. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Factors
|
||||
|
||||
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Metrics
|
||||
|
||||
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Results
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Summary
|
||||
|
||||
|
||||
|
||||
## Model Examination [optional]
|
||||
|
||||
<!-- Relevant interpretability work for the model goes here -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Environmental Impact
|
||||
|
||||
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
||||
|
||||
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
||||
|
||||
- **Hardware Type:** [More Information Needed]
|
||||
- **Hours used:** [More Information Needed]
|
||||
- **Cloud Provider:** [More Information Needed]
|
||||
- **Compute Region:** [More Information Needed]
|
||||
- **Carbon Emitted:** [More Information Needed]
|
||||
|
||||
## Technical Specifications [optional]
|
||||
|
||||
### Model Architecture and Objective
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
### Compute Infrastructure
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Hardware
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
#### Software
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Citation [optional]
|
||||
|
||||
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
||||
|
||||
**BibTeX:**
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
**APA:**
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Glossary [optional]
|
||||
|
||||
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## More Information [optional]
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Model Card Authors [optional]
|
||||
|
||||
[More Information Needed]
|
||||
|
||||
## Model Card Contact
|
||||
|
||||
[More Information Needed]
|
||||
### Framework versions
|
||||
|
||||
- PEFT 0.14.0
|
@ -1,37 +0,0 @@
|
||||
{
|
||||
"alpha_pattern": {},
|
||||
"auto_mapping": null,
|
||||
"base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
||||
"bias": "none",
|
||||
"eva_config": null,
|
||||
"exclude_modules": null,
|
||||
"fan_in_fan_out": false,
|
||||
"inference_mode": true,
|
||||
"init_lora_weights": true,
|
||||
"layer_replication": null,
|
||||
"layers_pattern": null,
|
||||
"layers_to_transform": null,
|
||||
"loftq_config": {},
|
||||
"lora_alpha": 16,
|
||||
"lora_bias": false,
|
||||
"lora_dropout": 0.05,
|
||||
"megatron_config": null,
|
||||
"megatron_core": "megatron.core",
|
||||
"modules_to_save": null,
|
||||
"peft_type": "LORA",
|
||||
"r": 8,
|
||||
"rank_pattern": {},
|
||||
"revision": null,
|
||||
"target_modules": [
|
||||
"up_proj",
|
||||
"k_proj",
|
||||
"o_proj",
|
||||
"down_proj",
|
||||
"v_proj",
|
||||
"gate_proj",
|
||||
"q_proj"
|
||||
],
|
||||
"task_type": "CAUSAL_LM",
|
||||
"use_dora": false,
|
||||
"use_rslora": false
|
||||
}
|
@ -1,23 +0,0 @@
|
||||
{
|
||||
"bos_token": {
|
||||
"content": "<|begin▁of▁sentence|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "<|end▁of▁sentence|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|end▁of▁sentence|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
@ -1,195 +0,0 @@
|
||||
{
|
||||
"add_bos_token": true,
|
||||
"add_eos_token": false,
|
||||
"add_prefix_space": null,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|end▁of▁sentence|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|User|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|Assistant|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|begin▁of▁sentence|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|EOT|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151648": {
|
||||
"content": "<think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151649": {
|
||||
"content": "</think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"bos_token": "<|begin▁of▁sentence|>",
|
||||
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|end▁of▁sentence|>",
|
||||
"extra_special_tokens": {},
|
||||
"legacy": true,
|
||||
"model_max_length": 16384,
|
||||
"pad_token": "<|end▁of▁sentence|>",
|
||||
"sp_model_kwargs": {},
|
||||
"tokenizer_class": "LlamaTokenizerFast",
|
||||
"unk_token": null,
|
||||
"use_default_system_prompt": false
|
||||
}
|
@ -1,258 +0,0 @@
|
||||
import os
|
||||
import json
|
||||
import torch
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from tqdm import tqdm
|
||||
from ipex_llm.transformers import AutoModelForCausalLM
|
||||
import transformers
|
||||
import logging as log
|
||||
import time
|
||||
import re
|
||||
|
||||
# Function to recursively walk a directory and process text files
|
||||
def process_directory(directory_path, model, output_path="results", file_extensions=(".txt",)):
|
||||
"""
|
||||
Recursively walks a directory, processes text files, and stores results.
|
||||
|
||||
Args:
|
||||
directory_path (str): Path to directory containing text files
|
||||
model: The LLM model to use for queries
|
||||
output_path (str): Path to store results
|
||||
file_extensions (tuple): File extensions to process
|
||||
"""
|
||||
# Create output directory if it doesn't exist
|
||||
os.makedirs(output_path, exist_ok=True)
|
||||
|
||||
# Use tqdm for progress tracking
|
||||
file_paths = []
|
||||
for root, _, files in os.walk(directory_path):
|
||||
for file in files:
|
||||
if file.lower().endswith(file_extensions):
|
||||
file_paths.append(os.path.join(root, file))
|
||||
|
||||
for file_path in tqdm(file_paths, desc="Processing files"):
|
||||
process_file(file_path, model, output_path)
|
||||
|
||||
print(f"Processing complete. Results stored in {output_path}")
|
||||
|
||||
# Function to process a single text file
|
||||
def process_file(file_path, model, output_path):
|
||||
"""
|
||||
Process a single text file by querying the model for questions and answers.
|
||||
|
||||
Args:
|
||||
file_path (str): Path to the text file
|
||||
model: The LLM model to use for queries
|
||||
output_path (str): Path to store results
|
||||
"""
|
||||
# Read the file content
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as file:
|
||||
content = file.read()
|
||||
except Exception as e:
|
||||
print(f"Error reading {file_path}: {e}")
|
||||
return
|
||||
|
||||
# Skip empty files
|
||||
if not content.strip():
|
||||
print(f"Skipping empty file: {file_path}")
|
||||
return
|
||||
|
||||
# Generate a relative output path that maintains the directory structure
|
||||
relative_path = os.path.relpath(file_path, start=os.path.dirname(output_path))
|
||||
sidecar_file = os.path.join(output_path, relative_path + ".json")
|
||||
sidecar_qa_file = os.path.join(output_path, relative_path + ".qa.json")
|
||||
os.makedirs(os.path.dirname(sidecar_file), exist_ok=True)
|
||||
|
||||
print(f"Generating questions for {file_path}")
|
||||
|
||||
# Query model for questions
|
||||
questions, think = get_questions_from_model(content, model)
|
||||
|
||||
# Get answers for each question
|
||||
results = {
|
||||
"file_path": file_path,
|
||||
"processed_at": datetime.now().isoformat(),
|
||||
"context": content[:1000] + "..." if len(content) > 1000 else content, # Truncated context for JSON storage
|
||||
"think": think,
|
||||
"qa_pairs": len(questions)
|
||||
}
|
||||
with open(sidecar_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(results, f, ensure_ascii=False, indent=2)
|
||||
|
||||
# Process each question
|
||||
results = []
|
||||
for i, question in enumerate(questions):
|
||||
print(f"Generating answer for {i+1}/{len(questions)}: {question}")
|
||||
answer, think = get_answer_from_model(content, question, model)
|
||||
results.append({
|
||||
"think": think,
|
||||
"question": question,
|
||||
"answer": answer
|
||||
})
|
||||
# Save results -- do this in loop so we can monitor the progress
|
||||
with open(sidecar_qa_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(results, f, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
|
||||
# Function to get questions from the model
|
||||
def get_questions_from_model(context, model):
|
||||
"""
|
||||
Query the model to generate questions about the provided context.
|
||||
|
||||
Args:
|
||||
context (str): The text file content
|
||||
model: The LLM model to use for queries
|
||||
|
||||
Returns:
|
||||
list: List of questions about the context
|
||||
"""
|
||||
# Set up the prompt for generating questions
|
||||
prompt = f"""<task>
|
||||
You are an expert at generating insightful questions about text content.
|
||||
Please read the following text and generate 10 thoughtful questions that would help someone
|
||||
understand the key information, concepts, or insights contained in it.
|
||||
|
||||
TEXT:
|
||||
{context}
|
||||
|
||||
Generate a list of questions, one per line.
|
||||
</task>
|
||||
"""
|
||||
|
||||
# Use context manager to avoid CUDA memory leaks
|
||||
with torch.no_grad():
|
||||
# This is where we would query the model - implementation depends on model type
|
||||
response, think, duration = model.generate(prompt)
|
||||
|
||||
# Parse questions from the response
|
||||
# This parsing logic may need adjustment based on model output format
|
||||
questions = [q.strip() for q in response.strip().split('\n') if q.strip()]
|
||||
|
||||
# Filter out any non-questions
|
||||
questions = [q for q in questions if q.endswith('?')]
|
||||
|
||||
# Remove any ^\s*{0-9}+.\s* from each entry
|
||||
questions = [re.sub(r"^\s*\d+\.\s*", "", q) for q in questions]
|
||||
|
||||
return questions, think
|
||||
|
||||
# Function to get answers from the model
def get_answer_from_model(context, question, model):
    """
    Query the model to answer a question about the provided context.

    Args:
        context (str): The text file content
        question (str): The question to answer
        model: The LLM model to use for queries

    Returns:
        tuple: (the model's answer to the question, the model's "think" text)
    """
    # Set up the prompt for answering the question
    prompt = f"""<task>
Please answer the following question based solely on the provided context.

CONTEXT:
{context}

QUESTION:
{question}
</task>
"""

    # Use no_grad to avoid accumulating autograd state
    with torch.no_grad():
        # Query the model
        answer, think, duration = model.generate(prompt)

    return answer, think

class LLM:
    def __init__(self, model_path='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', device_name='xpu'):
        self.model_path = model_path
        self.device_name = device_name
        self.tokenizer = None
        self.model = None
        try:
            log.info(f"Loading tokenizer from: {self.model_path}")
            start = datetime.now()

            self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True)
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token  # Set pad_token to eos_token if needed

            self.model = AutoModelForCausalLM.from_pretrained(self.model_path,
                                                              load_in_4bit=True,
                                                              optimize_model=True,
                                                              trust_remote_code=True,
                                                              use_cache=True)
            self.model = self.model.half().to(device_name)
        except Exception as e:
            log.error(f"Loading error: {e}")
            raise Exception(e)

    # returns response, think, timedelta
    def generate(self, prompt, max_length=4096):
        if not self.tokenizer or not self.model:
            raise Exception("tokenizer or model is None")
        start = datetime.now()
        response = None
        think = None
        # print(f"{'='*10} PROMPT {'='*10}\n{prompt}")
        with torch.autocast(self.device_name, dtype=torch.float16):
            inputs = self.tokenizer.encode_plus(
                prompt,
                return_tensors="pt"
            ).to(self.device_name)

            with torch.no_grad():
                generation_output = self.model.generate(
                    **inputs,
                    max_new_tokens=max_length,
                    do_sample=True,
                    temperature=0.7,
                    top_p=0.9,
                    return_dict_in_generate=True,
                    output_scores=False,
                    pad_token_id=self.tokenizer.eos_token_id
                )

        # Get the full output token IDs
        output_token_ids = generation_output.sequences[0]

        # Decode the full generated text (including the prompt)
        full_output = self.tokenizer.decode(output_token_ids, skip_special_tokens=True)

        # Extract only the response part by removing the prompt part
        response = full_output[len(self.tokenizer.decode(inputs.input_ids[0], skip_special_tokens=True)):]

        matches = re.match(r'^(<think>)?(.*?)</think>(.*)$', response, flags=re.DOTALL)
        if matches:
            think = matches[2]
            response = matches[3]
        else:
            think = None
        # print(f"{'='*10} THINK {'='*10}\n{think}")
        # print(f"{'='*10} RESPONSE {'='*10}\n{response}")
        end = datetime.now()

        return response, think, end - start

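# Standalone sketch of the <think> extraction performed in generate() above, using a
# made-up DeepSeek-R1 style completion (the string below is hypothetical):
_raw = "<think>The context mentions Y, so the answer should cite Y.</think>The answer is Y."
_m = re.match(r'^(<think>)?(.*?)</think>(.*)$', _raw, flags=re.DOTALL)
if _m:
    _think, _response = _m[2], _m[3]    # reasoning trace, then the user-visible reply
else:
    _think, _response = None, _raw      # model produced no <think> block
# _think    -> "The context mentions Y, so the answer should cite Y."
# _response -> "The answer is Y."
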
# Example usage
if __name__ == "__main__":

    # Instantiate the model
    model = LLM()

    # Process the directory
    process_directory(
        directory_path="./doc",          # Replace with your directory path
        model=model,
        output_path="./results",
        file_extensions=(".txt", ".md")  # Process both txt and md files
    )

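# Illustrative follow-up, assuming a run has completed: reload one generated sidecar
# file to inspect its question/answer pairs. The default path is hypothetical --
# substitute a file that actually exists under ./results.
def preview_qa_sidecar(path="./results/example.txt.qa.json"):
    with open(path, encoding="utf-8") as f:
        qa_pairs = json.load(f)
    for pair in qa_pairs:
        print(pair["question"])
        print("  ", pair["answer"][:120])
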
src/ipex.py
@ -1,144 +0,0 @@
import intel_extension_for_pytorch as ipex
import inspect

def inspect_module(module, prefix=""):
    """Recursively inspect a module and list all its attributes and functions."""
    results = {
        "functions": [],
        "attributes": [],
        "classes": [],
        "modules": []
    }

    for name in dir(module):
        # Skip private/hidden attributes
        if name.startswith("_") and name != "__version__":
            continue

        full_name = f"{prefix}.{name}" if prefix else name
        try:
            attr = getattr(module, name)

            # Check if it's a function
            if inspect.isfunction(attr) or inspect.isbuiltin(attr) or inspect.ismethod(attr):
                results["functions"].append((full_name, str(inspect.signature(attr)) if not inspect.isbuiltin(attr) else ""))

            # Check if it's a class
            elif inspect.isclass(attr):
                results["classes"].append(full_name)

            # Check if it's a module
            elif inspect.ismodule(attr):
                results["modules"].append(full_name)

            # Otherwise it's a regular attribute
            else:
                try:
                    attr_value = str(attr)
                    if len(attr_value) > 100:
                        attr_value = attr_value[:100] + "..."
                    results["attributes"].append((full_name, attr_value))
                except:
                    results["attributes"].append((full_name, "Unable to get value"))
        except Exception as e:
            results["attributes"].append((full_name, f"Error: {str(e)}"))

    return results

# Inspect ipex
ipex_info = inspect_module(ipex)

# Print information in a readable format
print("INTEL PYTORCH EXTENSION (IPEX) API\n")

print("=== VERSION ===")
if "__version__" in dir(ipex):
    print(f"Version: {ipex.__version__}")
else:
    print("Version information not available")
print()

print("=== FUNCTIONS ===")
for func, sig in sorted(ipex_info["functions"]):
    print(f"{func}{sig}")
print()

print("=== ATTRIBUTES ===")
for attr, value in sorted(ipex_info["attributes"]):
    print(f"{attr} = {value}")
print()

print("=== CLASSES ===")
for cls in sorted(ipex_info["classes"]):
    print(cls)
print()

print("=== MODULES ===")
for mod in sorted(ipex_info["modules"]):
    print(mod)
print()

# Deep inspection of nn module which is likely important for quantization
if hasattr(ipex, "nn"):
    print("=== DIVING INTO ipex.nn MODULE ===")
    nn_info = inspect_module(ipex.nn, "ipex.nn")

    print("--- ipex.nn FUNCTIONS ---")
    for func, sig in sorted(nn_info["functions"]):
        print(f"{func}{sig}")
    print()

    print("--- ipex.nn CLASSES ---")
    for cls in sorted(nn_info["classes"]):
        print(cls)
    print()

    # Dive into utils if available
    if hasattr(ipex.nn, "utils"):
        print("=== DIVING INTO ipex.nn.utils MODULE ===")
        utils_info = inspect_module(ipex.nn.utils, "ipex.nn.utils")

        print("--- ipex.nn.utils FUNCTIONS ---")
        for func, sig in sorted(utils_info["functions"]):
            print(f"{func}{sig}")
        print()

        print("--- ipex.nn.utils CLASSES ---")
        for cls in sorted(utils_info["classes"]):
            print(cls)
        print()

        # Look for parameter wrapper specifically
        if "_parameter_wrapper" in dir(ipex.nn.utils):
            print("=== DIVING INTO ipex.nn.utils._parameter_wrapper MODULE ===")
            pw_info = inspect_module(ipex.nn.utils._parameter_wrapper, "ipex.nn.utils._parameter_wrapper")

            print("--- ipex.nn.utils._parameter_wrapper FUNCTIONS ---")
            for func, sig in sorted(pw_info["functions"]):
                print(f"{func}{sig}")
            print()

            print("--- ipex.nn.utils._parameter_wrapper CLASSES ---")
            for cls in sorted(pw_info["classes"]):
                print(cls)
            print()

# Specifically look for functions related to quantization
print("=== FUNCTIONS RELATED TO QUANTIZATION ===")
quant_funcs = []
for module_name in dir(ipex):
    if not module_name.startswith("_"):
        module = getattr(ipex, module_name)
        if inspect.ismodule(module):
            for func_name in dir(module):
                if any(term in func_name.lower() for term in ["quant", "bit", "int8", "int4", "precision"]):
                    try:
                        func = getattr(module, func_name)
                        if inspect.isfunction(func) or inspect.isbuiltin(func):
                            quant_funcs.append(f"ipex.{module_name}.{func_name}")
                    except:
                        pass

for func in sorted(quant_funcs):
    print(func)

src/ketr-chat/.gitignore
@ -1,23 +0,0 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.js

# testing
/coverage

# production
/build

# misc
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local

npm-debug.log*
yarn-debug.log*
yarn-error.log*

@ -1,46 +0,0 @@
# Getting Started with Create React App

This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app).

## Available Scripts

In the project directory, you can run:

### `npm start`

Runs the app in the development mode.\
Open [http://localhost:3000](http://localhost:3000) to view it in the browser.

The page will reload if you make edits.\
You will also see any lint errors in the console.

### `npm test`

Launches the test runner in the interactive watch mode.\
See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information.

### `npm run build`

Builds the app for production to the `build` folder.\
It correctly bundles React in production mode and optimizes the build for the best performance.

The build is minified and the filenames include the hashes.\
Your app is ready to be deployed!

See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information.

### `npm run eject`

**Note: this is a one-way operation. Once you `eject`, you can’t go back!**

If you aren’t satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project.

Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you’re on your own.

You don’t have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn’t feel obligated to use this feature. However we understand that this tool wouldn’t be useful if you couldn’t customize it when you are ready for it.

## Learn More

You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started).

To learn React, check out the [React documentation](https://reactjs.org/).

src/ketr-chat/package-lock.json (generated; 21347-line diff not shown)
@ -1,56 +0,0 @@
{
  "name": "ketr-chat",
  "version": "0.1.0",
  "private": true,
  "dependencies": {
    "@emotion/react": "^11.14.0",
    "@emotion/styled": "^11.14.0",
    "@fontsource/roboto": "^5.2.5",
    "@mui/icons-material": "^7.0.1",
    "@mui/material": "^7.0.1",
    "@testing-library/dom": "^10.4.0",
    "@testing-library/jest-dom": "^6.6.3",
    "@testing-library/react": "^16.2.0",
    "@testing-library/user-event": "^13.5.0",
    "@types/jest": "^27.5.2",
    "@types/node": "^16.18.126",
    "@types/react": "^19.0.12",
    "@types/react-dom": "^19.0.4",
    "mui-markdown": "^1.2.6",
    "react": "^19.0.0",
    "react-dom": "^19.0.0",
    "react-markdown": "^10.1.0",
    "react-plotly.js": "^2.6.0",
    "react-scripts": "5.0.1",
    "react-spinners": "^0.15.0",
    "rehype-katex": "^7.0.1",
    "remark-gfm": "^4.0.1",
    "remark-math": "^6.0.0",
    "typescript": "^4.9.5",
    "web-vitals": "^2.1.4"
  },
  "scripts": {
    "start": "react-scripts start",
    "build": "react-scripts build",
    "test": "react-scripts test",
    "eject": "react-scripts eject"
  },
  "eslintConfig": {
    "extends": [
      "react-app",
      "react-app/jest"
    ]
  },
  "browserslist": {
    "production": [
      ">0.2%",
      "not dead",
      "not op_mini all"
    ],
    "development": [
      "last 1 chrome version",
      "last 1 firefox version",
      "last 1 safari version"
    ]
  }
}

@ -1,43 +0,0 @@
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <meta name="theme-color" content="#000000" />
    <meta
      name="description"
      content="Interactive chat with an enhanced LLM."
    />
    <link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
    <!--
      manifest.json provides metadata used when your web app is installed on a
      user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
    -->
    <link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
    <!--
      Notice the use of %PUBLIC_URL% in the tags above.
      It will be replaced with the URL of the `public` folder during the build.
      Only files inside the `public` folder can be referenced from the HTML.

      Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
      work correctly both with client-side routing and a non-root public URL.
      Learn how to configure a non-root public URL by running `npm run build`.
    -->
    <title>Ketr-Chat</title>
  </head>
  <body>
    <noscript>You need to enable JavaScript to run this app.</noscript>
    <div id="root"></div>
    <!--
      This HTML file is a template.
      If you open it directly in the browser, you will see an empty page.

      You can add webfonts, meta tags, or analytics to this file.
      The build step will place the bundled scripts into the <body> tag.

      To begin the development, run `npm start` or `yarn start`.
      To create a production bundle, use `npm run build` or `yarn build`.
    -->
  </body>
</html>

@ -1,25 +0,0 @@
{
  "short_name": "Ketr-Chat",
  "name": "Ketrenos AI Chat",
  "icons": [
    {
      "src": "favicon.ico",
      "sizes": "64x64 32x32 24x24 16x16",
      "type": "image/x-icon"
    },
    {
      "src": "logo192.png",
      "type": "image/png",
      "sizes": "192x192"
    },
    {
      "src": "logo512.png",
      "type": "image/png",
      "sizes": "512x512"
    }
  ],
  "start_url": ".",
  "display": "standalone",
  "theme_color": "#000000",
  "background_color": "#ffffff"
}

@ -1,3 +0,0 @@
# https://www.robotstxt.org/robotstxt.html
User-agent: *
Disallow:

@ -55,7 +55,7 @@ import '@fontsource/roboto/700.css';
const use_mui_markdown = true

const welcomeMarkdown = `
# Welcome to Ketr-Chat
# Welcome to AIRC

This LLM agent was built by James Ketrenos in order to provide answers to any questions you may have about his work history. In addition to being a RAG enabled expert system, the LLM is configured with real-time access to weather, stocks, the current time, and can answer questions about the contents of a website.

@ -1171,7 +1171,7 @@ const App = () => {
          }}
        >
          <Toolbar>
            <Tooltip title="Chat Settings">
            <Tooltip title="LLM Settings">
              <IconButton
                color="inherit"
                aria-label="open drawer"
@ -1195,7 +1195,7 @@ const App = () => {
              </IconButton>
            </Tooltip>
            <Typography variant="h6" noWrap component="div">
              Ketr-Chat
              ai.ketrenos.com
            </Typography>

            {

@ -1,15 +0,0 @@
body {
  margin: 0;
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
    'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
    sans-serif;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
  padding: 0;
  height: 100dvh;
}

code {
  font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New',
    monospace;
}

@ -1,19 +0,0 @@
import React from 'react';
import ReactDOM from 'react-dom/client';
import './index.css';
import App from './App';
import reportWebVitals from './reportWebVitals';

const root = ReactDOM.createRoot(
  document.getElementById('root') as HTMLElement
);
root.render(
  <React.StrictMode>
    <App />
  </React.StrictMode>
);

// If you want to start measuring performance in your app, pass a function
// to log results (for example: reportWebVitals(console.log))
// or send to an analytics endpoint. Learn more: https://bit.ly/CRA-vitals
reportWebVitals();

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 841.9 595.3"><g fill="#61DAFB"><path d="M666.3 296.5c0-32.5-40.7-63.3-103.1-82.4 14.4-63.6 8-114.2-20.2-130.4-6.5-3.8-14.1-5.6-22.4-5.6v22.3c4.6 0 8.3.9 11.4 2.6 13.6 7.8 19.5 37.5 14.9 75.7-1.1 9.4-2.9 19.3-5.1 29.4-19.6-4.8-41-8.5-63.5-10.9-13.5-18.5-27.5-35.3-41.6-50 32.6-30.3 63.2-46.9 84-46.9V78c-27.5 0-63.5 19.6-99.9 53.6-36.4-33.8-72.4-53.2-99.9-53.2v22.3c20.7 0 51.4 16.5 84 46.6-14 14.7-28 31.4-41.3 49.9-22.6 2.4-44 6.1-63.6 11-2.3-10-4-19.7-5.2-29-4.7-38.2 1.1-67.9 14.6-75.8 3-1.8 6.9-2.6 11.5-2.6V78.5c-8.4 0-16 1.8-22.6 5.6-28.1 16.2-34.4 66.7-19.9 130.1-62.2 19.2-102.7 49.9-102.7 82.3 0 32.5 40.7 63.3 103.1 82.4-14.4 63.6-8 114.2 20.2 130.4 6.5 3.8 14.1 5.6 22.5 5.6 27.5 0 63.5-19.6 99.9-53.6 36.4 33.8 72.4 53.2 99.9 53.2 8.4 0 16-1.8 22.6-5.6 28.1-16.2 34.4-66.7 19.9-130.1 62-19.1 102.5-49.9 102.5-82.3zm-130.2-66.7c-3.7 12.9-8.3 26.2-13.5 39.5-4.1-8-8.4-16-13.1-24-4.6-8-9.5-15.8-14.4-23.4 14.2 2.1 27.9 4.7 41 7.9zm-45.8 106.5c-7.8 13.5-15.8 26.3-24.1 38.2-14.9 1.3-30 2-45.2 2-15.1 0-30.2-.7-45-1.9-8.3-11.9-16.4-24.6-24.2-38-7.6-13.1-14.5-26.4-20.8-39.8 6.2-13.4 13.2-26.8 20.7-39.9 7.8-13.5 15.8-26.3 24.1-38.2 14.9-1.3 30-2 45.2-2 15.1 0 30.2.7 45 1.9 8.3 11.9 16.4 24.6 24.2 38 7.6 13.1 14.5 26.4 20.8 39.8-6.3 13.4-13.2 26.8-20.7 39.9zm32.3-13c5.4 13.4 10 26.8 13.8 39.8-13.1 3.2-26.9 5.9-41.2 8 4.9-7.7 9.8-15.6 14.4-23.7 4.6-8 8.9-16.1 13-24.1zM421.2 430c-9.3-9.6-18.6-20.3-27.8-32 9 .4 18.2.7 27.5.7 9.4 0 18.7-.2 27.8-.7-9 11.7-18.3 22.4-27.5 32zm-74.4-58.9c-14.2-2.1-27.9-4.7-41-7.9 3.7-12.9 8.3-26.2 13.5-39.5 4.1 8 8.4 16 13.1 24 4.7 8 9.5 15.8 14.4 23.4zM420.7 163c9.3 9.6 18.6 20.3 27.8 32-9-.4-18.2-.7-27.5-.7-9.4 0-18.7.2-27.8.7 9-11.7 18.3-22.4 27.5-32zm-74 58.9c-4.9 7.7-9.8 15.6-14.4 23.7-4.6 8-8.9 16-13 24-5.4-13.4-10-26.8-13.8-39.8 13.1-3.1 26.9-5.8 41.2-7.9zm-90.5 125.2c-35.4-15.1-58.3-34.9-58.3-50.6 0-15.7 22.9-35.6 58.3-50.6 8.6-3.7 18-7 27.7-10.1 5.7 19.6 13.2 40 22.5 60.9-9.2 20.8-16.6 41.1-22.2 60.6-9.9-3.1-19.3-6.5-28-10.2zM310 490c-13.6-7.8-19.5-37.5-14.9-75.7 1.1-9.4 2.9-19.3 5.1-29.4 19.6 4.8 41 8.5 63.5 10.9 13.5 18.5 27.5 35.3 41.6 50-32.6 30.3-63.2 46.9-84 46.9-4.5-.1-8.3-1-11.3-2.7zm237.2-76.2c4.7 38.2-1.1 67.9-14.6 75.8-3 1.8-6.9 2.6-11.5 2.6-20.7 0-51.4-16.5-84-46.6 14-14.7 28-31.4 41.3-49.9 22.6-2.4 44-6.1 63.6-11 2.3 10.1 4.1 19.8 5.2 29.1zm38.5-66.7c-8.6 3.7-18 7-27.7 10.1-5.7-19.6-13.2-40-22.5-60.9 9.2-20.8 16.6-41.1 22.2-60.6 9.9 3.1 19.3 6.5 28.1 10.2 35.4 15.1 58.3 34.9 58.3 50.6-.1 15.7-23 35.6-58.4 50.6zM320.8 78.4z"/><circle cx="420.9" cy="296.5" r="45.7"/><path d="M520.5 78.1z"/></g></svg>