Initial commit
commit d0b652aa09
2 .dockerignore Normal file
@@ -0,0 +1,2 @@
*
!src
2 .gitignore vendored Normal file
@@ -0,0 +1,2 @@
.env
cache/**
396 Dockerfile Normal file
@@ -0,0 +1,396 @@
FROM ubuntu:oracular AS pytorch-build

SHELL [ "/bin/bash", "-c" ]

# Instructions Dockerfied from:
#
# https://github.com/pytorch/pytorch
#
# and
#
# https://pytorch.org/docs/stable/notes/get_start_xpu.html
# https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-6.html
#
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gpg \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

# ipex only supports python 3.11, so use 3.11 instead of latest oracular (3.12)

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        ca-certificates \
        ccache \
        cmake \
        curl \
        git \
        gpg-agent \
        less \
        libbz2-dev \
        libffi-dev \
        libjpeg-dev \
        libpng-dev \
        libreadline-dev \
        libssl-dev \
        libsqlite3-dev \
        llvm \
        nano \
        wget \
        zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

#        python3 \
#        python3-pip \
#        python3-venv \
#        python3-dev \

RUN /usr/sbin/update-ccache-symlinks
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache

# Build Python in /opt/..., install it locally, then remove the build environment,
# collapsed to a single docker layer.
WORKDIR /opt
ENV PYTHON_VERSION=3.11.9

RUN wget -q -O - https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz | tar -xz \
    && cd Python-${PYTHON_VERSION} \
    && ./configure --prefix=/opt/python --enable-optimizations \
    && make -j$(nproc) \
    && make install \
    && cd /opt \
    && rm -rf Python-${PYTHON_VERSION}

WORKDIR /opt/pytorch

FROM ubuntu:oracular AS ze-monitor
# From https://github.com/jketreno/ze-monitor
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        debhelper \
        devscripts \
        cmake \
        git \
        libfmt-dev \
        libncurses-dev \
        rpm \
        rpm2cpio \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN apt-get install -y \
        software-properties-common \
    && add-apt-repository -y ppa:kobuk-team/intel-graphics \
    && apt-get update \
    && apt-get install -y \
        libze-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN git clone --depth 1 --branch v0.3.0-1 https://github.com/jketreno/ze-monitor /opt/ze-monitor
WORKDIR /opt/ze-monitor/build
RUN cmake .. \
    && make \
    && cpack

FROM pytorch-build AS pytorch

COPY --from=pytorch-build /opt/pytorch /opt/pytorch

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:kobuk-team/intel-graphics \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libze-intel-gpu1 \
        libze1 \
        intel-ocloc \
        intel-opencl-icd \
        xpu-smi \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2

# When cache is enabled SYCL runtime will try to cache and reuse JIT-compiled binaries.
ENV SYCL_CACHE_PERSISTENT=1

WORKDIR /opt/pytorch

# Generate a wrapper script used as the Docker SHELL so later RUN commands execute
# inside the venv. Note: the bare update-alternatives call runs once at build time
# while this script is generated; it is not written into the script itself.
RUN { \
    echo '#!/bin/bash' ; \
    update-alternatives --set python3 /opt/python/bin/python3.11 ; \
    echo 'source /opt/pytorch/venv/bin/activate' ; \
    echo 'bash -c "${@}"' ; \
    } > /opt/pytorch/shell ; \
    chmod +x /opt/pytorch/shell

RUN python3 -m venv --system-site-packages /opt/pytorch/venv

SHELL [ "/opt/pytorch/shell" ]

RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu
RUN pip3 freeze > /opt/pytorch/requirements.txt

SHELL [ "/bin/bash", "-c" ]

RUN { \
    echo '#!/bin/bash' ; \
    echo 'echo "Container: pytorch"' ; \
    echo 'set -e' ; \
    echo 'echo "Setting pip environment to /opt/pytorch"' ; \
    echo 'source /opt/pytorch/venv/bin/activate' ; \
    echo 'if [[ "${1}" == "" ]] || [[ "${1}" == "shell" ]]; then' ; \
    echo '    echo "Dropping to shell"' ; \
    echo '    /bin/bash -c "source /opt/pytorch/venv/bin/activate ; /bin/bash"' ; \
    echo 'else' ; \
    echo '    exec "${@}"' ; \
    echo 'fi' ; \
    } > /entrypoint.sh \
    && chmod +x /entrypoint.sh

ENTRYPOINT [ "/entrypoint.sh" ]

FROM pytorch AS ipex-2.6.10

WORKDIR /opt
RUN git clone --branch release/xpu/2.6.10 --depth 1 https://github.com/intel/intel-extension-for-pytorch.git ipex-2.6.10
WORKDIR /opt/ipex-2.6.10

RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
    | gpg --dearmor -o /usr/share/keyrings/oneapi-archive-keyring.gpg \
    && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
    | tee /etc/apt/sources.list.d/oneAPI.list \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        intel-deep-learning-essentials-2025.0 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

# Requirements for building ipex / oneAPI...
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libspdlog-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN python3 -m venv --system-site-packages /opt/ipex-2.6.10/venv

# Same SHELL wrapper trick as the pytorch stage, with oneAPI environment added
RUN { \
    echo '#!/bin/bash' ; \
    update-alternatives --set python3 /opt/python/bin/python3.11 ; \
    echo 'source /opt/intel/oneapi/setvars.sh' ; \
    echo 'source /opt/ipex-2.6.10/venv/bin/activate' ; \
    echo 'bash -c "${@}"' ; \
    } > /opt/ipex-2.6.10/shell ; \
    chmod +x /opt/ipex-2.6.10/shell

SHELL [ "/opt/ipex-2.6.10/shell" ]

#RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu
RUN pip3 install -r requirements.txt

RUN git submodule update --init --recursive --depth 1

# Building the ipex-2.6.10 wheel requires the level-zero loader (libze-dev)
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libze-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

# torch needs to be installed before the ipex wheel can be built
RUN pip3 install torch --index-url https://download.pytorch.org/whl/test/xpu

RUN python setup.py bdist_wheel

FROM pytorch AS ipex-llm-src

# Build ipex-llm from source

RUN git clone --depth 1 https://github.com/intel/ipex-llm.git /opt/ipex-llm

WORKDIR /opt/ipex-llm

RUN python3 -m venv --system-site-packages /opt/ipex-llm/venv
RUN { \
    echo '#!/bin/bash' ; \
    update-alternatives --set python3 /opt/python/bin/python3.11 ; \
    echo 'source /opt/ipex-llm/venv/bin/activate' ; \
    echo 'bash -c "${@}"' ; \
    } > /opt/ipex-llm/shell ; \
    chmod +x /opt/ipex-llm/shell

SHELL [ "/opt/ipex-llm/shell" ]

RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu
COPY --from=ipex-2.6.10 /opt/ipex-2.6.10/dist/intel_extension_for_pytorch-2.6.10*.whl /opt/wheels/
RUN for pkg in /opt/wheels/intel_extension_for_pytorch-2.6.10*.whl; do pip install $pkg[xpu-2-6]; done

WORKDIR /opt/ipex-llm/python/llm
RUN pip install requests wheel
RUN python setup.py clean --all bdist_wheel --linux

FROM airc AS jupyter

SHELL [ "/opt/airc/shell" ]

# BEGIN setup Jupyter
RUN pip install jupyter \
    jupyterlab==4.3.0a0 \
    jupyterhub==5.0.0 \
    notebook==7.3.0a0 \
    "jupyter-server-proxy>=4.1.2"
# END setup Jupyter

SHELL [ "/bin/bash", "-c" ]

RUN { \
    echo '#!/bin/bash' ; \
    echo 'echo "Container: airc jupyter"' ; \
    echo 'if [[ ! -e "/root/.cache/hub/token" ]]; then' ; \
    echo '    if [[ "${HF_ACCESS_TOKEN}" == "" ]]; then' ; \
    echo '        echo "Set your HF access token in .env as: HF_ACCESS_TOKEN=<token>" >&2' ; \
    echo '        exit 1' ; \
    echo '    else' ; \
    echo '        if [[ ! -d "/root/.cache/hub" ]]; then mkdir -p /root/.cache/hub; fi' ; \
    echo '        echo "${HF_ACCESS_TOKEN}" > /root/.cache/hub/token' ; \
    echo '    fi' ; \
    echo 'fi' ; \
    echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
    echo 'source /opt/intel/oneapi/setvars.sh' ; \
    echo 'source /opt/airc/venv/bin/activate' ; \
    echo 'if [[ "${1}" == "shell" ]]; then echo "Dropping to shell"; /bin/bash; exit $?; fi' ; \
    echo 'while true; do' ; \
    echo '    echo "Launching jupyter notebook"' ; \
    echo '    jupyter notebook \' ; \
    echo '        --notebook-dir=/opt/jupyter \' ; \
    echo '        --port 8888 \' ; \
    echo '        --ip 0.0.0.0 \' ; \
    echo '        --no-browser \' ; \
    echo '        --allow-root \' ; \
    echo '        --ServerApp.token= \' ; \
    echo '        --ServerApp.password= \' ; \
    echo '        --ServerApp.allow_origin=* \' ; \
    echo '        --ServerApp.base_url="/jupyter" \' ; \
    echo '        "${@}" \' ; \
    echo '        >> "/root/.cache/jupyter.log" 2>&1' ; \
    echo '    echo "jupyter notebook died ($?). Restarting."' ; \
    echo '    sleep 5' ; \
    echo 'done' ; \
    } > /entrypoint-jupyter.sh \
    && chmod +x /entrypoint-jupyter.sh

ENTRYPOINT [ "/entrypoint-jupyter.sh" ]

FROM pytorch AS airc

RUN python3 -m venv --system-site-packages /opt/airc/venv

# Don't install the full oneapi essentials; just the ones that we seem to need
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
    | gpg --dearmor -o /usr/share/keyrings/oneapi-archive-keyring.gpg \
    && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
    | tee /etc/apt/sources.list.d/oneAPI.list \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        intel-oneapi-mkl-sycl-2025.0 \
        intel-oneapi-dnnl-2025.0 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN { \
    echo '#!/bin/bash' ; \
    echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
    echo 'source /opt/intel/oneapi/setvars.sh' ; \
    echo 'source /opt/airc/venv/bin/activate' ; \
    echo 'if [[ "$1" == "" ]]; then bash; else bash -c "${@}"; fi' ; \
    } > /opt/airc/shell ; \
    chmod +x /opt/airc/shell

SHELL [ "/opt/airc/shell" ]

RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu
# Install ipex built in ipex-2.6.10
COPY --from=ipex-2.6.10 /opt/ipex-2.6.10/dist/*.whl /opt/wheels/
RUN for pkg in /opt/wheels/intel_extension_for_pytorch-2.6.10*.whl; do pip install $pkg[xpu-2-6]; done
# Install ipex-llm built in ipex-llm-src
COPY --from=ipex-llm-src /opt/ipex-llm/python/llm/dist/*.whl /opt/wheels/
RUN for pkg in /opt/wheels/ipex_llm*.whl; do pip install $pkg; done

COPY src/ /opt/airc/src/

# pydle does not work with newer asyncio since asyncio.coroutine
# was removed. Patch it to work.
RUN pip3 install pydle transformers sentencepiece accelerate \
    && patch -d /opt/airc/venv/lib/python3*/site-packages/pydle \
        -p1 < /opt/airc/src/pydle.patch

# mistral fails with cache_position errors with transformers>4.40 (or at least it fails with the latest)
RUN pip install transformers==4.40

# To get xe_linear and other Xe methods
RUN pip3 install 'bigdl-core-xe-all>=2.6.0b'

# trl.core doesn't have what is needed with the default 'pip install trl' version
RUN pip install git+https://github.com/huggingface/trl.git@7630f877f91c556d9e5a3baa4b6e2894d90ff84c

# Needed by src/model-server.py
RUN pip install flask

SHELL [ "/bin/bash", "-c" ]

RUN { \
    echo '#!/bin/bash' ; \
    echo 'set -e' ; \
    echo 'if [[ ! -e "/root/.cache/hub/token" ]]; then' ; \
    echo '    if [[ "${HF_ACCESS_TOKEN}" == "" ]]; then' ; \
    echo '        echo "Set your HF access token in .env as: HF_ACCESS_TOKEN=<token>" >&2' ; \
    echo '        exit 1' ; \
    echo '    else' ; \
    echo '        if [[ ! -d "/root/.cache/hub" ]]; then mkdir -p /root/.cache/hub; fi' ; \
    echo '        echo "${HF_ACCESS_TOKEN}" > /root/.cache/hub/token' ; \
    echo '    fi' ; \
    echo 'fi' ; \
    echo 'echo "Container: airc"' ; \
    echo 'echo "Setting pip environment to /opt/airc"' ; \
    echo 'source /opt/intel/oneapi/setvars.sh' ; \
    echo 'source /opt/airc/venv/bin/activate' ; \
    echo 'if [[ "${1}" == "shell" ]] || [[ "${1}" == "/bin/bash" ]]; then' ; \
    echo '    echo "Dropping to shell"' ; \
    echo '    /bin/bash -c "source /opt/airc/venv/bin/activate ; /bin/bash"' ; \
    echo '    exit $?' ; \
    echo 'else' ; \
    echo '    while true; do' ; \
    echo '        echo "Launching model-server"' ; \
    echo '        python src/model-server.py \' ; \
    echo '            2>&1 | tee -a "/root/.cache/model-server.log"' ; \
    echo '        echo "model-server died ($?). Restarting."' ; \
    echo '        sleep 5' ; \
    echo '    done &' ; \
    echo '    while true; do' ; \
    echo '        echo "Launching airc"' ; \
    echo '        python src/airc.py "${@}" \' ; \
    echo '            2>&1 | tee -a "/root/.cache/airc.log"' ; \
    echo '        echo "airc died ($?). Restarting."' ; \
    echo '        sleep 5' ; \
    echo '    done' ; \
    echo 'fi' ; \
    } > /entrypoint-airc.sh \
    && chmod +x /entrypoint-airc.sh

COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
RUN dpkg -i /opt/ze-monitor-*deb

WORKDIR /opt/airc

ENTRYPOINT [ "/entrypoint-airc.sh" ]
24 LICENSE Normal file
@@ -0,0 +1,24 @@
BSD 2-Clause License

Copyright (c) 2025, James Ketrenos

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
86 README.md Normal file
@@ -0,0 +1,86 @@
# AIRC (pronounced Eric)

AI is Really Cool

NOTE: If running on an Intel Arc A series graphics processor, fp64 is not supported; fp64 operations may need to be emulated, or the model quantized.
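
Once the containers are built (see Building below), you can sanity-check what the XPU stack reports for your GPU from inside the container. This is a minimal sketch using the same `torch.xpu` calls as src/model-server.py:

```bash
docker compose run --rm airc shell
python3 -c 'import torch; print(torch.xpu.is_available()); print(torch.xpu.get_device_name(0)); print(torch.xpu.get_device_properties(0))'
```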

This project provides container definitions that provide PyTorch 2.6 with
Intel's ipex-llm project. In addition, it provides a small local chat server and an IRC client to provide a chat bot.

# Installation

This project uses docker containers to build. As this was originally
written to work on an Intel Arc B580 (Battlemage), it requires a
kernel that supports that hardware, such as the one documented
at [Intel Graphics Preview](https://github.com/canonical/intel-graphics-preview), which runs in Ubuntu Oracular (24.10).

NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/)

## Want to run under WSL2? No can do...

https://www.intel.com/content/www/us/en/support/articles/000093216/graphics/processor-graphics.html

The A- and B-series discrete GPUs do not support SR-IOV, which is required for
the GPU partitioning that Microsoft Windows uses in order to support GPU acceleration in WSL.

## Building

NOTE: You need 'docker compose' installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/)

```bash
git clone https://github.com/jketreno/airc
cd airc
docker compose build
```
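
The Dockerfile is multi-stage, so the full build can take a while. To rebuild just one image, name the service (standard docker compose usage; services as defined in docker-compose.yml):

```bash
docker compose build airc     # chat bot + model server image
docker compose build jupyter  # Jupyter image (built on top of airc)
```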

## Running

In order to download the models, you need to have a Hugging Face
token. See https://huggingface.co/settings/tokens for information
on obtaining a token.

Edit .env to add the following:

```.env
HF_ACCESS_TOKEN=<access token from huggingface>
```

NOTE: Models downloaded by most examples will be placed in the
./cache directory, which is bind mounted to the container.

### AIRC

To launch the airc shell interactively, with the pytorch 2.6
environment loaded, use the default entrypoint to launch a shell:

```bash
docker compose run --rm airc shell
```

Once in the shell, you can then launch model-server.py and then
the airc.py client:

```bash
docker compose run --rm airc shell
src/airc.py --ai-server=http://localhost:5000 &
src/model-server.py
```

By default, src/airc.py will connect to irc.libera.chat on the #airc-test
channel. See `python src/airc.py --help` for options.
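
For example, to join a different channel with a different nickname (all flags as defined in `parse_args()` in src/airc.py; the channel name here is just an illustration):

```bash
python src/airc.py --server irc.libera.chat --port 6667 \
    --nickname mybot --channel '#mychannel' \
    --ai-server http://localhost:5000 --level DEBUG
```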

By separating the model-server into its own process, you can develop
and tweak the chat backend without losing the IRC connection established
by airc.
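
model-server.py exposes a small OpenAI-style HTTP API on port 5000 (endpoints taken from src/model-server.py; the port is not published by docker-compose.yml, so run this from inside the container):

```bash
curl -s http://localhost:5000/health
curl -s http://localhost:5000/v1/chat/completions \
    -H 'Content-Type: application/json' \
    -d '{"model": "my-model", "messages": [{"role": "user", "content": "What is an XPU?"}]}'
```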

### Jupyter

```bash
docker compose up jupyter -d
```

The default port for inbound connections is 8888 (see docker-compose.yml).
$(pwd)/jupyter is bind mounted to /opt/jupyter in the container, which is where notebooks will be saved by default.

To access the jupyter notebook, go to `http://localhost:8888/jupyter`.
0 cache/.keep vendored Normal file
31 docker-compose.yml Normal file
@@ -0,0 +1,31 @@
services:
  airc:
    build:
      context: .
      dockerfile: Dockerfile
      target: airc
    image: airc
    restart: "no"
    env_file:
      - .env
    devices:
      - /dev/dri:/dev/dri
    volumes:
      - ./cache:/root/.cache
      - ./src:/opt/airc/src:rw

  jupyter:
    build:
      context: .
      dockerfile: Dockerfile
      target: jupyter
    image: jupyter
    env_file:
      - .env
    devices:
      - /dev/dri:/dev/dri
    ports:
      - 8888:8888 # Jupyter Notebook
    volumes:
      - ./jupyter:/opt/jupyter:rw
      - ./cache:/root/.cache
187 src/airc.py Normal file
@@ -0,0 +1,187 @@
import asyncio
import aiohttp
import argparse
import logging
import pydle
import re
import time
from typing import Dict, Any


def parse_args():
    parser = argparse.ArgumentParser(description="AI is Really Cool")
    parser.add_argument("--server", type=str, default="irc.libera.chat", help="IRC server address")
    parser.add_argument("--port", type=int, default=6667, help="IRC server port")
    parser.add_argument("--nickname", type=str, default="airc", help="Bot nickname")
    parser.add_argument("--channel", type=str, default="#airc-test", help="Channel to join")
    parser.add_argument("--ai-server", type=str, default="http://localhost:5000", help="OpenAI API endpoint")
    parser.add_argument('--level', type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
                        default='INFO', help='Set the logging level.')
    return parser.parse_args()


class AsyncOpenAIClient:
    def __init__(self, base_url: str = "http://localhost:5000"):
        logging.info(f"Using {base_url} as server")
        self.base_url = base_url
        self.session = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()

    async def chat_completion(self,
                              messages: list,
                              model: str = "my-model",
                              temperature: float = 0.7,
                              max_tokens: int = 100) -> Dict[str, Any]:
        """Make an async chat completion request."""
        url = f"{self.base_url}/v1/chat/completions"

        # Prepare the request payload
        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens
        }

        try:
            async with self.session.post(url, json=payload) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise Exception(f"Request failed with status {response.status}: {error_text}")

                return await response.json()

        except Exception as e:
            logging.error(f"Error during request: {str(e)}")
            return {"error": str(e)}


def setup_logging(level):
    numeric_level = getattr(logging, level.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError(f"Invalid log level: {level}")

    logging.basicConfig(level=numeric_level, format='%(asctime)s - %(levelname)s - %(message)s')
    logging.info(f"Logging is set to {level} level.")


class AIRC(pydle.Client):
    def __init__(self, nick, channel, client, burst_limit=5, rate_limit=1.0, burst_reset_timeout=10.0):
        super().__init__(nick)
        self.nick = nick
        self.channel = channel
        self.burst_limit = burst_limit
        self.rate_limit = rate_limit
        self.burst_reset_timeout = burst_reset_timeout
        self.sent_burst = 0            # Track messages sent in the current burst
        self.last_message_time = None  # Track last message time
        self.queries = 0               # Queries answered (reported by the 'stats' command)
        self.processing = 0.0          # Total seconds spent waiting on the AI server
        self.system_input = "You are a critical assistant. Give concise and accurate answers in less than 120 characters."
        self._message_queue = asyncio.Queue()
        self._task = asyncio.create_task(self._send_from_queue())
        self.client = client

    async def _send_from_queue(self):
        """Background task that sends queued messages with burst + rate limiting."""
        while True:
            target, message = await self._message_queue.get()

            # If burst is still available, send immediately
            if self.sent_burst < self.burst_limit:
                self.sent_burst += 1
            else:
                await asyncio.sleep(self.rate_limit)  # Apply rate limit

            await super().message(target, message)  # Send message
            self.last_message_time = asyncio.get_event_loop().time()  # Update last message timestamp

            # Start burst reset countdown after each message
            asyncio.create_task(self._reset_burst_after_inactivity())

    async def _reset_burst_after_inactivity(self):
        """Resets burst counter only if no new messages are sent within timeout."""
        last_time = self.last_message_time
        await asyncio.sleep(self.burst_reset_timeout)  # Wait for inactivity period

        # Only reset if no new messages were sent during the wait
        if self.last_message_time == last_time:
            self.sent_burst = 0
            logging.info("Burst limit reset due to inactivity.")

    async def message(self, target, message):
        """Splits a multi-line message and sends each line separately."""
        for line in message.splitlines():  # Splits on both '\n' and '\r\n'
            if line.strip():  # Ignore empty lines
                await self._message_queue.put((target, line))

    async def on_connect(self):
        logging.debug('on_connect')
        await self.join(self.channel)

    def remove_substring(self, string, substring):
        return string.replace(substring, "")

    def extract_nick_message(self, input_string):
        # Pattern with capturing groups for nick and message
        pattern = r"^\s*([^\s:]+?)\s*:\s*(.+?)$"

        match = re.match(pattern, input_string)
        if match:
            nick = match.group(1)     # First capturing group
            message = match.group(2)  # Second capturing group
            return nick, message
        return None, None  # Return None for both if no match

    async def on_message(self, target, source, message):
        if source == self.nick:
            return
        nick, body = self.extract_nick_message(message)
        if nick == self.nick:
            content = None
            if body == "stats":
                content = f"{self.queries} queries handled in {self.processing:.1f}s"
            else:
                messages = [
                    {"role": "system", "content": self.system_input},
                    {"role": "user", "content": body}
                ]

                # Make the request, tracking the totals reported by 'stats'
                start = time.time()
                response = await self.client.chat_completion(messages)
                self.processing += time.time() - start
                self.queries += 1

                # Extract just the assistant's message if available
                if "choices" in response and len(response["choices"]) > 0:
                    content = response["choices"][0]["message"]["content"]

            if content:
                logging.info(f'Sending: {content}')
                await self.message(target, f"{content}")


async def main():
    # Parse command-line arguments
    args = parse_args()

    # Setup logging based on the provided level
    setup_logging(args.level)

    async with AsyncOpenAIClient(base_url=args.ai_server) as client:
        bot = AIRC(args.nickname, args.channel, client)
        await bot.connect(args.server, args.port, tls=False)
        await bot.handle_forever()


if __name__ == "__main__":
    asyncio.run(main())
171 src/model-server.py Normal file
@@ -0,0 +1,171 @@
from flask import Flask, request, jsonify
import argparse
import datetime
import logging
import os
import time
import torch
import transformers
from ipex_llm.transformers import AutoModelForCausalLM


def parse_args():
    parser = argparse.ArgumentParser(description="AI is Really Cool Server")
    parser.add_argument("--device", type=int, default=0, help="Device # to use for inference. See --device-list")
    #parser.add_argument("--device-list", help="List available devices")
    parser.add_argument('--level', type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
                        default='INFO', help='Set the logging level.')
    return parser.parse_args()


def setup_logging(level):
    numeric_level = getattr(logging, level.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError(f"Invalid log level: {level}")

    logging.basicConfig(level=numeric_level, format='%(asctime)s - %(levelname)s - %(message)s')
    logging.info(f"Logging is set to {level} level.")


class Chat():
    def __init__(self, device_name):
        super().__init__()
        self.device_name = device_name
        self.system_input = "You are a critical assistant. Give concise and accurate answers in less than 120 characters."
        self.context = None
        self.model_path = 'Intel/neural-chat-7b-v3-3'
        try:
            logging.info(f"Loading tokenizer from: {self.model_path}")
            self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True)
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token  # Set pad_token to eos_token if needed

            self.model = AutoModelForCausalLM.from_pretrained(self.model_path,
                                                              load_in_4bit=True,
                                                              optimize_model=True,
                                                              trust_remote_code=True,
                                                              use_cache=True)
            self.model = self.model.half().to(device_name)
        except Exception as e:
            logging.error(f"Loading error: {e}")

    def remove_substring(self, string, substring):
        return string.replace(substring, "")

    def generate_response(self, text):
        prompt = f"### System:\n{self.system_input}\n### User:\n{text}\n### Assistant:\n"
        start = time.time()

        with torch.autocast(self.device_name, dtype=torch.float16):
            inputs = self.tokenizer.encode_plus(
                prompt,
                add_special_tokens=False,
                return_tensors="pt",
                max_length=1000,  # Prevent 'Asking to truncate to max_length...'
                padding=True,     # Handles padding automatically
                truncation=True
            )
            input_ids = inputs["input_ids"].to(self.device_name)
            attention_mask = inputs["attention_mask"].to(self.device_name)

            outputs = self.model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_length=1000,
                num_return_sequences=1,
                pad_token_id=self.tokenizer.eos_token_id
            )

        final_outputs = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        final_outputs = self.remove_substring(final_outputs, prompt).strip()

        end = time.time()

        return final_outputs, datetime.timedelta(seconds=end - start)


app = Flask(__name__)


# Basic endpoint for chat completions
@app.route('/v1/chat/completions', methods=['POST'])
def chat_completions():
    logging.info('/v1/chat/completions')
    try:
        # Get the JSON data from the request
        data = request.get_json()

        # Extract relevant fields from the request
        model = data.get('model', 'default-model')
        messages = data.get('messages', [])
        temperature = data.get('temperature', 1.0)
        max_tokens = data.get('max_tokens', 2048)

        chat = app.config['chat']
        logging.info(f"Query: {messages}")
        response_content, _ = chat.generate_response(messages[-1]['content'])
        logging.info(f"Response: {response_content}")
        # Format response in OpenAI-compatible structure
        response = {
            "id": "chatcmpl-" + str(id(data)),  # Simple unique ID
            "object": "chat.completion",
            "created": int(time.time()),
            "model": chat.model_path,
            "choices": [{
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": response_content
                },
                "finish_reason": "stop"
            }],
            # "usage": {
            #     "prompt_tokens": len(str(messages).split()),
            #     "completion_tokens": len(response_content.split()),
            #     "total_tokens": len(str(messages).split()) + len(response_content.split())
            # }
        }

        return jsonify(response)

    except Exception as e:
        logging.error(e)
        return jsonify({
            "error": {
                "message": str(e),
                "type": "invalid_request_error"
            }
        }), 400


# Health check endpoint
@app.route('/health', methods=['GET'])
def health():
    return jsonify({"status": "healthy"}), 200


if __name__ == '__main__':
    # Parse command-line arguments
    args = parse_args()

    # Setup logging based on the provided level
    setup_logging(args.level)

    if not torch.xpu.is_available():
        logging.error("No XPU available.")
        exit(1)
    device_count = torch.xpu.device_count()
    for i in range(device_count):
        logging.info(f"Device {i}: {torch.xpu.get_device_name(i)} Total memory: {torch.xpu.get_device_properties(i).total_memory}")
    device_name = 'xpu'
    device = torch.device(device_name)
    logging.info(f"Using device: {device}")

    # Set environment variables that might help with XPU stability
    os.environ["SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS"] = "1"

    app.config['chat'] = Chat(device_name)

    app.run(host='0.0.0.0', port=5000, debug=True)
56 src/pydle.patch Normal file
@@ -0,0 +1,56 @@
diff --git a/__init__.py b/__init__.py
index 2ead20d..892471b 100644
--- a/__init__.py
+++ b/__init__.py
@@ -1,11 +1,21 @@
 # noinspection PyUnresolvedReferences
-from asyncio import coroutine, Future
+from asyncio import Future
 from functools import cmp_to_key
 from . import connection, protocol, client, features
 from .client import Error, NotInChannel, AlreadyInChannel, BasicClient, ClientPool
 from .features.ircv3.cap import NEGOTIATING as CAPABILITY_NEGOTIATING, FAILED as CAPABILITY_FAILED, \
     NEGOTIATED as CAPABILITY_NEGOTIATED
 
+import asyncio
+# And use asyncio.coroutine where it was used, although it's better to switch to async def
+# However, since 'coroutine' decorator is removed, you would actually need to:
+from functools import wraps
+
+def coroutine(func):
+    @wraps(func)
+    async def wrapper(*args, **kwargs):
+        return func(*args, **kwargs)
+    return wrapper
 
 __name__ = 'pydle'
 __version__ = '0.9.4rc1'
diff --git a/connection.py b/connection.py
index c9a9e8e..5445b0e 100644
--- a/connection.py
+++ b/connection.py
@@ -37,6 +37,7 @@ class Connection:
         self.reader = None
         self.writer = None
         self.eventloop = eventloop or asyncio.new_event_loop()
+        self.lock = asyncio.Lock()
 
     async def connect(self):
         """ Connect to target. """
@@ -49,8 +50,7 @@ class Connection:
             host=self.hostname,
             port=self.port,
             local_addr=self.source_address,
-            ssl=self.tls_context,
-            loop=self.eventloop
+            ssl=self.tls_context
         )
 
     def create_tls_context(self):
@@ -112,4 +112,5 @@ class Connection:
         await self.writer.drain()
 
     async def recv(self, *, timeout=None):
-        return await asyncio.wait_for(self.reader.readline(), timeout=timeout)
+        async with self.lock:
+            return await asyncio.wait_for(self.reader.readline(), timeout=timeout)