Initial commit

James Ketrenos 2025-03-06 13:48:18 -08:00
commit d0b652aa09
11 changed files with 955 additions and 0 deletions

2
.dockerignore Normal file

@@ -0,0 +1,2 @@
*
!src

2
.gitignore vendored Normal file

@@ -0,0 +1,2 @@
.env
cache/**

396
Dockerfile Normal file

@@ -0,0 +1,396 @@
FROM ubuntu:oracular AS pytorch-build

SHELL [ "/bin/bash", "-c" ]

# Instructions Dockerfied from:
#
# https://github.com/pytorch/pytorch
#
# and
#
# https://pytorch.org/docs/stable/notes/get_start_xpu.html
# https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-6.html
#

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gpg \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

# ipex only supports Python 3.11, so use 3.11 instead of latest oracular (3.12)
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        ca-certificates \
        ccache \
        cmake \
        curl \
        git \
        gpg-agent \
        less \
        libbz2-dev \
        libffi-dev \
        libjpeg-dev \
        libpng-dev \
        libreadline-dev \
        libssl-dev \
        libsqlite3-dev \
        llvm \
        nano \
        wget \
        zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
#       python3 \
#       python3-pip \
#       python3-venv \
#       python3-dev \

RUN /usr/sbin/update-ccache-symlinks
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache

# Build Python in /opt/..., install it locally, then remove the build tree,
# collapsed to a single docker layer.
WORKDIR /opt
ENV PYTHON_VERSION=3.11.9
RUN wget -q -O - https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz | tar -xz \
    && cd Python-${PYTHON_VERSION} \
    && ./configure --prefix=/opt/python --enable-optimizations \
    && make -j$(nproc) \
    && make install \
    && cd /opt \
    && rm -rf Python-${PYTHON_VERSION}

WORKDIR /opt/pytorch

FROM ubuntu:oracular AS ze-monitor

# From https://github.com/jketreno/ze-monitor
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        debhelper \
        devscripts \
        cmake \
        git \
        libfmt-dev \
        libncurses-dev \
        rpm \
        rpm2cpio \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN apt-get install -y \
        software-properties-common \
    && add-apt-repository -y ppa:kobuk-team/intel-graphics \
    && apt-get update \
    && apt-get install -y \
        libze-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN git clone --depth 1 --branch v0.3.0-1 https://github.com/jketreno/ze-monitor /opt/ze-monitor
WORKDIR /opt/ze-monitor/build
RUN cmake .. \
    && make \
    && cpack

FROM pytorch-build AS pytorch

COPY --from=pytorch-build /opt/pytorch /opt/pytorch

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:kobuk-team/intel-graphics \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libze-intel-gpu1 \
        libze1 \
        intel-ocloc \
        intel-opencl-icd \
        xpu-smi \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2

# When cache is enabled, the SYCL runtime will try to cache and reuse JIT-compiled binaries.
ENV SYCL_CACHE_PERSISTENT=1

WORKDIR /opt/pytorch

# NOTE: the update-alternatives line runs at build time; only the echoed
# lines are written into the wrapper script.
RUN { \
        echo '#!/bin/bash' ; \
        update-alternatives --set python3 /opt/python/bin/python3.11 ; \
        echo 'source /opt/pytorch/venv/bin/activate' ; \
        echo 'bash -c "${@}"' ; \
    } > /opt/pytorch/shell ; \
    chmod +x /opt/pytorch/shell

RUN python3 -m venv --system-site-packages /opt/pytorch/venv

SHELL [ "/opt/pytorch/shell" ]

RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu
RUN pip3 freeze > /opt/pytorch/requirements.txt

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash' ; \
        echo 'echo "Container: pytorch"' ; \
        echo 'set -e' ; \
        echo 'echo "Setting pip environment to /opt/pytorch"' ; \
        echo 'source /opt/pytorch/venv/bin/activate'; \
        echo 'if [[ "${1}" == "" ]] || [[ "${1}" == "shell" ]]; then' ; \
        echo ' echo "Dropping to shell"' ; \
        echo ' /bin/bash -c "source /opt/pytorch/venv/bin/activate ; /bin/bash"' ; \
        echo 'else' ; \
        echo ' exec "${@}"' ; \
        echo 'fi' ; \
    } > /entrypoint.sh \
    && chmod +x /entrypoint.sh

ENTRYPOINT [ "/entrypoint.sh" ]

FROM pytorch AS ipex-2.6.10

WORKDIR /opt
RUN git clone --branch release/xpu/2.6.10 --depth 1 https://github.com/intel/intel-extension-for-pytorch.git ipex-2.6.10
WORKDIR /opt/ipex-2.6.10

RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
    | gpg --dearmor -o /usr/share/keyrings/oneapi-archive-keyring.gpg \
    && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
    | tee /etc/apt/sources.list.d/oneAPI.list \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        intel-deep-learning-essentials-2025.0 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

# Requirements for building ipex / oneAPI...
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libspdlog-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN python3 -m venv --system-site-packages /opt/ipex-2.6.10/venv

RUN { \
        echo '#!/bin/bash' ; \
        update-alternatives --set python3 /opt/python/bin/python3.11 ; \
        echo 'source /opt/intel/oneapi/setvars.sh' ; \
        echo 'source /opt/ipex-2.6.10/venv/bin/activate' ; \
        echo 'bash -c "${@}"' ; \
    } > /opt/ipex-2.6.10/shell ; \
    chmod +x /opt/ipex-2.6.10/shell

SHELL [ "/opt/ipex-2.6.10/shell" ]

#RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu
RUN pip3 install -r requirements.txt
RUN git submodule update --init --recursive --depth 1

# Building the ipex-2.6.10 wheel requires the Level Zero loader (libze-dev)
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libze-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

# torch needs to be installed before the ipex wheel can be built
RUN pip3 install torch --index-url https://download.pytorch.org/whl/test/xpu
RUN python setup.py bdist_wheel

FROM pytorch AS ipex-llm-src

# Build ipex-llm from source
RUN git clone --depth 1 https://github.com/intel/ipex-llm.git /opt/ipex-llm
WORKDIR /opt/ipex-llm

RUN python3 -m venv --system-site-packages /opt/ipex-llm/venv

RUN { \
        echo '#!/bin/bash' ; \
        update-alternatives --set python3 /opt/python/bin/python3.11 ; \
        echo 'source /opt/ipex-llm/venv/bin/activate' ; \
        echo 'bash -c "${@}"' ; \
    } > /opt/ipex-llm/shell ; \
    chmod +x /opt/ipex-llm/shell

SHELL [ "/opt/ipex-llm/shell" ]

RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu

COPY --from=ipex-2.6.10 /opt/ipex-2.6.10/dist/intel_extension_for_pytorch-2.6.10*.whl /opt/wheels/
RUN for pkg in /opt/wheels/intel_extension_for_pytorch-2.6.10*.whl; do pip install $pkg[xpu-2-6]; done

WORKDIR /opt/ipex-llm/python/llm
RUN pip install requests wheel
RUN python setup.py clean --all bdist_wheel --linux

# NOTE: this stage builds on the airc stage, which is defined later in this file.
FROM airc AS jupyter

SHELL [ "/opt/airc/shell" ]

# BEGIN setup Jupyter
RUN pip install jupyter \
        jupyterlab==4.3.0a0 \
        jupyterhub==5.0.0 \
        notebook==7.3.0a0 \
        "jupyter-server-proxy>=4.1.2"
# END setup Jupyter

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash' ; \
        echo 'echo "Container: airc jupyter"' ; \
        echo 'if [[ ! -e "/root/.cache/hub/token" ]]; then' ; \
        echo ' if [[ "${HF_ACCESS_TOKEN}" == "" ]]; then' ; \
        echo ' echo "Set your HF access token in .env as: HF_ACCESS_TOKEN=<token>" >&2' ; \
        echo ' exit 1' ; \
        echo ' else' ; \
        echo ' if [[ ! -d '/root/.cache/hub' ]]; then mkdir -p /root/.cache/hub; fi' ; \
        echo ' echo "${HF_ACCESS_TOKEN}" > /root/.cache/hub/token' ; \
        echo ' fi' ; \
        echo 'fi' ; \
        echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
        echo 'source /opt/intel/oneapi/setvars.sh' ; \
        echo 'source /opt/airc/venv/bin/activate' ; \
        echo 'if [[ "${1}" == "shell" ]]; then echo "Dropping to shell"; /bin/bash; exit $?; fi' ; \
        echo 'while true; do' ; \
        echo ' echo "Launching jupyter notebook"' ; \
        echo ' jupyter notebook \' ; \
        echo ' --notebook-dir=/opt/jupyter \' ; \
        echo ' --port 8888 \' ; \
        echo ' --ip 0.0.0.0 \' ; \
        echo ' --no-browser \' ; \
        echo ' --allow-root \' ; \
        echo ' --ServerApp.token= \' ; \
        echo ' --ServerApp.password= \' ; \
        echo ' --ServerApp.allow_origin=* \' ; \
        echo ' --ServerApp.base_url="/jupyter" \' ; \
        echo ' "${@}" \' ; \
        echo ' >> "/root/.cache/jupyter.log" 2>&1' ; \
        echo ' echo "jupyter notebook died ($?). Restarting."' ; \
        echo ' sleep 5' ; \
        echo 'done' ; \
    } > /entrypoint-jupyter.sh \
    && chmod +x /entrypoint-jupyter.sh

ENTRYPOINT [ "/entrypoint-jupyter.sh" ]

FROM pytorch AS airc

RUN python3 -m venv --system-site-packages /opt/airc/venv

# Don't install the full oneAPI essentials; just the ones that we seem to need
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
    | gpg --dearmor -o /usr/share/keyrings/oneapi-archive-keyring.gpg \
    && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
    | tee /etc/apt/sources.list.d/oneAPI.list \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        intel-oneapi-mkl-sycl-2025.0 \
        intel-oneapi-dnnl-2025.0 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN { \
        echo '#!/bin/bash' ; \
        echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
        echo 'source /opt/intel/oneapi/setvars.sh' ; \
        echo 'source /opt/airc/venv/bin/activate' ; \
        echo 'if [[ "$1" == "" ]]; then bash; else bash -c "${@}"; fi' ; \
    } > /opt/airc/shell ; \
    chmod +x /opt/airc/shell

SHELL [ "/opt/airc/shell" ]

RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu

# Install the ipex wheel built in the ipex-2.6.10 stage
COPY --from=ipex-2.6.10 /opt/ipex-2.6.10/dist/*.whl /opt/wheels/
RUN for pkg in /opt/wheels/intel_extension_for_pytorch-2.6.10*.whl; do pip install $pkg[xpu-2-6]; done

# Install the ipex-llm wheel built in the ipex-llm-src stage
COPY --from=ipex-llm-src /opt/ipex-llm/python/llm/dist/*.whl /opt/wheels/
RUN for pkg in /opt/wheels/ipex_llm*.whl; do pip install $pkg; done

COPY src/ /opt/airc/src/

# pydle does not work with newer asyncio because the asyncio.coroutine
# decorator has been removed. Patch it to work.
RUN pip3 install pydle transformers sentencepiece accelerate \
    && patch -d /opt/airc/venv/lib/python3*/site-packages/pydle \
        -p1 < /opt/airc/src/pydle.patch

# mistral fails with cache_position errors with transformers>4.40 (or at least it fails with the latest)
RUN pip install transformers==4.40

# To get xe_linear and other Xe methods
RUN pip3 install 'bigdl-core-xe-all>=2.6.0b'

# trl.core doesn't have what is needed with the default 'pip install trl' version
RUN pip install git+https://github.com/huggingface/trl.git@7630f877f91c556d9e5a3baa4b6e2894d90ff84c

# Needed by src/model-server.py
RUN pip install flask

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash' ; \
        echo 'set -e' ; \
        echo 'if [[ ! -e "/root/.cache/hub/token" ]]; then' ; \
        echo ' if [[ "${HF_ACCESS_TOKEN}" == "" ]]; then' ; \
        echo ' echo "Set your HF access token in .env as: HF_ACCESS_TOKEN=<token>" >&2' ; \
        echo ' exit 1' ; \
        echo ' else' ; \
        echo ' if [[ ! -d '/root/.cache/hub' ]]; then mkdir -p /root/.cache/hub; fi' ; \
        echo ' echo "${HF_ACCESS_TOKEN}" > /root/.cache/hub/token' ; \
        echo ' fi' ; \
        echo 'fi' ; \
        echo 'echo "Container: airc"' ; \
        echo 'echo "Setting pip environment to /opt/airc"' ; \
        echo 'source /opt/intel/oneapi/setvars.sh'; \
        echo 'source /opt/airc/venv/bin/activate'; \
        echo 'if [[ "${1}" == "shell" ]] || [[ "${1}" == "/bin/bash" ]]; then' ; \
        echo ' echo "Dropping to shell"' ; \
        echo ' /bin/bash -c "source /opt/airc/venv/bin/activate ; /bin/bash"' ; \
        echo ' exit $?' ; \
        echo 'else' ; \
        echo ' while true; do' ; \
        echo ' echo "Launching model-server"' ; \
        echo ' python src/model-server.py \' ; \
        echo ' 2>&1 | tee -a "/root/.cache/model-server.log"'; \
        echo ' echo "model-server died ($?). Restarting."' ; \
        echo ' sleep 5' ; \
        echo ' done &' ; \
        echo ' while true; do' ; \
        echo ' echo "Launching airc"' ; \
        echo ' python src/airc.py "${@}" \' ; \
        echo ' 2>&1 | tee -a "/root/.cache/airc.log"' ; \
        echo ' echo "airc died ($?). Restarting."' ; \
        echo ' sleep 5' ; \
        echo ' done' ; \
        echo 'fi' ; \
    } > /entrypoint-airc.sh \
    && chmod +x /entrypoint-airc.sh

COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
RUN dpkg -i /opt/ze-monitor-*deb

WORKDIR /opt/airc

ENTRYPOINT [ "/entrypoint-airc.sh" ]

24
LICENSE Normal file

@@ -0,0 +1,24 @@
BSD 2-Clause License

Copyright (c) 2025, James Ketrenos

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

86
README.md Normal file

@@ -0,0 +1,86 @@
# AIRC (pronounced Eric)

AI is Really Cool

NOTE: Intel Arc A-series graphics processors do not support fp64; models that require it may need fp64 emulation, or the model may need to be quantized.

This project provides container definitions for building PyTorch 2.6 with Intel's ipex-llm project. In addition, it provides a small local chat server and an IRC client that together act as a chat bot.

# Installation

This project builds with Docker containers. As it was originally written to work on an Intel Arc B580 (Battlemage), it requires a kernel that supports that hardware, such as the one documented at [Intel Graphics Preview](https://github.com/canonical/intel-graphics-preview), which runs in Ubuntu Oracular (24.10).

NOTE: You need `docker compose` installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/).

## Want to run under WSL2? No can do...

https://www.intel.com/content/www/us/en/support/articles/000093216/graphics/processor-graphics.html

The A- and B-series discrete GPUs do not support SR-IOV, which is required for the GPU partitioning that Microsoft Windows uses to provide GPU acceleration in WSL.

## Building

```bash
git clone https://github.com/jketreno/airc
cd airc
docker compose build
```
## Running

In order to download the models, you need a Hugging Face token. See https://huggingface.co/settings/tokens for information on obtaining one.

Edit `.env` to add the following:

```.env
HF_ACCESS_TOKEN=<access token from huggingface>
```
NOTE: Models downloaded by most examples will be placed in the
./cache directory, which is bind mounted to the container.
### AIRC

To launch the airc shell interactively, with the PyTorch 2.6 environment loaded, use the default entrypoint to launch a shell:

```bash
docker compose run --rm airc shell
```
Once in the shell, you can launch model-server.py and then the airc.py client:

```bash
docker compose run --rm airc shell
src/airc.py --ai-server=http://localhost:5000 &
src/model-server.py
```
By default, src/airc.py will connect to irc.libera.chat and join the #airc-test channel. See `python src/airc.py --help` for options.
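
All of those defaults can be spelled out explicitly. For example, to keep the stock server and channel while raising the log level:

```bash
python src/airc.py \
    --server irc.libera.chat \
    --port 6667 \
    --nickname airc \
    --channel '#airc-test' \
    --ai-server http://localhost:5000 \
    --level DEBUG
```
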
By separating the model-server into its own process, you can develop
and tweak the chat backend without losing the IRC connection established
by airc.
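
Since model-server exposes an OpenAI-compatible API on port 5000, you can also exercise the backend directly. For example, from inside the container (the port is not published to the host in docker-compose.yml):

```bash
curl -s http://localhost:5000/v1/chat/completions \
    -H 'Content-Type: application/json' \
    -d '{"messages": [{"role": "user", "content": "Hello"}], "max_tokens": 100}'
```

A `GET /health` endpoint is also available for simple liveness checks.
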
### Jupyter
```bash
docker compose up jupyter -d
```
The default port for inbound connections is 8888 (see docker-compose.yml).

`$(pwd)/jupyter` is bind mounted to /opt/jupyter in the container, which is where notebooks will be saved by default.

To access the Jupyter notebook, go to `http://localhost:8888/jupyter`.
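
Because ./cache is bind mounted to /root/.cache in the container, the Jupyter server log written by the entrypoint can be followed from the host:

```bash
tail -f cache/jupyter.log
```
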

0
cache/.keep vendored Normal file

31
docker-compose.yml Normal file

@@ -0,0 +1,31 @@
services:
  airc:
    build:
      context: .
      dockerfile: Dockerfile
      target: airc
    image: airc
    restart: "no"
    env_file:
      - .env
    devices:
      - /dev/dri:/dev/dri
    volumes:
      - ./cache:/root/.cache
      - ./src:/opt/airc/src:rw
  jupyter:
    build:
      context: .
      dockerfile: Dockerfile
      target: jupyter
    image: jupyter
    env_file:
      - .env
    devices:
      - /dev/dri:/dev/dri
    ports:
      - 8888:8888 # Jupyter Notebook
    volumes:
      - ./jupyter:/opt/jupyter:rw
      - ./cache:/root/.cache

0
src/.keep Normal file

187
src/airc.py Normal file

@@ -0,0 +1,187 @@
import argparse
import asyncio
import logging
import re
from typing import Any, Dict

import aiohttp
import pydle


def parse_args():
    parser = argparse.ArgumentParser(description="AI is Really Cool")
    parser.add_argument("--server", type=str, default="irc.libera.chat", help="IRC server address")
    parser.add_argument("--port", type=int, default=6667, help="IRC server port")
    parser.add_argument("--nickname", type=str, default="airc", help="Bot nickname")
    parser.add_argument("--channel", type=str, default="#airc-test", help="Channel to join")
    parser.add_argument("--ai-server", type=str, default="http://localhost:5000", help="OpenAI API endpoint")
    parser.add_argument('--level', type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
                        default='INFO', help='Set the logging level.')
    return parser.parse_args()


class AsyncOpenAIClient:
    """Minimal async client for an OpenAI-compatible chat completion endpoint."""

    def __init__(self, base_url: str = "http://localhost:5000"):
        logging.info(f"Using {base_url} as server")
        self.base_url = base_url
        self.session = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()

    async def chat_completion(self,
                              messages: list,
                              model: str = "my-model",
                              temperature: float = 0.7,
                              max_tokens: int = 100) -> Dict[str, Any]:
        """
        Make an async chat completion request
        """
        url = f"{self.base_url}/v1/chat/completions"
        # Prepare the request payload
        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens
        }
        try:
            async with self.session.post(url, json=payload) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise Exception(f"Request failed with status {response.status}: {error_text}")
                return await response.json()
        except Exception as e:
            print(f"Error during request: {str(e)}")
            return {"error": str(e)}


def setup_logging(level):
    numeric_level = getattr(logging, level.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError(f"Invalid log level: {level}")
    logging.basicConfig(level=numeric_level, format='%(asctime)s - %(levelname)s - %(message)s')
    logging.info(f"Logging is set to {level} level.")


class AIRC(pydle.Client):
    """IRC bot that forwards messages addressed to it to the chat server."""

    def __init__(self, nick, channel, client, burst_limit=5, rate_limit=1.0, burst_reset_timeout=10.0):
        super().__init__(nick)
        self.nick = nick
        self.channel = channel
        self.burst_limit = burst_limit
        self.rate_limit = rate_limit
        self.burst_reset_timeout = burst_reset_timeout
        self.sent_burst = 0  # Track messages sent in burst
        self.last_message_time = None  # Track last message time
        self.queries = 0  # Queries handled, reported by the 'stats' command
        self.processing = 0  # Cumulative processing time in seconds
        self.system_input = "You are a critical assistant. Give concise and accurate answers in less than 120 characters."
        self._message_queue = asyncio.Queue()
        self._task = asyncio.create_task(self._send_from_queue())
        self.client = client

    async def _send_from_queue(self):
        """Background task that sends queued messages with burst + rate limiting."""
        while True:
            target, message = await self._message_queue.get()
            # If burst is still available, send immediately
            if self.sent_burst < self.burst_limit:
                self.sent_burst += 1
            else:
                await asyncio.sleep(self.rate_limit)  # Apply rate limit
            await super().message(target, message)  # Send message
            self.last_message_time = asyncio.get_event_loop().time()  # Update last message timestamp
            # Start burst reset countdown after each message
            asyncio.create_task(self._reset_burst_after_inactivity())

    async def _reset_burst_after_inactivity(self):
        """Resets burst counter only if no new messages are sent within timeout."""
        last_time = self.last_message_time
        await asyncio.sleep(self.burst_reset_timeout)  # Wait for inactivity period
        # Only reset if no new messages were sent during the wait
        if self.last_message_time == last_time:
            self.sent_burst = 0
            logging.info("Burst limit reset due to inactivity.")

    async def message(self, target, message):
        """Splits a multi-line message and sends each line separately."""
        for line in message.splitlines():  # Splits on both '\n' and '\r\n'
            if line.strip():  # Ignore empty lines
                await self._message_queue.put((target, line))

    async def on_connect(self):
        logging.debug('on_connect')
        await self.join(self.channel)

    def remove_substring(self, string, substring):
        return string.replace(substring, "")

    def extract_nick_message(self, input_string):
        # Pattern with capturing groups for nick and message: "<nick>: <message>"
        pattern = r"^\s*([^\s:]+?)\s*:\s*(.+?)$"
        match = re.match(pattern, input_string)
        if match:
            nick = match.group(1)  # First capturing group
            message = match.group(2)  # Second capturing group
            return nick, message
        return None, None  # Return None for both if no match

    async def on_message(self, target, source, message):
        if source == self.nick:
            return
        nick, body = self.extract_nick_message(message)
        if nick == self.nick:
            content = None
            if body == "stats":
                content = f"{self.queries} queries handled in {self.processing}s"
            else:
                messages = [
                    {"role": "system", "content": self.system_input},
                    {"role": "user", "content": body}
                ]
                # Make the request
                response = await self.client.chat_completion(messages)
                # Extract and print just the assistant's message if available
                if "choices" in response and len(response["choices"]) > 0:
                    content = response["choices"][0]["message"]["content"]
                    print(f"\nAssistant: {content}")
            if content:
                logging.info(f'Sending: {content}')
                await self.message(target, f"{content}")


async def main():
    # Parse command-line arguments
    args = parse_args()
    # Setup logging based on the provided level
    setup_logging(args.level)
    async with AsyncOpenAIClient(base_url=args.ai_server) as client:
        bot = AIRC(args.nickname, args.channel, client)
        await bot.connect(args.server, args.port, tls=False)
        await bot.handle_forever()


if __name__ == "__main__":
    asyncio.run(main())

171
src/model-server.py Normal file

@@ -0,0 +1,171 @@
import argparse
import datetime
import logging
import os
import time

import torch
import transformers
from flask import Flask, request, jsonify
from ipex_llm.transformers import AutoModelForCausalLM


def parse_args():
    parser = argparse.ArgumentParser(description="AI is Really Cool Server")
    parser.add_argument("--device", type=int, default=0, help="Device # to use for inference. See --device-list")
    #parser.add_argument("--device-list", help="List available devices")
    parser.add_argument('--level', type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
                        default='INFO', help='Set the logging level.')
    return parser.parse_args()


def setup_logging(level):
    numeric_level = getattr(logging, level.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError(f"Invalid log level: {level}")
    logging.basicConfig(level=numeric_level, format='%(asctime)s - %(levelname)s - %(message)s')
    logging.info(f"Logging is set to {level} level.")


class Chat():
    """Loads a 4-bit quantized model via ipex-llm and generates chat responses."""

    def __init__(self, device_name):
        super().__init__()
        self.device_name = device_name
        self.system_input = "You are a critical assistant. Give concise and accurate answers in less than 120 characters."
        self.context = None
        self.model_path = 'Intel/neural-chat-7b-v3-3'
        try:
            logging.info(f"Loading tokenizer from: {self.model_path}")
            self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True)
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token  # Set pad_token to eos_token if needed
            self.model = AutoModelForCausalLM.from_pretrained(self.model_path,
                                                              load_in_4bit=True,
                                                              optimize_model=True,
                                                              trust_remote_code=True,
                                                              use_cache=True)
            self.model = self.model.half().to(device_name)
        except Exception as e:
            logging.error(f"Loading error: {e}")

    def remove_substring(self, string, substring):
        return string.replace(substring, "")

    def generate_response(self, text):
        prompt = f"### System:\n{self.system_input}\n### User:\n{text}\n### Assistant:\n"
        start = time.time()
        with torch.autocast(self.device_name, dtype=torch.float16):
            inputs = self.tokenizer.encode_plus(
                prompt,
                add_special_tokens=False,
                return_tensors="pt",
                max_length=1000,  # Prevent 'Asking to truncate to max_length...'
                padding=True,  # Handles padding automatically
                truncation=True
            )
            input_ids = inputs["input_ids"].to(self.device_name)
            attention_mask = inputs["attention_mask"].to(self.device_name)
            outputs = self.model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_length=1000,
                num_return_sequences=1,
                pad_token_id=self.tokenizer.eos_token_id
            )
            final_outputs = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            final_outputs = self.remove_substring(final_outputs, prompt).strip()
        end = time.time()
        return final_outputs, datetime.timedelta(seconds=end - start)


app = Flask(__name__)


# Basic endpoint for chat completions
@app.route('/v1/chat/completions', methods=['POST'])
def chat_completions():
    logging.info('/v1/chat/completions')
    try:
        # Get the JSON data from the request
        data = request.get_json()
        # Extract relevant fields from the request
        model = data.get('model', 'default-model')
        messages = data.get('messages', [])
        temperature = data.get('temperature', 1.0)
        max_tokens = data.get('max_tokens', 2048)
        chat = app.config['chat']
        logging.info(f"Query: {messages}")
        response_content, _ = chat.generate_response(messages[-1]['content'])
        logging.info(f"Response: {response_content}")
        # Format response in OpenAI-compatible structure
        response = {
            "id": "chatcmpl-" + str(id(data)),  # Simple unique ID
            "object": "chat.completion",
            "created": int(time.time()),
            "model": chat.model_path,
            "choices": [{
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": response_content
                },
                "finish_reason": "stop"
            }],
            # "usage": {
            #     "prompt_tokens": len(str(messages).split()),
            #     "completion_tokens": len(response_content.split()),
            #     "total_tokens": len(str(messages).split()) + len(response_content.split())
            # }
        }
        return jsonify(response)
    except Exception as e:
        logging.error(e)
        return jsonify({
            "error": {
                "message": str(e),
                "type": "invalid_request_error"
            }
        }), 400


# Health check endpoint
@app.route('/health', methods=['GET'])
def health():
    return jsonify({"status": "healthy"}), 200


if __name__ == '__main__':
    # Parse command-line arguments
    args = parse_args()
    # Setup logging based on the provided level
    setup_logging(args.level)
    if not torch.xpu.is_available():
        logging.error("No XPU available.")
        exit(1)
    device_count = torch.xpu.device_count()
    for i in range(device_count):
        logging.info(f"Device {i}: {torch.xpu.get_device_name(i)} Total memory: {torch.xpu.get_device_properties(i).total_memory}")
    device_name = 'xpu'
    device = torch.device(device_name)
    print(f"Using device: {device}")
    # Set environment variables that might help with XPU stability
    os.environ["SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS"] = "1"
    app.config['chat'] = Chat(device_name)
    app.run(host='0.0.0.0', port=5000, debug=True)

56
src/pydle.patch Normal file

@@ -0,0 +1,56 @@
diff --git a/__init__.py b/__init__.py
index 2ead20d..892471b 100644
--- a/__init__.py
+++ b/__init__.py
@@ -1,11 +1,21 @@
 # noinspection PyUnresolvedReferences
-from asyncio import coroutine, Future
+from asyncio import Future
 from functools import cmp_to_key
 
 from . import connection, protocol, client, features
 from .client import Error, NotInChannel, AlreadyInChannel, BasicClient, ClientPool
 from .features.ircv3.cap import NEGOTIATING as CAPABILITY_NEGOTIATING, FAILED as CAPABILITY_FAILED, \
     NEGOTIATED as CAPABILITY_NEGOTIATED
+import asyncio
+# And use asyncio.coroutine where it was used, although it's better to switch to async def
+# However, since 'coroutine' decorator is removed, you would actually need to:
+from functools import wraps
+
+def coroutine(func):
+    @wraps(func)
+    async def wrapper(*args, **kwargs):
+        return func(*args, **kwargs)
+    return wrapper
+
 
 __name__ = 'pydle'
 __version__ = '0.9.4rc1'
diff --git a/connection.py b/connection.py
index c9a9e8e..5445b0e 100644
--- a/connection.py
+++ b/connection.py
@@ -37,6 +37,7 @@ class Connection:
         self.reader = None
         self.writer = None
         self.eventloop = eventloop or asyncio.new_event_loop()
+        self.lock = asyncio.Lock()
 
     async def connect(self):
         """ Connect to target. """
@@ -49,8 +50,7 @@ class Connection:
             host=self.hostname,
             port=self.port,
             local_addr=self.source_address,
-            ssl=self.tls_context,
-            loop=self.eventloop
+            ssl=self.tls_context
         )
 
     def create_tls_context(self):
@@ -112,4 +112,5 @@ class Connection:
         await self.writer.drain()
 
     async def recv(self, *, timeout=None):
-        return await asyncio.wait_for(self.reader.readline(), timeout=timeout)
+        async with self.lock:
+            return await asyncio.wait_for(self.reader.readline(), timeout=timeout)