Initial commit

James Ketrenos 2025-03-06 13:48:18 -08:00
commit d0b652aa09
11 changed files with 955 additions and 0 deletions

2
.dockerignore Normal file

@@ -0,0 +1,2 @@
*
!src

2
.gitignore vendored Normal file

@@ -0,0 +1,2 @@
.env
cache/**

396
Dockerfile Normal file

@@ -0,0 +1,396 @@
FROM ubuntu:oracular AS pytorch-build

SHELL [ "/bin/bash", "-c" ]

# Instructions Dockerfied from:
#
# https://github.com/pytorch/pytorch
#
# and
#
# https://pytorch.org/docs/stable/notes/get_start_xpu.html
# https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-6.html
#

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gpg \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

# ipex only supports Python 3.11, so use 3.11 instead of latest oracular (3.12)
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        ca-certificates \
        ccache \
        cmake \
        curl \
        git \
        gpg-agent \
        less \
        libbz2-dev \
        libffi-dev \
        libjpeg-dev \
        libpng-dev \
        libreadline-dev \
        libssl-dev \
        libsqlite3-dev \
        llvm \
        nano \
        wget \
        zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
#       python3 \
#       python3-pip \
#       python3-venv \
#       python3-dev \

RUN /usr/sbin/update-ccache-symlinks
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache

# Build Python in /opt/..., install it locally, then remove the build tree,
# collapsed to a single docker layer.
WORKDIR /opt
ENV PYTHON_VERSION=3.11.9
RUN wget -q -O - https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz | tar -xz \
    && cd Python-${PYTHON_VERSION} \
    && ./configure --prefix=/opt/python --enable-optimizations \
    && make -j$(nproc) \
    && make install \
    && cd /opt \
    && rm -rf Python-${PYTHON_VERSION}

WORKDIR /opt/pytorch

FROM ubuntu:oracular AS ze-monitor

# From https://github.com/jketreno/ze-monitor
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        debhelper \
        devscripts \
        cmake \
        git \
        libfmt-dev \
        libncurses-dev \
        rpm \
        rpm2cpio \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN apt-get install -y \
        software-properties-common \
    && add-apt-repository -y ppa:kobuk-team/intel-graphics \
    && apt-get update \
    && apt-get install -y \
        libze-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN git clone --depth 1 --branch v0.3.0-1 https://github.com/jketreno/ze-monitor /opt/ze-monitor
WORKDIR /opt/ze-monitor/build
RUN cmake .. \
    && make \
    && cpack

FROM pytorch-build AS pytorch

COPY --from=pytorch-build /opt/pytorch /opt/pytorch

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:kobuk-team/intel-graphics \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libze-intel-gpu1 \
        libze1 \
        intel-ocloc \
        intel-opencl-icd \
        xpu-smi \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN update-alternatives --install /usr/bin/python3 python3 /opt/python/bin/python3.11 2

# When cache is enabled, the SYCL runtime will try to cache and reuse JIT-compiled binaries.
ENV SYCL_CACHE_PERSISTENT=1

WORKDIR /opt/pytorch

# NOTE: the update-alternatives line runs at build time; only the echoed
# lines are written into the wrapper script.
RUN { \
        echo '#!/bin/bash' ; \
        update-alternatives --set python3 /opt/python/bin/python3.11 ; \
        echo 'source /opt/pytorch/venv/bin/activate' ; \
        echo 'bash -c "${@}"' ; \
    } > /opt/pytorch/shell ; \
    chmod +x /opt/pytorch/shell

RUN python3 -m venv --system-site-packages /opt/pytorch/venv

SHELL [ "/opt/pytorch/shell" ]

RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu
RUN pip3 freeze > /opt/pytorch/requirements.txt

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash' ; \
        echo 'echo "Container: pytorch"' ; \
        echo 'set -e' ; \
        echo 'echo "Setting pip environment to /opt/pytorch"' ; \
        echo 'source /opt/pytorch/venv/bin/activate'; \
        echo 'if [[ "${1}" == "" ]] || [[ "${1}" == "shell" ]]; then' ; \
        echo ' echo "Dropping to shell"' ; \
        echo ' /bin/bash -c "source /opt/pytorch/venv/bin/activate ; /bin/bash"' ; \
        echo 'else' ; \
        echo ' exec "${@}"' ; \
        echo 'fi' ; \
    } > /entrypoint.sh \
    && chmod +x /entrypoint.sh

ENTRYPOINT [ "/entrypoint.sh" ]

FROM pytorch AS ipex-2.6.10

WORKDIR /opt
RUN git clone --branch release/xpu/2.6.10 --depth 1 https://github.com/intel/intel-extension-for-pytorch.git ipex-2.6.10
WORKDIR /opt/ipex-2.6.10

RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
    | gpg --dearmor -o /usr/share/keyrings/oneapi-archive-keyring.gpg \
    && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
    | tee /etc/apt/sources.list.d/oneAPI.list \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        intel-deep-learning-essentials-2025.0 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

# Requirements for building ipex / oneAPI...
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libspdlog-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN python3 -m venv --system-site-packages /opt/ipex-2.6.10/venv

RUN { \
        echo '#!/bin/bash' ; \
        update-alternatives --set python3 /opt/python/bin/python3.11 ; \
        echo 'source /opt/intel/oneapi/setvars.sh' ; \
        echo 'source /opt/ipex-2.6.10/venv/bin/activate' ; \
        echo 'bash -c "${@}"' ; \
    } > /opt/ipex-2.6.10/shell ; \
    chmod +x /opt/ipex-2.6.10/shell

SHELL [ "/opt/ipex-2.6.10/shell" ]

#RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu
RUN pip3 install -r requirements.txt
RUN git submodule update --init --recursive --depth 1

# Building the ipex-2.6.10 wheel requires the Level Zero loader (libze-dev)
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libze-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

# torch needs to be installed before the ipex wheel can be built
RUN pip3 install torch --index-url https://download.pytorch.org/whl/test/xpu
RUN python setup.py bdist_wheel

FROM pytorch AS ipex-llm-src

# Build ipex-llm from source
RUN git clone --depth 1 https://github.com/intel/ipex-llm.git /opt/ipex-llm
WORKDIR /opt/ipex-llm

RUN python3 -m venv --system-site-packages /opt/ipex-llm/venv

RUN { \
        echo '#!/bin/bash' ; \
        update-alternatives --set python3 /opt/python/bin/python3.11 ; \
        echo 'source /opt/ipex-llm/venv/bin/activate' ; \
        echo 'bash -c "${@}"' ; \
    } > /opt/ipex-llm/shell ; \
    chmod +x /opt/ipex-llm/shell

SHELL [ "/opt/ipex-llm/shell" ]

RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu

COPY --from=ipex-2.6.10 /opt/ipex-2.6.10/dist/intel_extension_for_pytorch-2.6.10*.whl /opt/wheels/
RUN for pkg in /opt/wheels/intel_extension_for_pytorch-2.6.10*.whl; do pip install $pkg[xpu-2-6]; done

WORKDIR /opt/ipex-llm/python/llm
RUN pip install requests wheel
RUN python setup.py clean --all bdist_wheel --linux

# NOTE: this stage builds on the airc stage, which is defined later in this file.
FROM airc AS jupyter

SHELL [ "/opt/airc/shell" ]

# BEGIN setup Jupyter
RUN pip install jupyter \
        jupyterlab==4.3.0a0 \
        jupyterhub==5.0.0 \
        notebook==7.3.0a0 \
        "jupyter-server-proxy>=4.1.2"
# END setup Jupyter

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash' ; \
        echo 'echo "Container: airc jupyter"' ; \
        echo 'if [[ ! -e "/root/.cache/hub/token" ]]; then' ; \
        echo ' if [[ "${HF_ACCESS_TOKEN}" == "" ]]; then' ; \
        echo ' echo "Set your HF access token in .env as: HF_ACCESS_TOKEN=<token>" >&2' ; \
        echo ' exit 1' ; \
        echo ' else' ; \
        echo ' if [[ ! -d '/root/.cache/hub' ]]; then mkdir -p /root/.cache/hub; fi' ; \
        echo ' echo "${HF_ACCESS_TOKEN}" > /root/.cache/hub/token' ; \
        echo ' fi' ; \
        echo 'fi' ; \
        echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
        echo 'source /opt/intel/oneapi/setvars.sh' ; \
        echo 'source /opt/airc/venv/bin/activate' ; \
        echo 'if [[ "${1}" == "shell" ]]; then echo "Dropping to shell"; /bin/bash; exit $?; fi' ; \
        echo 'while true; do' ; \
        echo ' echo "Launching jupyter notebook"' ; \
        echo ' jupyter notebook \' ; \
        echo ' --notebook-dir=/opt/jupyter \' ; \
        echo ' --port 8888 \' ; \
        echo ' --ip 0.0.0.0 \' ; \
        echo ' --no-browser \' ; \
        echo ' --allow-root \' ; \
        echo ' --ServerApp.token= \' ; \
        echo ' --ServerApp.password= \' ; \
        echo ' --ServerApp.allow_origin=* \' ; \
        echo ' --ServerApp.base_url="/jupyter" \' ; \
        echo ' "${@}" \' ; \
        echo ' >> "/root/.cache/jupyter.log" 2>&1' ; \
        echo ' echo "jupyter notebook died ($?). Restarting."' ; \
        echo ' sleep 5' ; \
        echo 'done' ; \
    } > /entrypoint-jupyter.sh \
    && chmod +x /entrypoint-jupyter.sh

ENTRYPOINT [ "/entrypoint-jupyter.sh" ]

FROM pytorch AS airc

RUN python3 -m venv --system-site-packages /opt/airc/venv

# Don't install the full oneAPI essentials; just the ones that we seem to need
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
    | gpg --dearmor -o /usr/share/keyrings/oneapi-archive-keyring.gpg \
    && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
    | tee /etc/apt/sources.list.d/oneAPI.list \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        intel-oneapi-mkl-sycl-2025.0 \
        intel-oneapi-dnnl-2025.0 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}

RUN { \
        echo '#!/bin/bash' ; \
        echo 'update-alternatives --set python3 /opt/python/bin/python3.11' ; \
        echo 'source /opt/intel/oneapi/setvars.sh' ; \
        echo 'source /opt/airc/venv/bin/activate' ; \
        echo 'if [[ "$1" == "" ]]; then bash; else bash -c "${@}"; fi' ; \
    } > /opt/airc/shell ; \
    chmod +x /opt/airc/shell

SHELL [ "/opt/airc/shell" ]

RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu

# Install the ipex wheel built in the ipex-2.6.10 stage
COPY --from=ipex-2.6.10 /opt/ipex-2.6.10/dist/*.whl /opt/wheels/
RUN for pkg in /opt/wheels/intel_extension_for_pytorch-2.6.10*.whl; do pip install $pkg[xpu-2-6]; done

# Install the ipex-llm wheel built in the ipex-llm-src stage
COPY --from=ipex-llm-src /opt/ipex-llm/python/llm/dist/*.whl /opt/wheels/
RUN for pkg in /opt/wheels/ipex_llm*.whl; do pip install $pkg; done

COPY src/ /opt/airc/src/

# pydle does not work with newer asyncio because the asyncio.coroutine
# decorator has been removed. Patch it to work.
RUN pip3 install pydle transformers sentencepiece accelerate \
    && patch -d /opt/airc/venv/lib/python3*/site-packages/pydle \
        -p1 < /opt/airc/src/pydle.patch

# mistral fails with cache_position errors with transformers>4.40 (or at least it fails with the latest)
RUN pip install transformers==4.40

# To get xe_linear and other Xe methods
RUN pip3 install 'bigdl-core-xe-all>=2.6.0b'

# trl.core doesn't have what is needed with the default 'pip install trl' version
RUN pip install git+https://github.com/huggingface/trl.git@7630f877f91c556d9e5a3baa4b6e2894d90ff84c

# Needed by src/model-server.py
RUN pip install flask

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash' ; \
        echo 'set -e' ; \
        echo 'if [[ ! -e "/root/.cache/hub/token" ]]; then' ; \
        echo ' if [[ "${HF_ACCESS_TOKEN}" == "" ]]; then' ; \
        echo ' echo "Set your HF access token in .env as: HF_ACCESS_TOKEN=<token>" >&2' ; \
        echo ' exit 1' ; \
        echo ' else' ; \
        echo ' if [[ ! -d '/root/.cache/hub' ]]; then mkdir -p /root/.cache/hub; fi' ; \
        echo ' echo "${HF_ACCESS_TOKEN}" > /root/.cache/hub/token' ; \
        echo ' fi' ; \
        echo 'fi' ; \
        echo 'echo "Container: airc"' ; \
        echo 'echo "Setting pip environment to /opt/airc"' ; \
        echo 'source /opt/intel/oneapi/setvars.sh'; \
        echo 'source /opt/airc/venv/bin/activate'; \
        echo 'if [[ "${1}" == "shell" ]] || [[ "${1}" == "/bin/bash" ]]; then' ; \
        echo ' echo "Dropping to shell"' ; \
        echo ' /bin/bash -c "source /opt/airc/venv/bin/activate ; /bin/bash"' ; \
        echo ' exit $?' ; \
        echo 'else' ; \
        echo ' while true; do' ; \
        echo ' echo "Launching model-server"' ; \
        echo ' python src/model-server.py \' ; \
        echo ' 2>&1 | tee -a "/root/.cache/model-server.log"'; \
        echo ' echo "model-server died ($?). Restarting."' ; \
        echo ' sleep 5' ; \
        echo ' done &' ; \
        echo ' while true; do' ; \
        echo ' echo "Launching airc"' ; \
        echo ' python src/airc.py "${@}" \' ; \
        echo ' 2>&1 | tee -a "/root/.cache/airc.log"' ; \
        echo ' echo "airc died ($?). Restarting."' ; \
        echo ' sleep 5' ; \
        echo ' done' ; \
        echo 'fi' ; \
    } > /entrypoint-airc.sh \
    && chmod +x /entrypoint-airc.sh

COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
RUN dpkg -i /opt/ze-monitor-*deb

WORKDIR /opt/airc

ENTRYPOINT [ "/entrypoint-airc.sh" ]

24
LICENSE Normal file

@@ -0,0 +1,24 @@
BSD 2-Clause License

Copyright (c) 2025, James Ketrenos

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

86
README.md Normal file

@@ -0,0 +1,86 @@
# AIRC (pronounced Eric)

AI is Really Cool

NOTE: Intel Arc A-series graphics processors do not support fp64; models that require it may need fp64 emulation, or the model may need to be quantized.

This project provides container definitions for building PyTorch 2.6 with Intel's ipex-llm project. In addition, it provides a small local chat server and an IRC client that together act as a chat bot.

# Installation

This project builds with Docker containers. As it was originally written to work on an Intel Arc B580 (Battlemage), it requires a kernel that supports that hardware, such as the one documented at [Intel Graphics Preview](https://github.com/canonical/intel-graphics-preview), which runs in Ubuntu Oracular (24.10).

NOTE: You need `docker compose` installed. See [Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/).

## Want to run under WSL2? No can do...

https://www.intel.com/content/www/us/en/support/articles/000093216/graphics/processor-graphics.html

The A- and B-series discrete GPUs do not support SR-IOV, which is required for the GPU partitioning that Microsoft Windows uses to provide GPU acceleration in WSL.

## Building

```bash
git clone https://github.com/jketreno/airc
cd airc
docker compose build
```
## Running

In order to download the models, you need a Hugging Face token. See https://huggingface.co/settings/tokens for information on obtaining one.

Edit `.env` to add the following:

```.env
HF_ACCESS_TOKEN=<access token from huggingface>
```
NOTE: Models downloaded by most examples will be placed in the
./cache directory, which is bind mounted to the container.
### AIRC

To launch the airc shell interactively, with the PyTorch 2.6 environment loaded, use the default entrypoint to launch a shell:

```bash
docker compose run --rm airc shell
```
Once in the shell, you can launch model-server.py and then the airc.py client:

```bash
docker compose run --rm airc shell
src/airc.py --ai-server=http://localhost:5000 &
src/model-server.py
```
By default, src/airc.py will connect to irc.libera.chat and join the #airc-test channel. See `python src/airc.py --help` for options.
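
All of those defaults can be spelled out explicitly. For example, to keep the stock server and channel while raising the log level:

```bash
python src/airc.py \
    --server irc.libera.chat \
    --port 6667 \
    --nickname airc \
    --channel '#airc-test' \
    --ai-server http://localhost:5000 \
    --level DEBUG
```
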
By separating the model-server into its own process, you can develop
and tweak the chat backend without losing the IRC connection established
by airc.
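
Since model-server exposes an OpenAI-compatible API on port 5000, you can also exercise the backend directly. For example, from inside the container (the port is not published to the host in docker-compose.yml):

```bash
curl -s http://localhost:5000/v1/chat/completions \
    -H 'Content-Type: application/json' \
    -d '{"messages": [{"role": "user", "content": "Hello"}], "max_tokens": 100}'
```

A `GET /health` endpoint is also available for simple liveness checks.
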
### Jupyter
```bash
docker compose up jupyter -d
```
The default port for inbound connections is 8888 (see docker-compose.yml).

`$(pwd)/jupyter` is bind mounted to /opt/jupyter in the container, which is where notebooks will be saved by default.

To access the Jupyter notebook, go to `http://localhost:8888/jupyter`.
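
Because ./cache is bind mounted to /root/.cache in the container, the Jupyter server log written by the entrypoint can be followed from the host:

```bash
tail -f cache/jupyter.log
```
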

0
cache/.keep vendored Normal file

31
docker-compose.yml Normal file

@@ -0,0 +1,31 @@
services:
  airc:
    build:
      context: .
      dockerfile: Dockerfile
      target: airc
    image: airc
    restart: "no"
    env_file:
      - .env
    devices:
      - /dev/dri:/dev/dri
    volumes:
      - ./cache:/root/.cache
      - ./src:/opt/airc/src:rw
  jupyter:
    build:
      context: .
      dockerfile: Dockerfile
      target: jupyter
    image: jupyter
    env_file:
      - .env
    devices:
      - /dev/dri:/dev/dri
    ports:
      - 8888:8888 # Jupyter Notebook
    volumes:
      - ./jupyter:/opt/jupyter:rw
      - ./cache:/root/.cache

0
src/.keep Normal file

187
src/airc.py Normal file

@@ -0,0 +1,187 @@
import argparse
import asyncio
import logging
import re
from typing import Any, Dict

import aiohttp
import pydle


def parse_args():
    parser = argparse.ArgumentParser(description="AI is Really Cool")
    parser.add_argument("--server", type=str, default="irc.libera.chat", help="IRC server address")
    parser.add_argument("--port", type=int, default=6667, help="IRC server port")
    parser.add_argument("--nickname", type=str, default="airc", help="Bot nickname")
    parser.add_argument("--channel", type=str, default="#airc-test", help="Channel to join")
    parser.add_argument("--ai-server", type=str, default="http://localhost:5000", help="OpenAI API endpoint")
    parser.add_argument('--level', type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
                        default='INFO', help='Set the logging level.')
    return parser.parse_args()


class AsyncOpenAIClient:
    """Minimal async client for an OpenAI-compatible chat completion endpoint."""

    def __init__(self, base_url: str = "http://localhost:5000"):
        logging.info(f"Using {base_url} as server")
        self.base_url = base_url
        self.session = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()

    async def chat_completion(self,
                              messages: list,
                              model: str = "my-model",
                              temperature: float = 0.7,
                              max_tokens: int = 100) -> Dict[str, Any]:
        """
        Make an async chat completion request
        """
        url = f"{self.base_url}/v1/chat/completions"
        # Prepare the request payload
        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens
        }
        try:
            async with self.session.post(url, json=payload) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise Exception(f"Request failed with status {response.status}: {error_text}")
                return await response.json()
        except Exception as e:
            print(f"Error during request: {str(e)}")
            return {"error": str(e)}


def setup_logging(level):
    numeric_level = getattr(logging, level.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError(f"Invalid log level: {level}")
    logging.basicConfig(level=numeric_level, format='%(asctime)s - %(levelname)s - %(message)s')
    logging.info(f"Logging is set to {level} level.")


class AIRC(pydle.Client):
    """IRC bot that forwards messages addressed to it to the chat server."""

    def __init__(self, nick, channel, client, burst_limit=5, rate_limit=1.0, burst_reset_timeout=10.0):
        super().__init__(nick)
        self.nick = nick
        self.channel = channel
        self.burst_limit = burst_limit
        self.rate_limit = rate_limit
        self.burst_reset_timeout = burst_reset_timeout
        self.sent_burst = 0  # Track messages sent in burst
        self.last_message_time = None  # Track last message time
        self.queries = 0  # Queries handled, reported by the 'stats' command
        self.processing = 0  # Cumulative processing time in seconds
        self.system_input = "You are a critical assistant. Give concise and accurate answers in less than 120 characters."
        self._message_queue = asyncio.Queue()
        self._task = asyncio.create_task(self._send_from_queue())
        self.client = client

    async def _send_from_queue(self):
        """Background task that sends queued messages with burst + rate limiting."""
        while True:
            target, message = await self._message_queue.get()
            # If burst is still available, send immediately
            if self.sent_burst < self.burst_limit:
                self.sent_burst += 1
            else:
                await asyncio.sleep(self.rate_limit)  # Apply rate limit
            await super().message(target, message)  # Send message
            self.last_message_time = asyncio.get_event_loop().time()  # Update last message timestamp
            # Start burst reset countdown after each message
            asyncio.create_task(self._reset_burst_after_inactivity())

    async def _reset_burst_after_inactivity(self):
        """Resets burst counter only if no new messages are sent within timeout."""
        last_time = self.last_message_time
        await asyncio.sleep(self.burst_reset_timeout)  # Wait for inactivity period
        # Only reset if no new messages were sent during the wait
        if self.last_message_time == last_time:
            self.sent_burst = 0
            logging.info("Burst limit reset due to inactivity.")

    async def message(self, target, message):
        """Splits a multi-line message and sends each line separately."""
        for line in message.splitlines():  # Splits on both '\n' and '\r\n'
            if line.strip():  # Ignore empty lines
                await self._message_queue.put((target, line))

    async def on_connect(self):
        logging.debug('on_connect')
        await self.join(self.channel)

    def remove_substring(self, string, substring):
        return string.replace(substring, "")

    def extract_nick_message(self, input_string):
        # Pattern with capturing groups for nick and message: "<nick>: <message>"
        pattern = r"^\s*([^\s:]+?)\s*:\s*(.+?)$"
        match = re.match(pattern, input_string)
        if match:
            nick = match.group(1)  # First capturing group
            message = match.group(2)  # Second capturing group
            return nick, message
        return None, None  # Return None for both if no match

    async def on_message(self, target, source, message):
        if source == self.nick:
            return
        nick, body = self.extract_nick_message(message)
        if nick == self.nick:
            content = None
            if body == "stats":
                content = f"{self.queries} queries handled in {self.processing}s"
            else:
                messages = [
                    {"role": "system", "content": self.system_input},
                    {"role": "user", "content": body}
                ]
                # Make the request
                response = await self.client.chat_completion(messages)
                # Extract and print just the assistant's message if available
                if "choices" in response and len(response["choices"]) > 0:
                    content = response["choices"][0]["message"]["content"]
                    print(f"\nAssistant: {content}")
            if content:
                logging.info(f'Sending: {content}')
                await self.message(target, f"{content}")


async def main():
    # Parse command-line arguments
    args = parse_args()
    # Setup logging based on the provided level
    setup_logging(args.level)
    async with AsyncOpenAIClient(base_url=args.ai_server) as client:
        bot = AIRC(args.nickname, args.channel, client)
        await bot.connect(args.server, args.port, tls=False)
        await bot.handle_forever()


if __name__ == "__main__":
    asyncio.run(main())

171
src/model-server.py Normal file

@@ -0,0 +1,171 @@
import argparse
import datetime
import logging
import os
import time

import torch
import transformers
from flask import Flask, request, jsonify
from ipex_llm.transformers import AutoModelForCausalLM


def parse_args():
    parser = argparse.ArgumentParser(description="AI is Really Cool Server")
    parser.add_argument("--device", type=int, default=0, help="Device # to use for inference. See --device-list")
    #parser.add_argument("--device-list", help="List available devices")
    parser.add_argument('--level', type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
                        default='INFO', help='Set the logging level.')
    return parser.parse_args()


def setup_logging(level):
    numeric_level = getattr(logging, level.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError(f"Invalid log level: {level}")
    logging.basicConfig(level=numeric_level, format='%(asctime)s - %(levelname)s - %(message)s')
    logging.info(f"Logging is set to {level} level.")


class Chat():
    """Loads a 4-bit quantized model via ipex-llm and generates chat responses."""

    def __init__(self, device_name):
        super().__init__()
        self.device_name = device_name
        self.system_input = "You are a critical assistant. Give concise and accurate answers in less than 120 characters."
        self.context = None
        self.model_path = 'Intel/neural-chat-7b-v3-3'
        try:
            logging.info(f"Loading tokenizer from: {self.model_path}")
            self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True)
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token  # Set pad_token to eos_token if needed
            self.model = AutoModelForCausalLM.from_pretrained(self.model_path,
                                                              load_in_4bit=True,
                                                              optimize_model=True,
                                                              trust_remote_code=True,
                                                              use_cache=True)
            self.model = self.model.half().to(device_name)
        except Exception as e:
            logging.error(f"Loading error: {e}")

    def remove_substring(self, string, substring):
        return string.replace(substring, "")

    def generate_response(self, text):
        prompt = f"### System:\n{self.system_input}\n### User:\n{text}\n### Assistant:\n"
        start = time.time()
        with torch.autocast(self.device_name, dtype=torch.float16):
            inputs = self.tokenizer.encode_plus(
                prompt,
                add_special_tokens=False,
                return_tensors="pt",
                max_length=1000,  # Prevent 'Asking to truncate to max_length...'
                padding=True,  # Handles padding automatically
                truncation=True
            )
            input_ids = inputs["input_ids"].to(self.device_name)
            attention_mask = inputs["attention_mask"].to(self.device_name)
            outputs = self.model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_length=1000,
                num_return_sequences=1,
                pad_token_id=self.tokenizer.eos_token_id
            )
            final_outputs = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            final_outputs = self.remove_substring(final_outputs, prompt).strip()
        end = time.time()
        return final_outputs, datetime.timedelta(seconds=end - start)


app = Flask(__name__)


# Basic endpoint for chat completions
@app.route('/v1/chat/completions', methods=['POST'])
def chat_completions():
    logging.info('/v1/chat/completions')
    try:
        # Get the JSON data from the request
        data = request.get_json()
        # Extract relevant fields from the request
        model = data.get('model', 'default-model')
        messages = data.get('messages', [])
        temperature = data.get('temperature', 1.0)
        max_tokens = data.get('max_tokens', 2048)
        chat = app.config['chat']
        logging.info(f"Query: {messages}")
        response_content, _ = chat.generate_response(messages[-1]['content'])
        logging.info(f"Response: {response_content}")
        # Format response in OpenAI-compatible structure
        response = {
            "id": "chatcmpl-" + str(id(data)),  # Simple unique ID
            "object": "chat.completion",
            "created": int(time.time()),
            "model": chat.model_path,
            "choices": [{
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": response_content
                },
                "finish_reason": "stop"
            }],
            # "usage": {
            #     "prompt_tokens": len(str(messages).split()),
            #     "completion_tokens": len(response_content.split()),
            #     "total_tokens": len(str(messages).split()) + len(response_content.split())
            # }
        }
        return jsonify(response)
    except Exception as e:
        logging.error(e)
        return jsonify({
            "error": {
                "message": str(e),
                "type": "invalid_request_error"
            }
        }), 400


# Health check endpoint
@app.route('/health', methods=['GET'])
def health():
    return jsonify({"status": "healthy"}), 200


if __name__ == '__main__':
    # Parse command-line arguments
    args = parse_args()
    # Setup logging based on the provided level
    setup_logging(args.level)
    if not torch.xpu.is_available():
        logging.error("No XPU available.")
        exit(1)
    device_count = torch.xpu.device_count()
    for i in range(device_count):
        logging.info(f"Device {i}: {torch.xpu.get_device_name(i)} Total memory: {torch.xpu.get_device_properties(i).total_memory}")
    device_name = 'xpu'
    device = torch.device(device_name)
    print(f"Using device: {device}")
    # Set environment variables that might help with XPU stability
    os.environ["SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS"] = "1"
    app.config['chat'] = Chat(device_name)
    app.run(host='0.0.0.0', port=5000, debug=True)

56
src/pydle.patch Normal file

@@ -0,0 +1,56 @@
diff --git a/__init__.py b/__init__.py
index 2ead20d..892471b 100644
--- a/__init__.py
+++ b/__init__.py
@@ -1,11 +1,21 @@
 # noinspection PyUnresolvedReferences
-from asyncio import coroutine, Future
+from asyncio import Future
 from functools import cmp_to_key
 
 from . import connection, protocol, client, features
 from .client import Error, NotInChannel, AlreadyInChannel, BasicClient, ClientPool
 from .features.ircv3.cap import NEGOTIATING as CAPABILITY_NEGOTIATING, FAILED as CAPABILITY_FAILED, \
     NEGOTIATED as CAPABILITY_NEGOTIATED
+import asyncio
+# And use asyncio.coroutine where it was used, although it's better to switch to async def
+# However, since 'coroutine' decorator is removed, you would actually need to:
+from functools import wraps
+
+def coroutine(func):
+    @wraps(func)
+    async def wrapper(*args, **kwargs):
+        return func(*args, **kwargs)
+    return wrapper
+
 
 __name__ = 'pydle'
 __version__ = '0.9.4rc1'
diff --git a/connection.py b/connection.py
index c9a9e8e..5445b0e 100644
--- a/connection.py
+++ b/connection.py
@@ -37,6 +37,7 @@ class Connection:
         self.reader = None
         self.writer = None
         self.eventloop = eventloop or asyncio.new_event_loop()
+        self.lock = asyncio.Lock()
 
     async def connect(self):
         """ Connect to target. """
@@ -49,8 +50,7 @@ class Connection:
             host=self.hostname,
             port=self.port,
             local_addr=self.source_address,
-            ssl=self.tls_context,
-            loop=self.eventloop
+            ssl=self.tls_context
         )
 
     def create_tls_context(self):
@@ -112,4 +112,5 @@ class Connection:
         await self.writer.drain()
 
     async def recv(self, *, timeout=None):
-        return await asyncio.wait_for(self.reader.readline(), timeout=timeout)
+        async with self.lock:
+            return await asyncio.wait_for(self.reader.readline(), timeout=timeout)