#
# Build Python for use in later stages
#
FROM ubuntu:oracular AS python-local

SHELL [ "/bin/bash", "-c" ]

# Install some frequently used utilities
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gpg \
        wget \
        nano \
        rsync \
        iputils-ping \
        jq \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install latest Python3
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        python3 \
        python3-pip \
        python3-venv \
        python3-dev

FROM ubuntu:oracular AS ze-monitor

# From https://github.com/jketreno/ze-monitor
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        debhelper \
        devscripts \
        cmake \
        git \
        libfmt-dev \
        libncurses-dev \
        rpm \
        rpm2cpio \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

RUN apt-get update \
    && apt-get install -y \
        software-properties-common \
    && add-apt-repository -y ppa:kobuk-team/intel-graphics \
    && apt-get update \
    && apt-get install -y \
        libze-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

RUN git clone --depth 1 --branch v0.4.0-1 https://github.com/jketreno/ze-monitor /opt/ze-monitor

WORKDIR /opt/ze-monitor/build

RUN cmake .. \
    && make \
    && cpack

#
# The main backstory image:
#  * python3
#  * pytorch xpu w/ ipex-llm
#  * ollama-ipex-llm
#  * src/backend/main.py - model server supporting RAG and fine-tuned models
#
FROM python-local AS llm-base

# Install Intel graphics runtimes
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:kobuk-team/intel-graphics \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libze-intel-gpu1 \
        libze1 \
        intel-ocloc \
        intel-opencl-icd \
        xpu-smi \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# pydub is loaded by torch, which will throw a warning if ffmpeg isn't installed
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:kobuk-team/intel-graphics \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        ffmpeg \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Prerequisite for ze-monitor
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libncurses6 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
RUN dpkg -i /opt/ze-monitor-*deb && rm /opt/ze-monitor-*deb
RUN usermod -aG ze-monitor root

WORKDIR /opt/backstory

# Setup the backstory python virtual environment
RUN python3 -m venv --system-site-packages /opt/backstory/venv

# Setup the docker pip shell
RUN { \
        echo '#!/bin/bash' ; \
        echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
        echo 'source /opt/backstory/venv/bin/activate' ; \
        echo 'if [[ "${1}" != "" ]]; then bash -c "${*}"; else bash -i; fi' ; \
    } > /opt/backstory/shell ; \
    chmod +x /opt/backstory/shell

# Activate the pip environment on all shell calls
SHELL [ "/opt/backstory/shell" ]

# https://pytorch-extension.intel.com/installation?platform=gpu&version=v2.7.10%2Bxpu&os=linux%2Fwsl2&package=pip
RUN pip install torch==2.7.0 torchvision==0.22.0 torchaudio==2.7.0 --index-url https://download.pytorch.org/whl/xpu
RUN pip install intel-extension-for-pytorch==2.7.10+xpu oneccl_bind_pt==2.7.0+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
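# A quick sanity check of the XPU stack (a suggested command, not part of the
# build): run inside the image, this should print "True" if the Level Zero
# runtime, torch, and intel-extension-for-pytorch all line up.
#
#   /opt/backstory/shell 'python3 -c "import torch, intel_extension_for_pytorch; print(torch.xpu.is_available())"'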
# From https://huggingface.co/docs/bitsandbytes/main/en/installation?backend=Intel+CPU+%2B+GPU#multi-backend
# To use bitsandbytes non-CUDA backends, be sure to install:
RUN pip install "transformers>=4.45.1"
# Note: if you don't want to reinstall bitsandbytes' dependencies, append the `--no-deps` flag!
RUN pip install --force-reinstall "https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-manylinux_2_24_x86_64.whl"
#RUN pip install --force-reinstall --no-deps "https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-1.0.0-py3-none-manylinux_2_24_x86_64.whl"
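# To confirm which bitsandbytes backend actually loaded, the package ships a
# diagnostic entry point (a suggested check, not part of the build):
#
#   /opt/backstory/shell python3 -m bitsandbytes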
# Install ollama python module
RUN pip install ollama langchain-ollama
RUN pip install setuptools --upgrade
RUN pip install feedparser bs4 chromadb
RUN pip install tiktoken
RUN pip install flask flask_cors flask_sock
RUN pip install peft datasets

#COPY --from=ipex-llm-src /opt/ipex-llm/python/llm/dist/*.whl /opt/wheels/
#RUN for pkg in /opt/wheels/ipex_llm*.whl; do pip install $pkg; done

# mistral fails with cache_position errors with transformers>4.40 (or at least
# it fails with the latest), as well as MistralSpda* and QwenSpda* symbols
# missing (needed when loading models with SDPA attention)
RUN pip install "sentence_transformers<3.4.1" # "transformers==4.40.0"
#RUN pip install sentence_transformers "transformers==4.40.0" "trl<0.12.0"
#RUN pip install transformers==4.45.0 "trl<0.12.0"
# trl.core doesn't have what is needed with the default 'pip install trl' version
#RUN pip install git+https://github.com/huggingface/trl.git@7630f877f91c556d9e5a3baa4b6e2894d90ff84c

# To get xe_linear and other Xe methods.
# NOTE: As of 2025-03-10, these were only available for Python 3.11, which is
# why this image originally built Python from source.
RUN pip3 install 'bigdl-core-xe-all>=2.6.0b'
# NOTE: IPEX includes the oneAPI components... not sure if they still need to
# be installed separately with a oneAPI env

# Required for IPEX optimize(), which is required to convert from Params4bit,
# and for image generation
RUN pip install einops diffusers
RUN pip install sentencepiece # Needed for FLUX
RUN pip install timm

# Install xformers from source
RUN pip install --no-binary xformers xformers

# Needed by src/utils/rag.py
RUN pip install watchdog

# Install packages needed for utils/tools/*
RUN pip install yfinance pytz geopy

# Install packages needed for vector operations
RUN pip install umap-learn

# Needed for random name generation
RUN pip install names-dataset

FROM llm-base AS backstory

SHELL [ "/opt/backstory/shell" ]

#COPY /src/requirements.txt /opt/backstory/requirements.txt
#RUN pip install -r /opt/backstory/requirements.txt

RUN pip install 'markitdown[all]' pydantic 'pydantic[email]'
# Prometheus
RUN pip install prometheus-client prometheus-fastapi-instrumentator
# Redis
RUN pip install "redis[hiredis]>=4.5.0"
# New backend implementation
RUN pip install fastapi uvicorn "python-jose[cryptography]" bcrypt python-multipart schedule
# Needed for email verification
RUN pip install pyyaml user-agents cryptography
# OpenAPI CLI generator
RUN pip install openapi-python-client
# QR code generator
RUN pip install setuptools pyqrcode pypng
# Anthropic and other backends
RUN pip install anthropic pydantic_ai
# Automatic type conversion pydantic -> typescript
RUN pip install pydantic typing-inspect jinja2

RUN pip freeze > /opt/backstory/requirements.txt

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        nodejs \
        npm \
    && npm install -g typescript \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash'; \
        echo 'echo "Container: backstory"'; \
        echo 'set -e'; \
        echo 'echo "Setting pip environment to /opt/backstory"'; \
        echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
        echo 'if [[ ! -d /opt/backstory/venv/bin ]]; then'; \
        echo '    python3 -m venv --system-site-packages /opt/backstory/venv'; \
        echo '    pip install -r /opt/backstory/requirements.txt'; \
        echo 'fi'; \
        echo 'source /opt/backstory/venv/bin/activate'; \
        echo ''; \
        echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/backstory/)?shell$ ]]; then'; \
        echo '    echo "Dropping to shell"'; \
        echo '    shift' ; \
        echo '    echo "Running: ${@}"' ; \
        echo '    if [[ "${1}" != "" ]]; then' ; \
        echo '        bash -c "${*}"'; \
        echo '    else' ; \
        echo '        exec /bin/bash -i'; \
        echo '    fi' ; \
        echo 'else'; \
        echo '    if [[ ! -e src/cert.pem ]]; then' ; \
        echo '        echo "Generating self-signed certificate for HTTPS"'; \
        echo '        openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout src/key.pem -out src/cert.pem -subj "/C=US/ST=OR/L=Portland/O=Development/CN=localhost"'; \
        echo '    fi' ; \
        echo '    declare once=0' ; \
        echo '    while true; do'; \
        echo '        if [[ ! -e /opt/backstory/block-server ]]; then'; \
        echo '            echo "Launching Backstory server..."'; \
        echo '            python3 src/backend/main.py "${@}" || echo "Backstory server died."'; \
        echo '            echo "Sleeping for 3 seconds."'; \
        echo '        else'; \
        echo '            if [[ ${once} -eq 0 ]]; then' ; \
        echo '                echo "/opt/backstory/block-server exists. Sleeping for 3 seconds."'; \
        echo '                once=1' ; \
        echo '            fi' ; \
        echo '        fi' ; \
        echo '        sleep 3'; \
        echo '    done' ; \
        echo 'fi'; \
    } > /entrypoint.sh \
    && chmod +x /entrypoint.sh

ENV USE_XETLA=OFF
ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
ENV SYCL_CACHE_PERSISTENT=1
ENV PATH=/opt/backstory:$PATH

ENTRYPOINT [ "/entrypoint.sh" ]

FROM backstory AS backstory-prod

COPY /src/ /opt/backstory/src/
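# A hedged usage sketch for the production image (the tag and the host/server
# ports are assumptions, not defined in this file):
#
#   docker build --target backstory-prod -t backstory:prod .
#   docker run --rm --device /dev/dri -p 8911:8911 backstory:prod
#
# The entrypoint generates a self-signed certificate, so clients need
# something like `curl -k https://localhost:8911/` to connect.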
FROM ubuntu:oracular AS ollama

# Get a couple prerequisites
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gpg \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install Intel graphics runtimes
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:kobuk-team/intel-graphics \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libze-intel-gpu1 \
        libze1 \
        intel-ocloc \
        intel-opencl-icd \
        xpu-smi \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /opt/ollama

# Download the ollama release built against ipex-llm
# NOTE: No longer at github.com/intel -- now at github.com/ipex-llm
# This version does not work:
ENV OLLAMA_VERSION=https://github.com/ipex-llm/ipex-llm/releases/download/v2.2.0/ollama-ipex-llm-2.2.0-ubuntu.tgz
# Does not work -- crashes:
# ENV OLLAMA_VERSION=https://github.com/ipex-llm/ipex-llm/releases/download/v2.3.0-nightly/ollama-ipex-llm-2.3.0b20250612-ubuntu.tgz

RUN wget -qO - ${OLLAMA_VERSION} | \
    tar --strip-components=1 -C . -xzv

# Install Python from Oracular (ollama works with 3.12)
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gpg \
        python3 \
        python3-pip \
        python3-venv \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Setup the ollama python virtual environment
RUN python3 -m venv --system-site-packages /opt/ollama/venv

# Setup the docker pip shell
RUN { \
        echo '#!/bin/bash' ; \
        echo 'source /opt/ollama/venv/bin/activate' ; \
        echo 'if [[ "${1}" != "" ]]; then bash -c "${*}"; else bash -i; fi' ; \
    } > /opt/ollama/shell ; \
    chmod +x /opt/ollama/shell

# Activate the pip environment on all shell calls
SHELL [ "/opt/ollama/shell" ]

# Install ollama python module
RUN pip install ollama langchain-ollama

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash'; \
        echo 'echo "Container: ollama"'; \
        echo 'set -e'; \
        echo 'echo "Setting pip environment to /opt/ollama"'; \
        echo 'source /opt/ollama/venv/bin/activate'; \
        echo 'export OLLAMA_NUM_GPU=999'; \
        echo 'export ZES_ENABLE_SYSMAN=1'; \
        echo 'export SYCL_CACHE_PERSISTENT=1'; \
        echo 'export OLLAMA_KEEP_ALIVE=-1'; \
        echo 'export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1'; \
        echo ''; \
        echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama/)?shell$ ]]; then'; \
        echo '    echo "Dropping to shell"'; \
        echo '    shift'; \
        echo '    if [[ "${1}" != "" ]]; then cmd="/opt/ollama/shell ${@}"; echo "Running: ${cmd}"; exec ${cmd}; else /opt/ollama/shell; fi'; \
        echo 'else'; \
        echo '    echo "Launching Ollama server..."'; \
        echo '    exec ollama serve'; \
        echo 'fi'; \
    } > /entrypoint.sh \
    && chmod +x /entrypoint.sh

RUN { \
        echo '#!/bin/bash'; \
        echo 'echo "Container: ollama"'; \
        echo 'set -e'; \
        echo 'echo "Setting pip environment to /opt/ollama"'; \
        echo 'source /opt/ollama/venv/bin/activate'; \
        echo 'ollama pull qwen2.5:7b' ; \
        echo 'ollama pull llama3.2' ; \
        echo 'ollama pull mxbai-embed-large' ; \
        echo 'ollama pull deepseek-r1:7b' ; \
        echo 'ollama pull mistral:7b' ; \
    } > /fetch-models.sh \
    && chmod +x /fetch-models.sh
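# /fetch-models.sh is not run at build time. A hedged usage sketch (container
# name and image tag are assumptions; 11434 is ollama's default port):
#
#   docker run -d --name ollama --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 backstory-ollama
#   docker exec -it ollama /fetch-models.sh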
ENV PYTHONUNBUFFERED=1
# Enable ext_intel_free_memory
ENV ZES_ENABLE_SYSMAN=1
# Use all GPUs
ENV OLLAMA_NUM_GPU=999
# Use immediate command lists
ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
# Use persistent cache
ENV SYCL_CACHE_PERSISTENT=1

VOLUME [ "/root/.ollama" ]

ENV PATH=/opt/ollama:${PATH}

ENTRYPOINT [ "/entrypoint.sh" ]

FROM llm-base AS jupyter

# npm and Node.js are required for jupyterlab
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        nodejs \
        npm \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

SHELL [ "/opt/backstory/shell" ]

# BEGIN setup Jupyter
RUN pip install \
        jupyterlab \
        dash[jupyterlab] \
    && jupyter lab build --dev-build=False --minimize=False
# END setup Jupyter

#COPY /src/requirements.txt /opt/backstory/src/requirements.txt
#RUN pip install -r /opt/backstory/src/requirements.txt

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash' ; \
        echo 'echo "Container: backstory jupyter"' ; \
        echo 'if [[ ! -e "/root/.cache/hub/token" ]]; then' ; \
        echo '    if [[ "${HF_ACCESS_TOKEN}" == "" ]]; then' ; \
        echo '        echo "Set your HF access token in .env as: HF_ACCESS_TOKEN=" >&2' ; \
        echo '        exit 1' ; \
        echo '    else' ; \
        echo '        if [[ ! -d /root/.cache/hub ]]; then mkdir -p /root/.cache/hub; fi' ; \
        echo '        echo "${HF_ACCESS_TOKEN}" > /root/.cache/hub/token' ; \
        echo '    fi' ; \
        echo 'fi' ; \
        echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
        echo 'source /opt/backstory/venv/bin/activate' ; \
        echo 'if [[ "${1}" == "shell" ]]; then echo "Dropping to shell"; /bin/bash -i; exit $?; fi' ; \
        echo 'while true; do' ; \
        echo '    echo "Launching jupyter lab"' ; \
        echo '    jupyter lab \' ; \
        echo '        --notebook-dir=/opt/jupyter \' ; \
        echo '        --port 8888 \' ; \
        echo '        --ip 0.0.0.0 \' ; \
        echo '        --allow-root \' ; \
        echo '        --ServerApp.token= \' ; \
        echo '        --ServerApp.password= \' ; \
        echo '        --ServerApp.allow_origin=* \' ; \
        echo '        --ServerApp.base_url="/jupyter" \' ; \
        echo '        "${@}" \' ; \
        echo '        2>&1 | tee -a "/root/.cache/jupyter.log"' ; \
        echo '    echo "jupyter notebook died ($?). Restarting."' ; \
        echo '    sleep 5' ; \
        echo 'done' ; \
    } > /entrypoint-jupyter.sh \
    && chmod +x /entrypoint-jupyter.sh
#       echo '        --no-browser \' ; \

WORKDIR /opt/jupyter

ENV PATH=/opt/backstory:$PATH

ENTRYPOINT [ "/entrypoint-jupyter.sh" ]
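# With --ServerApp.base_url=/jupyter and an empty token/password, the lab UI
# should be reachable at http://localhost:8888/jupyter once port 8888 is
# published (image tag and host mapping below are assumptions):
#
#   docker run --rm --device /dev/dri -p 8888:8888 -e HF_ACCESS_TOKEN=... backstory-jupyter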
FROM python-local AS miniircd

# Get a couple prerequisites
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gpg \
        wget \
        nano \
        irssi \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /opt/miniircd

# Setup the miniircd python virtual environment
RUN python3 -m venv --system-site-packages /opt/miniircd/venv

# Setup the docker pip shell
RUN { \
        echo '#!/bin/bash' ; \
        echo 'source /opt/miniircd/venv/bin/activate' ; \
        echo 'if [[ "${1}" != "" ]]; then bash -c "${*}"; else bash -i; fi' ; \
    } > /opt/miniircd/shell ; \
    chmod +x /opt/miniircd/shell

# Activate the pip environment on all shell calls
SHELL [ "/opt/miniircd/shell" ]

RUN pip install miniircd

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash'; \
        echo 'echo "Container: miniircd"'; \
        echo 'set -e'; \
        echo 'echo "Setting pip environment to /opt/miniircd"'; \
        echo 'source /opt/miniircd/venv/bin/activate'; \
        echo ''; \
        echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/miniircd/)?shell$ ]]; then'; \
        echo '    echo "Dropping to shell"'; \
        echo '    shift' ; \
        echo '    echo "Running: ${@}"' ; \
        echo '    if [[ "${1}" != "" ]]; then' ; \
        echo '        bash -c "${*}"'; \
        echo '    else' ; \
        echo '        exec /bin/bash -i'; \
        echo '    fi' ; \
        echo 'else'; \
        echo '    echo "Launching IRC server..."'; \
        echo '    miniircd --setuid root "${@}"' ; \
        echo 'fi'; \
    } > /entrypoint.sh \
    && chmod +x /entrypoint.sh

ENTRYPOINT [ "/entrypoint.sh" ]
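# irssi is installed in the image, so a quick connectivity check is possible
# from inside the container (6667 is miniircd's default port; a suggestion,
# not part of the build):
#
#   irssi -c localhost -p 6667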
FROM ubuntu:oracular AS frontend

# The client frontend is built using React Expo to allow easy creation of an
# Android app as well as a web app
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        rsync \
        nano \
        wget \
        curl \
        nodejs \
        npm \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash'; \
        echo 'echo "Container: frontend"'; \
        echo 'set -e'; \
        echo ''; \
        echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/backstory/)?shell$ ]]; then'; \
        echo '    echo "Dropping to shell"'; \
        echo '    shift' ; \
        echo '    echo "Running: ${@}"' ; \
        echo '    if [[ "${1}" != "" ]]; then' ; \
        echo '        bash -c "${*}"'; \
        echo '    else' ; \
        echo '        exec /bin/bash -i'; \
        echo '    fi' ; \
        echo 'fi' ; \
        echo 'cd /opt/backstory/frontend'; \
        echo 'if [[ "${1}" == "install" ]] || [[ ! -d node_modules ]]; then'; \
        echo '    echo "Installing node modules"'; \
        echo '    if [[ -d node_modules ]]; then'; \
        echo '        echo "Deleting current node_modules"'; \
        echo '        rm -rf node_modules'; \
        echo '    fi'; \
        echo '    npm install --force'; \
        echo 'fi'; \
        echo 'if [[ "${1}" == "build" ]]; then'; \
        echo '    echo "Building production static build"'; \
        echo '    ./build.sh'; \
        echo 'fi'; \
        echo 'while true; do'; \
        echo '    echo "Launching Backstory React Frontend..."'; \
        echo '    npm start "${@}" || echo "Backstory frontend died. Restarting in 3 seconds."'; \
        echo '    sleep 3'; \
        echo 'done' ; \
    } > /entrypoint.sh \
    && chmod +x /entrypoint.sh

WORKDIR /opt/backstory/frontend

RUN { \
        echo '#!/bin/bash' ; \
        echo 'if [[ "${1}" != "" ]]; then bash -c "${*}"; else bash -i; fi' ; \
    } > /opt/backstory/shell ; \
    chmod +x /opt/backstory/shell

COPY /frontend/ /opt/backstory/frontend/

ENV PATH=/opt/backstory:$PATH

ENTRYPOINT [ "/entrypoint.sh" ]
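# The entrypoint treats "install" and "build" as commands; a hedged usage
# sketch (image tag and the dev-server port are assumptions):
#
#   docker run --rm -it backstory-frontend install   # force a clean npm install
#   docker run --rm -it backstory-frontend build     # production static build via ./build.sh
#   docker run --rm -p 3000:3000 backstory-frontend  # dev server via npm start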
# FROM ubuntu:24.04 AS ollama-ov-server
# SHELL ["/bin/bash", "-c"]
# RUN apt-get update && apt install -y software-properties-common libtbb-dev
# RUN add-apt-repository ppa:deadsnakes/ppa \
#     && apt-get update \
#     && apt-get install -y python3.10 net-tools
# RUN ln -sf /usr/bin/python3.10 /usr/bin/python3
# RUN apt-get install -y ca-certificates git wget curl gcc g++ \
#     && apt-get clean \
#     && rm -rf /var/lib/apt/lists/*
# WORKDIR /home/ollama_ov_server
# ARG GOVERSION=1.24.1
# RUN curl -fsSL https://golang.org/dl/go${GOVERSION}.linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
# ENV PATH=/usr/local/go/bin:$PATH
# RUN wget https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2025.2.0.0.dev20250513/openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64.tar.gz
# RUN tar -xzf openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64.tar.gz
# ENV GENAI_DIR=/home/ollama_ov_server/openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64
# RUN source /home/ollama_ov_server/openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64/setupvars.sh
# ENV CGO_ENABLED=1
# ENV GODEBUG=cgocheck=0
# ENV CGO_LDFLAGS=-L$GENAI_DIR/runtime/lib/intel64
# ENV CGO_CFLAGS=-I$GENAI_DIR/runtime/include
# WORKDIR /home/ollama_ov_server
# RUN git clone https://github.com/openvinotoolkit/openvino_contrib.git
# WORKDIR /home/ollama_ov_server/openvino_contrib/modules/ollama_openvino
# RUN go build -o /usr/bin/ollama .
# ENV OLLAMA_HOST=0.0.0.0:11434
# EXPOSE 11434
# RUN apt-get update \
#     && DEBIAN_FRONTEND=noninteractive apt-get install -y \
#         pip \
#     && apt-get clean \
#     && rm -rf /var/lib/apt/lists/*
# RUN pip install huggingface_hub modelscope
# #ENV model=Qwen3-4B-int4-ov
# #ENV model=Qwen3-8B-int4-ov -- didn't work
# #RUN huggingface-cli download OpenVINO/${model}
# #RUN modelscope download --model OpenVINO/${model} --local_dir ./${model}
# #RUN tar -zcvf /root/.ollama/models/${model}.tar.gz /root/.cache/hub/models--OpenVINO--${model}
# #RUN { \
# #        echo "FROM ${model}.tar.gz" ; \
# #        echo "ModelType 'OpenVINO'" ; \
# #    } > /root/.ollama/models/Modelfile
# #
# #RUN /bin/bash -c "source /home/ollama_ov_server/openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64/setupvars.sh && /usr/bin/ollama create ${model}:v1 -f /root/.ollama/models/Modelfile"
# ENTRYPOINT ["/bin/bash", "-c", "source /home/ollama_ov_server/openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64/setupvars.sh && /usr/bin/ollama serve"]

FROM llm-base AS vllm

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        git \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /opt

RUN git clone https://github.com/vllm-project/vllm.git

WORKDIR /opt/vllm

RUN wget -O - https://astral.sh/uv/install.sh | sh

# NOTE: ~ is not expanded in ENV, so spell out /root
ENV PATH=/root/.local/bin:$PATH

RUN { \
        echo '#!/bin/bash' ; \
        echo 'source /opt/backstory/venv/bin/activate'; \
        echo 'if [[ "${1}" != "" ]]; then bash -c "${*}"; else bash -i; fi' ; \
    } > /opt/vllm/shell ; \
    chmod +x /opt/vllm/shell

RUN uv venv --python 3.12 --seed

SHELL [ "/opt/vllm/shell" ]

RUN pip install --upgrade pip ; \
    pip install -v -r requirements/xpu.txt

RUN VLLM_TARGET_DEVICE=xpu python setup.py install

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash'; \
        echo 'echo "Container: vLLM"'; \
        echo 'set -e'; \
        echo 'source /opt/backstory/venv/bin/activate'; \
        echo 'declare once=0'; \
        echo 'while true; do'; \
        echo '    if [[ ! -e /opt/backstory/block-server ]]; then'; \
        echo '        echo "Launching vLLM server..."'; \
        echo '        python3 -m vllm.entrypoints.openai.api_server \'; \
        echo '            --model=Qwen/Qwen3-8B \' ; \
        echo '            --device xpu' ; \
#        echo '            --dtype=bfloat16 \' ; \
#        echo '            --max_model_len=1024 \' ; \
#        echo '            --distributed-executor-backend=ray \' ; \
#        echo '            --pipeline-parallel-size=2 \' ; \
#        echo '            -tp=1' ; \
        echo '        echo "Sleeping for 3 seconds."'; \
        echo '    else'; \
        echo '        if [[ ${once} -eq 0 ]]; then' ; \
        echo '            echo "/opt/backstory/block-server exists. Sleeping for 3 seconds."'; \
        echo '            once=1' ; \
        echo '        fi' ; \
        echo '    fi' ; \
        echo '    sleep 3'; \
        echo 'done' ; \
    } > /entrypoint.sh \
    && chmod +x /entrypoint.sh

ENTRYPOINT [ "/entrypoint.sh" ]
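# Once running, the server speaks the OpenAI-compatible API (vLLM defaults to
# port 8000); a hedged smoke test from the host, assuming the port is published:
#
#   curl http://localhost:8000/v1/models
#   curl http://localhost:8000/v1/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "Qwen/Qwen3-8B", "prompt": "Hello", "max_tokens": 16}'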