#
# Build Python for use in later stages
#
FROM ubuntu:oracular AS python-local

SHELL [ "/bin/bash", "-c" ]

# Install some frequently used utilities
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gpg \
        wget \
        nano \
        rsync \
        iputils-ping \
        jq \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install latest Python3
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        python3 \
        python3-pip \
        python3-venv \
        python3-dev

FROM ubuntu:oracular AS ze-monitor

# From https://github.com/jketreno/ze-monitor
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        debhelper \
        devscripts \
        cmake \
        git \
        libfmt-dev \
        libncurses-dev \
        rpm \
        rpm2cpio \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

RUN apt-get update \
    && apt-get install -y \
        software-properties-common \
    && add-apt-repository -y ppa:kobuk-team/intel-graphics \
    && apt-get update \
    && apt-get install -y \
        libze-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

RUN git clone --depth 1 --branch v0.4.0-1 https://github.com/jketreno/ze-monitor /opt/ze-monitor

WORKDIR /opt/ze-monitor/build

RUN cmake .. \
    && make \
    && cpack

#
# The main backstory image:
#  * python3
#  * pytorch xpu w/ ipex-llm
#  * ollama-ipex-llm
#  * src/backend/main.py - model server supporting RAG and fine-tuned models
#
FROM python-local AS llm-base

# Install Intel graphics runtimes
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:kobuk-team/intel-graphics \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libze-intel-gpu1 \
        libze1 \
        intel-ocloc \
        intel-opencl-icd \
        xpu-smi \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# pydub is loaded by torch, which will throw a warning if ffmpeg isn't installed
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:kobuk-team/intel-graphics \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        ffmpeg \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Prerequisite for ze-monitor
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libncurses6 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

COPY --from=ze-monitor /opt/ze-monitor/build/ze-monitor-*deb /opt/
RUN dpkg -i /opt/ze-monitor-*deb && rm /opt/ze-monitor-*deb
RUN usermod -aG ze-monitor root

WORKDIR /opt/backstory

# Setup the backstory python virtual environment
RUN python3 -m venv --system-site-packages /opt/backstory/venv

# Setup the docker pip shell
RUN { \
        echo '#!/bin/bash' ; \
        echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
        echo 'source /opt/backstory/venv/bin/activate' ; \
        echo 'if [[ "${1}" != "" ]]; then bash -c "${*}"; else bash -i; fi' ; \
    } > /opt/backstory/shell ; \
    chmod +x /opt/backstory/shell

# Activate the pip environment on all shell calls
SHELL [ "/opt/backstory/shell" ]

# https://pytorch-extension.intel.com/installation?platform=gpu&version=v2.7.10%2Bxpu&os=linux%2Fwsl2&package=pip
RUN pip install torch==2.7.0 torchvision==0.22.0 torchaudio==2.7.0 --index-url https://download.pytorch.org/whl/xpu
RUN pip install intel-extension-for-pytorch==2.7.10+xpu oneccl_bind_pt==2.7.0+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
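# A quick sanity check of the XPU stack (a suggested command, not part of the
# build): run inside the image, this should print "True" if the Level Zero
# runtime, torch, and intel-extension-for-pytorch all line up.
#
#   /opt/backstory/shell 'python3 -c "import torch, intel_extension_for_pytorch; print(torch.xpu.is_available())"'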
# From https://huggingface.co/docs/bitsandbytes/main/en/installation?backend=Intel+CPU+%2B+GPU#multi-backend
# To use bitsandbytes non-CUDA backends, be sure to install:
RUN pip install "transformers>=4.45.1"
# Note: if you don't want to reinstall bitsandbytes' dependencies, append the `--no-deps` flag!
RUN pip install --force-reinstall "https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-manylinux_2_24_x86_64.whl"
#RUN pip install --force-reinstall --no-deps "https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-1.0.0-py3-none-manylinux_2_24_x86_64.whl"
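# To confirm which bitsandbytes backend actually loaded, the package ships a
# diagnostic entry point (a suggested check, not part of the build):
#
#   /opt/backstory/shell python3 -m bitsandbytes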
# Install ollama python module
RUN pip install ollama langchain-ollama
RUN pip install setuptools --upgrade
RUN pip install feedparser bs4 chromadb
RUN pip install tiktoken
RUN pip install flask flask_cors flask_sock
RUN pip install peft datasets

#COPY --from=ipex-llm-src /opt/ipex-llm/python/llm/dist/*.whl /opt/wheels/
#RUN for pkg in /opt/wheels/ipex_llm*.whl; do pip install $pkg; done

# mistral fails with cache_position errors with transformers>4.40 (or at least
# it fails with the latest), as well as MistralSpda* and QwenSpda* symbols
# missing (needed when loading models with SDPA attention)
RUN pip install "sentence_transformers<3.4.1" # "transformers==4.40.0"
#RUN pip install sentence_transformers "transformers==4.40.0" "trl<0.12.0"
#RUN pip install transformers==4.45.0 "trl<0.12.0"
# trl.core doesn't have what is needed with the default 'pip install trl' version
#RUN pip install git+https://github.com/huggingface/trl.git@7630f877f91c556d9e5a3baa4b6e2894d90ff84c

# To get xe_linear and other Xe methods.
# NOTE: As of 2025-03-10, these were only available for Python 3.11, which is
# why this image originally built Python from source.
RUN pip3 install 'bigdl-core-xe-all>=2.6.0b'
# NOTE: IPEX includes the oneAPI components... not sure if they still need to
# be installed separately with a oneAPI env

# Required for IPEX optimize(), which is required to convert from Params4bit,
# and for image generation
RUN pip install einops diffusers
RUN pip install sentencepiece # Needed for FLUX
RUN pip install timm

# Install xformers from source
RUN pip install --no-binary xformers xformers

# Needed by src/utils/rag.py
RUN pip install watchdog

# Install packages needed for utils/tools/*
RUN pip install yfinance pytz geopy

# Install packages needed for vector operations
RUN pip install umap-learn

# Needed for random name generation
RUN pip install names-dataset

FROM llm-base AS backstory

SHELL [ "/opt/backstory/shell" ]

#COPY /src/requirements.txt /opt/backstory/requirements.txt
#RUN pip install -r /opt/backstory/requirements.txt

RUN pip install 'markitdown[all]' pydantic 'pydantic[email]'
# Prometheus
RUN pip install prometheus-client prometheus-fastapi-instrumentator
# Redis
RUN pip install "redis[hiredis]>=4.5.0"
# New backend implementation
RUN pip install fastapi uvicorn "python-jose[cryptography]" bcrypt python-multipart schedule
# Needed for email verification
RUN pip install pyyaml user-agents cryptography
# OpenAPI CLI generator
RUN pip install openapi-python-client
# QR code generator
RUN pip install setuptools pyqrcode pypng
# Anthropic and other backends
RUN pip install anthropic pydantic_ai
# Automatic type conversion pydantic -> typescript
RUN pip install pydantic typing-inspect jinja2

RUN pip freeze > /opt/backstory/requirements.txt

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        nodejs \
        npm \
    && npm install -g typescript \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash'; \
        echo 'echo "Container: backstory"'; \
        echo 'set -e'; \
        echo 'echo "Setting pip environment to /opt/backstory"'; \
        echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
        echo 'if [[ ! -d /opt/backstory/venv/bin ]]; then'; \
        echo '    python3 -m venv --system-site-packages /opt/backstory/venv'; \
        echo '    pip install -r /opt/backstory/requirements.txt'; \
        echo 'fi'; \
        echo 'source /opt/backstory/venv/bin/activate'; \
        echo ''; \
        echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/backstory/)?shell$ ]]; then'; \
        echo '    echo "Dropping to shell"'; \
        echo '    shift' ; \
        echo '    echo "Running: ${@}"' ; \
        echo '    if [[ "${1}" != "" ]]; then' ; \
        echo '        bash -c "${*}"'; \
        echo '    else' ; \
        echo '        exec /bin/bash -i'; \
        echo '    fi' ; \
        echo 'else'; \
        echo '    if [[ ! -e src/cert.pem ]]; then' ; \
        echo '        echo "Generating self-signed certificate for HTTPS"'; \
        echo '        openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout src/key.pem -out src/cert.pem -subj "/C=US/ST=OR/L=Portland/O=Development/CN=localhost"'; \
        echo '    fi' ; \
        echo '    declare once=0' ; \
        echo '    while true; do'; \
        echo '        if [[ ! -e /opt/backstory/block-server ]]; then'; \
        echo '            echo "Launching Backstory server..."'; \
        echo '            python3 src/backend/main.py "${@}" || echo "Backstory server died."'; \
        echo '            echo "Sleeping for 3 seconds."'; \
        echo '        else'; \
        echo '            if [[ ${once} -eq 0 ]]; then' ; \
        echo '                echo "/opt/backstory/block-server exists. Sleeping for 3 seconds."'; \
        echo '                once=1' ; \
        echo '            fi' ; \
        echo '        fi' ; \
        echo '        sleep 3'; \
        echo '    done' ; \
        echo 'fi'; \
    } > /entrypoint.sh \
    && chmod +x /entrypoint.sh

ENV USE_XETLA=OFF
ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
ENV SYCL_CACHE_PERSISTENT=1
ENV PATH=/opt/backstory:$PATH

ENTRYPOINT [ "/entrypoint.sh" ]

FROM backstory AS backstory-prod

COPY /src/ /opt/backstory/src/
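# A hedged usage sketch for the production image (the tag and the host/server
# ports are assumptions, not defined in this file):
#
#   docker build --target backstory-prod -t backstory:prod .
#   docker run --rm --device /dev/dri -p 8911:8911 backstory:prod
#
# The entrypoint generates a self-signed certificate, so clients need
# something like `curl -k https://localhost:8911/` to connect.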
FROM ubuntu:oracular AS ollama

# Get a couple prerequisites
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gpg \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install Intel graphics runtimes
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:kobuk-team/intel-graphics \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libze-intel-gpu1 \
        libze1 \
        intel-ocloc \
        intel-opencl-icd \
        xpu-smi \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /opt/ollama

# Download the ollama release built against ipex-llm
# NOTE: No longer at github.com/intel -- now at github.com/ipex-llm
# This version does not work:
ENV OLLAMA_VERSION=https://github.com/ipex-llm/ipex-llm/releases/download/v2.2.0/ollama-ipex-llm-2.2.0-ubuntu.tgz
# Does not work -- crashes:
# ENV OLLAMA_VERSION=https://github.com/ipex-llm/ipex-llm/releases/download/v2.3.0-nightly/ollama-ipex-llm-2.3.0b20250612-ubuntu.tgz

RUN wget -qO - ${OLLAMA_VERSION} | \
    tar --strip-components=1 -C . -xzv

# Install Python from Oracular (ollama works with 3.12)
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gpg \
        python3 \
        python3-pip \
        python3-venv \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Setup the ollama python virtual environment
RUN python3 -m venv --system-site-packages /opt/ollama/venv

# Setup the docker pip shell
RUN { \
        echo '#!/bin/bash' ; \
        echo 'source /opt/ollama/venv/bin/activate' ; \
        echo 'if [[ "${1}" != "" ]]; then bash -c "${*}"; else bash -i; fi' ; \
    } > /opt/ollama/shell ; \
    chmod +x /opt/ollama/shell

# Activate the pip environment on all shell calls
SHELL [ "/opt/ollama/shell" ]

# Install ollama python module
RUN pip install ollama langchain-ollama

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash'; \
        echo 'echo "Container: ollama"'; \
        echo 'set -e'; \
        echo 'echo "Setting pip environment to /opt/ollama"'; \
        echo 'source /opt/ollama/venv/bin/activate'; \
        echo 'export OLLAMA_NUM_GPU=999'; \
        echo 'export ZES_ENABLE_SYSMAN=1'; \
        echo 'export SYCL_CACHE_PERSISTENT=1'; \
        echo 'export OLLAMA_KEEP_ALIVE=-1'; \
        echo 'export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1'; \
        echo ''; \
        echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama/)?shell$ ]]; then'; \
        echo '    echo "Dropping to shell"'; \
        echo '    shift'; \
        echo '    if [[ "${1}" != "" ]]; then cmd="/opt/ollama/shell ${@}"; echo "Running: ${cmd}"; exec ${cmd}; else /opt/ollama/shell; fi'; \
        echo 'else'; \
        echo '    echo "Launching Ollama server..."'; \
        echo '    exec ollama serve'; \
        echo 'fi'; \
    } > /entrypoint.sh \
    && chmod +x /entrypoint.sh

RUN { \
        echo '#!/bin/bash'; \
        echo 'echo "Container: ollama"'; \
        echo 'set -e'; \
        echo 'echo "Setting pip environment to /opt/ollama"'; \
        echo 'source /opt/ollama/venv/bin/activate'; \
        echo 'ollama pull qwen2.5:7b' ; \
        echo 'ollama pull llama3.2' ; \
        echo 'ollama pull mxbai-embed-large' ; \
        echo 'ollama pull deepseek-r1:7b' ; \
        echo 'ollama pull mistral:7b' ; \
    } > /fetch-models.sh \
    && chmod +x /fetch-models.sh
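# /fetch-models.sh is not run at build time. A hedged usage sketch (container
# name and image tag are assumptions; 11434 is ollama's default port):
#
#   docker run -d --name ollama --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 backstory-ollama
#   docker exec -it ollama /fetch-models.sh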
ENV PYTHONUNBUFFERED=1
# Enable ext_intel_free_memory
ENV ZES_ENABLE_SYSMAN=1
# Use all GPUs
ENV OLLAMA_NUM_GPU=999
# Use immediate command lists
ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
# Use persistent cache
ENV SYCL_CACHE_PERSISTENT=1

VOLUME [ "/root/.ollama" ]

ENV PATH=/opt/ollama:${PATH}

ENTRYPOINT [ "/entrypoint.sh" ]

FROM llm-base AS jupyter

# npm and Node.js are required for jupyterlab
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        nodejs \
        npm \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

SHELL [ "/opt/backstory/shell" ]

# BEGIN setup Jupyter
RUN pip install \
        jupyterlab \
        dash[jupyterlab] \
    && jupyter lab build --dev-build=False --minimize=False
# END setup Jupyter

#COPY /src/requirements.txt /opt/backstory/src/requirements.txt
#RUN pip install -r /opt/backstory/src/requirements.txt

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash' ; \
        echo 'echo "Container: backstory jupyter"' ; \
        echo 'if [[ ! -e "/root/.cache/hub/token" ]]; then' ; \
        echo '    if [[ "${HF_ACCESS_TOKEN}" == "" ]]; then' ; \
        echo '        echo "Set your HF access token in .env as: HF_ACCESS_TOKEN=" >&2' ; \
        echo '        exit 1' ; \
        echo '    else' ; \
        echo '        if [[ ! -d /root/.cache/hub ]]; then mkdir -p /root/.cache/hub; fi' ; \
        echo '        echo "${HF_ACCESS_TOKEN}" > /root/.cache/hub/token' ; \
        echo '    fi' ; \
        echo 'fi' ; \
        echo 'if [[ -e /opt/intel/oneapi/setvars.sh ]]; then source /opt/intel/oneapi/setvars.sh; fi' ; \
        echo 'source /opt/backstory/venv/bin/activate' ; \
        echo 'if [[ "${1}" == "shell" ]]; then echo "Dropping to shell"; /bin/bash -i; exit $?; fi' ; \
        echo 'while true; do' ; \
        echo '    echo "Launching jupyter lab"' ; \
        echo '    jupyter lab \' ; \
        echo '        --notebook-dir=/opt/jupyter \' ; \
        echo '        --port 8888 \' ; \
        echo '        --ip 0.0.0.0 \' ; \
        echo '        --allow-root \' ; \
        echo '        --ServerApp.token= \' ; \
        echo '        --ServerApp.password= \' ; \
        echo '        --ServerApp.allow_origin=* \' ; \
        echo '        --ServerApp.base_url="/jupyter" \' ; \
        echo '        "${@}" \' ; \
        echo '        2>&1 | tee -a "/root/.cache/jupyter.log"' ; \
        echo '    echo "jupyter notebook died ($?). Restarting."' ; \
        echo '    sleep 5' ; \
        echo 'done' ; \
    } > /entrypoint-jupyter.sh \
    && chmod +x /entrypoint-jupyter.sh
#       echo '        --no-browser \' ; \

WORKDIR /opt/jupyter

ENV PATH=/opt/backstory:$PATH

ENTRYPOINT [ "/entrypoint-jupyter.sh" ]
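# With --ServerApp.base_url=/jupyter and an empty token/password, the lab UI
# should be reachable at http://localhost:8888/jupyter once port 8888 is
# published (image tag and host mapping below are assumptions):
#
#   docker run --rm --device /dev/dri -p 8888:8888 -e HF_ACCESS_TOKEN=... backstory-jupyter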
FROM python-local AS miniircd

# Get a couple prerequisites
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gpg \
        wget \
        nano \
        irssi \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /opt/miniircd

# Setup the miniircd python virtual environment
RUN python3 -m venv --system-site-packages /opt/miniircd/venv

# Setup the docker pip shell
RUN { \
        echo '#!/bin/bash' ; \
        echo 'source /opt/miniircd/venv/bin/activate' ; \
        echo 'if [[ "${1}" != "" ]]; then bash -c "${*}"; else bash -i; fi' ; \
    } > /opt/miniircd/shell ; \
    chmod +x /opt/miniircd/shell

# Activate the pip environment on all shell calls
SHELL [ "/opt/miniircd/shell" ]

RUN pip install miniircd

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash'; \
        echo 'echo "Container: miniircd"'; \
        echo 'set -e'; \
        echo 'echo "Setting pip environment to /opt/miniircd"'; \
        echo 'source /opt/miniircd/venv/bin/activate'; \
        echo ''; \
        echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/miniircd/)?shell$ ]]; then'; \
        echo '    echo "Dropping to shell"'; \
        echo '    shift' ; \
        echo '    echo "Running: ${@}"' ; \
        echo '    if [[ "${1}" != "" ]]; then' ; \
        echo '        bash -c "${*}"'; \
        echo '    else' ; \
        echo '        exec /bin/bash -i'; \
        echo '    fi' ; \
        echo 'else'; \
        echo '    echo "Launching IRC server..."'; \
        echo '    miniircd --setuid root "${@}"' ; \
        echo 'fi'; \
    } > /entrypoint.sh \
    && chmod +x /entrypoint.sh

ENTRYPOINT [ "/entrypoint.sh" ]
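# irssi is installed in the image, so a quick connectivity check is possible
# from inside the container (6667 is miniircd's default port; a suggestion,
# not part of the build):
#
#   irssi -c localhost -p 6667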
FROM ubuntu:oracular AS frontend

# The client frontend is built using React Expo to allow easy creation of an
# Android app as well as a web app
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        rsync \
        nano \
        wget \
        curl \
        nodejs \
        npm \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash'; \
        echo 'echo "Container: frontend"'; \
        echo 'set -e'; \
        echo ''; \
        echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/backstory/)?shell$ ]]; then'; \
        echo '    echo "Dropping to shell"'; \
        echo '    shift' ; \
        echo '    echo "Running: ${@}"' ; \
        echo '    if [[ "${1}" != "" ]]; then' ; \
        echo '        bash -c "${*}"'; \
        echo '    else' ; \
        echo '        exec /bin/bash -i'; \
        echo '    fi' ; \
        echo 'fi' ; \
        echo 'cd /opt/backstory/frontend'; \
        echo 'if [[ "${1}" == "install" ]] || [[ ! -d node_modules ]]; then'; \
        echo '    echo "Installing node modules"'; \
        echo '    if [[ -d node_modules ]]; then'; \
        echo '        echo "Deleting current node_modules"'; \
        echo '        rm -rf node_modules'; \
        echo '    fi'; \
        echo '    npm install --force'; \
        echo 'fi'; \
        echo 'if [[ "${1}" == "build" ]]; then'; \
        echo '    echo "Building production static build"'; \
        echo '    ./build.sh'; \
        echo 'fi'; \
        echo 'while true; do'; \
        echo '    echo "Launching Backstory React Frontend..."'; \
        echo '    npm start "${@}" || echo "Backstory frontend died. Restarting in 3 seconds."'; \
        echo '    sleep 3'; \
        echo 'done' ; \
    } > /entrypoint.sh \
    && chmod +x /entrypoint.sh

WORKDIR /opt/backstory/frontend

RUN { \
        echo '#!/bin/bash' ; \
        echo 'if [[ "${1}" != "" ]]; then bash -c "${*}"; else bash -i; fi' ; \
    } > /opt/backstory/shell ; \
    chmod +x /opt/backstory/shell

COPY /frontend/ /opt/backstory/frontend/

ENV PATH=/opt/backstory:$PATH

ENTRYPOINT [ "/entrypoint.sh" ]
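# The entrypoint treats "install" and "build" as commands; a hedged usage
# sketch (image tag and the dev-server port are assumptions):
#
#   docker run --rm -it backstory-frontend install   # force a clean npm install
#   docker run --rm -it backstory-frontend build     # production static build via ./build.sh
#   docker run --rm -p 3000:3000 backstory-frontend  # dev server via npm start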
# FROM ubuntu:24.04 AS ollama-ov-server
# SHELL ["/bin/bash", "-c"]
# RUN apt-get update && apt install -y software-properties-common libtbb-dev
# RUN add-apt-repository ppa:deadsnakes/ppa \
#     && apt-get update \
#     && apt-get install -y python3.10 net-tools
# RUN ln -sf /usr/bin/python3.10 /usr/bin/python3
# RUN apt-get install -y ca-certificates git wget curl gcc g++ \
#     && apt-get clean \
#     && rm -rf /var/lib/apt/lists/*
# WORKDIR /home/ollama_ov_server
# ARG GOVERSION=1.24.1
# RUN curl -fsSL https://golang.org/dl/go${GOVERSION}.linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
# ENV PATH=/usr/local/go/bin:$PATH
# RUN wget https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2025.2.0.0.dev20250513/openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64.tar.gz
# RUN tar -xzf openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64.tar.gz
# ENV GENAI_DIR=/home/ollama_ov_server/openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64
# RUN source /home/ollama_ov_server/openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64/setupvars.sh
# ENV CGO_ENABLED=1
# ENV GODEBUG=cgocheck=0
# ENV CGO_LDFLAGS=-L$GENAI_DIR/runtime/lib/intel64
# ENV CGO_CFLAGS=-I$GENAI_DIR/runtime/include
# WORKDIR /home/ollama_ov_server
# RUN git clone https://github.com/openvinotoolkit/openvino_contrib.git
# WORKDIR /home/ollama_ov_server/openvino_contrib/modules/ollama_openvino
# RUN go build -o /usr/bin/ollama .
# ENV OLLAMA_HOST=0.0.0.0:11434
# EXPOSE 11434
# RUN apt-get update \
#     && DEBIAN_FRONTEND=noninteractive apt-get install -y \
#         pip \
#     && apt-get clean \
#     && rm -rf /var/lib/apt/lists/*
# RUN pip install huggingface_hub modelscope
# #ENV model=Qwen3-4B-int4-ov
# #ENV model=Qwen3-8B-int4-ov -- didn't work
# #RUN huggingface-cli download OpenVINO/${model}
# #RUN modelscope download --model OpenVINO/${model} --local_dir ./${model}
# #RUN tar -zcvf /root/.ollama/models/${model}.tar.gz /root/.cache/hub/models--OpenVINO--${model}
# #RUN { \
# #        echo "FROM ${model}.tar.gz" ; \
# #        echo "ModelType 'OpenVINO'" ; \
# #    } > /root/.ollama/models/Modelfile
# #
# #RUN /bin/bash -c "source /home/ollama_ov_server/openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64/setupvars.sh && /usr/bin/ollama create ${model}:v1 -f /root/.ollama/models/Modelfile"
# ENTRYPOINT ["/bin/bash", "-c", "source /home/ollama_ov_server/openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64/setupvars.sh && /usr/bin/ollama serve"]

FROM llm-base AS vllm

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        git \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /opt

RUN git clone https://github.com/vllm-project/vllm.git

WORKDIR /opt/vllm

RUN wget -O - https://astral.sh/uv/install.sh | sh

# NOTE: ~ is not expanded in ENV, so spell out /root
ENV PATH=/root/.local/bin:$PATH

RUN { \
        echo '#!/bin/bash' ; \
        echo 'source /opt/backstory/venv/bin/activate'; \
        echo 'if [[ "${1}" != "" ]]; then bash -c "${*}"; else bash -i; fi' ; \
    } > /opt/vllm/shell ; \
    chmod +x /opt/vllm/shell

RUN uv venv --python 3.12 --seed

SHELL [ "/opt/vllm/shell" ]

RUN pip install --upgrade pip ; \
    pip install -v -r requirements/xpu.txt

RUN VLLM_TARGET_DEVICE=xpu python setup.py install

SHELL [ "/bin/bash", "-c" ]

RUN { \
        echo '#!/bin/bash'; \
        echo 'echo "Container: vLLM"'; \
        echo 'set -e'; \
        echo 'source /opt/backstory/venv/bin/activate'; \
        echo 'declare once=0'; \
        echo 'while true; do'; \
        echo '    if [[ ! -e /opt/backstory/block-server ]]; then'; \
        echo '        echo "Launching vLLM server..."'; \
        echo '        python3 -m vllm.entrypoints.openai.api_server \'; \
        echo '            --model=Qwen/Qwen3-8B \' ; \
        echo '            --device xpu' ; \
#        echo '            --dtype=bfloat16 \' ; \
#        echo '            --max_model_len=1024 \' ; \
#        echo '            --distributed-executor-backend=ray \' ; \
#        echo '            --pipeline-parallel-size=2 \' ; \
#        echo '            -tp=1' ; \
        echo '        echo "Sleeping for 3 seconds."'; \
        echo '    else'; \
        echo '        if [[ ${once} -eq 0 ]]; then' ; \
        echo '            echo "/opt/backstory/block-server exists. Sleeping for 3 seconds."'; \
        echo '            once=1' ; \
        echo '        fi' ; \
        echo '    fi' ; \
        echo '    sleep 3'; \
        echo 'done' ; \
    } > /entrypoint.sh \
    && chmod +x /entrypoint.sh

ENTRYPOINT [ "/entrypoint.sh" ]
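# Once running, the server speaks the OpenAI-compatible API (vLLM defaults to
# port 8000); a hedged smoke test from the host, assuming the port is published:
#
#   curl http://localhost:8000/v1/models
#   curl http://localhost:8000/v1/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "Qwen/Qwen3-8B", "prompt": "Hello", "max_tokens": 16}'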