diff --git a/Dockerfile b/Dockerfile
index 12d781b..0dbb6b3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -582,3 +582,117 @@
 COPY /frontend/ /opt/backstory/frontend/
 ENV PATH=/opt/backstory:$PATH
 ENTRYPOINT [ "/entrypoint.sh" ]
+
+
+FROM ubuntu:24.04 AS ollama-ov-server
+
+SHELL ["/bin/bash", "-c"]
+
+RUN apt-get update && apt-get install -y software-properties-common libtbb-dev
+RUN add-apt-repository ppa:deadsnakes/ppa \
+    && apt-get update \
+    && apt-get install -y python3.10 net-tools
+RUN ln -sf /usr/bin/python3.10 /usr/bin/python3
+
+RUN apt-get install -y ca-certificates git wget curl gcc g++ \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /home/ollama_ov_server
+ARG GOVERSION=1.24.1
+RUN curl -fsSL https://golang.org/dl/go${GOVERSION}.linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
+ENV PATH=/usr/local/go/bin:$PATH
+
+RUN wget https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/nightly/2025.2.0.0.dev20250513/openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64.tar.gz
+RUN tar -xzf openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64.tar.gz
+ENV GENAI_DIR=/home/ollama_ov_server/openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64
+
+RUN source /home/ollama_ov_server/openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64/setupvars.sh
+
+ENV CGO_ENABLED=1
+ENV GODEBUG=cgocheck=0
+
+ENV CGO_LDFLAGS=-L$GENAI_DIR/runtime/lib/intel64
+ENV CGO_CFLAGS=-I$GENAI_DIR/runtime/include
+
+WORKDIR /home/ollama_ov_server
+RUN git clone https://github.com/openvinotoolkit/openvino_contrib.git
+WORKDIR /home/ollama_ov_server/openvino_contrib/modules/ollama_openvino
+
+RUN go build -o /usr/bin/ollama .
+
+ENV OLLAMA_HOST=0.0.0.0:11434
+EXPOSE 11434
+
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    pip \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
+
+RUN pip install huggingface_hub modelscope
+
+#ENV model=Qwen3-4B-int4-ov
+#ENV model=Qwen3-8B-int4-ov -- didn't work
+#RUN huggingface-cli download OpenVINO/${model}
+#RUN modelscope download --model OpenVINO/${model} --local_dir ./${model}
+
+#RUN tar -zcvf /root/.ollama/models/${model}.tar.gz /root/.cache/hub/models--OpenVINO--${model}
+#RUN { \
+#    echo "FROM ${model}.tar.gz" ; \
+#    echo "ModelType 'OpenVINO'" ; \
+#} > /root/.ollama/models/Modelfile
+#
+#RUN /bin/bash -c "source /home/ollama_ov_server/openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64/setupvars.sh && /usr/bin/ollama create ${model}:v1 -f /root/.ollama/models/Modelfile"
+
+ENTRYPOINT ["/bin/bash", "-c", "source /home/ollama_ov_server/openvino_genai_ubuntu24_2025.2.0.0.dev20250513_x86_64/setupvars.sh && /usr/bin/ollama serve"]
+
+FROM python AS vllm
+
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    git \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/{apt,dpkg,cache,log}
+
+WORKDIR /opt
+
+RUN git clone https://github.com/vllm-project/vllm.git
+
+WORKDIR /opt/vllm
+
+RUN wget -O - https://astral.sh/uv/install.sh | sh
+ENV PATH=/root/.local/bin:$PATH
+
+RUN { \
+    echo '#!/bin/bash' ; \
+    echo 'source /opt/vllm/.venv/bin/activate' ; \
+    echo 'if [[ "${1}" != "" ]]; then bash -c "${@}"; else bash -i; fi' ; \
+    } > /opt/vllm/shell ; \
+    chmod +x /opt/vllm/shell
+
+RUN uv venv --python 3.12 --seed
+
+SHELL [ "/opt/vllm/shell" ]
+
+RUN pip install --upgrade pip ; \
+    pip install -v -r requirements/xpu.txt
+RUN VLLM_TARGET_DEVICE=xpu python setup.py install
+
+SHELL [ "/bin/bash", "-c" ]
+
+RUN { \
+    echo '#!/bin/bash'; \
+    echo 'echo "Container: vLLM"'; \
+    echo 'set -e'; \
+    echo 'python -m vllm.entrypoints.openai.api_server \'; \
+    echo '    --model=facebook/opt-13b \' ; \
+    echo '    --dtype=bfloat16 \' ; \
+    echo '    --max_model_len=1024 \' ; \
+    echo '    --distributed-executor-backend=ray \' ; \
+    echo '    --pipeline-parallel-size=2 \' ; \
+    echo '    -tp=8' ; \
+} > /entrypoint.sh \
+    && chmod +x /entrypoint.sh
+
+ENTRYPOINT [ "/entrypoint.sh" ]
\ No newline at end of file
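
A quick way to validate the ollama-ov-server stage in isolation (a sketch, not part of the diff: it assumes the OpenVINO fork keeps the stock Ollama REST API, which is what the build above implies):

    # Build only this stage and run it with GPU passthrough
    docker build --target ollama-ov-server -t ollama-ov-server .
    docker run --rm -p 11434:11434 --device /dev/dri ollama-ov-server

    # From another shell: the standard Ollama endpoints should respond
    curl http://localhost:11434/api/version
    curl http://localhost:11434/api/tags   # model list; empty until a model is created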
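The vllm stage's generated /entrypoint.sh serves vLLM's OpenAI-compatible API on port 8000. Note that the entrypoint as written asks for --pipeline-parallel-size=2 with -tp=8, i.e. 16 devices; assuming a configuration that actually loads, a request looks like this sketch (prompt and token count are arbitrary):

    curl http://localhost:8000/v1/completions \
      -H 'Content-Type: application/json' \
      -d '{"model": "facebook/opt-13b", "prompt": "Hello", "max_tokens": 16}'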
diff --git a/docker-compose.yml b/docker-compose.yml
index 4f9c7b5..39f25e8 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -18,9 +18,13 @@ services:
 # To use Anthropic, uncomment the following lines and comment out the OpenAI lines
 #      - DEFAULT_LLM_PROVIDER=anthropic
 #      - MODEL_NAME=claude-3-5-haiku-latest
-      - DEFAULT_LLM_PROVIDER=openai
+      - DEFAULT_LLM_PROVIDER=ollama
       - MODEL_NAME=${MODEL_NAME:-qwen2.5:7b}
-      - OPENAI_URL=http://ollama:11434
+      - OLLAMA_HOST=http://ollama:11434
+# Test with OpenVINO; it doesn't work though
+#      - MODEL_NAME=Qwen3-4B-int4-ov:v1
+#      - OLLAMA_HOST=http://ollama-ov-server:11434
+
     devices:
       - /dev/dri:/dev/dri
     depends_on:
@@ -60,9 +64,9 @@ services:
       - REDIS_URL=redis://redis:6379
       - REDIS_DB=1
       - SSL_ENABLED=false
-      - DEFAULT_LLM_PROVIDER=openai
+      - DEFAULT_LLM_PROVIDER=ollama
       - MODEL_NAME=${MODEL_NAME:-qwen2.5:7b}
-      - OPENAI_URL=http://ollama:11434
+      - OLLAMA_HOST=http://ollama:11434
     devices:
       - /dev/dri:/dev/dri
     depends_on:
@@ -184,6 +188,50 @@ services:
       - CAP_PERFMON    # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
       - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
 
+  ollama-ov-server:
+    build:
+      context: .
+      dockerfile: Dockerfile
+      target: ollama-ov-server
+    container_name: ollama-ov-server
+    restart: "no"
+    env_file:
+      - .env
+    environment:
+      - OLLAMA_HOST=0.0.0.0
+      - ONEAPI_DEVICE_SELECTOR=level_zero:0
+    devices:
+      - /dev/dri:/dev/dri
+    ports:
+      - 11435:11434 # ollama serve port
+    networks:
+      - internal
+    volumes:
+      - ./cache:/root/.cache   # Cache hub models and neo_compiler_cache
+      - ./ollama:/root/.ollama # Cache the ollama models
+
+  vllm:
+    build:
+      context: .
+      dockerfile: Dockerfile
+      target: vllm
+    container_name: vllm
+    restart: "always"
+    env_file:
+      - .env
+    environment:
+      - OLLAMA_HOST=0.0.0.0
+#      - ONEAPI_DEVICE_SELECTOR=level_zero:0
+    devices:
+      - /dev/dri:/dev/dri
+    ports:
+      - 11438:8000 # vLLM OpenAI-compatible API port
+    networks:
+      - internal
+    volumes:
+      - ./cache:/root/.cache   # Cache hub models and neo_compiler_cache
+      - ./ollama:/root/.ollama # Cache the ollama models
+
   jupyter:
     build:
       context: .
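
With the compose entries above, both services are reachable on their host-mapped ports; a minimal end-to-end check (a sketch, using only the ports declared in the diff):

    docker compose up -d ollama-ov-server vllm
    curl http://localhost:11435/api/version   # ollama-ov-server (11435 -> 11434)
    curl http://localhost:11438/v1/models     # vllm (11438 -> 8000)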