# llm/docker-compose.yml
# Retrieved 2025-07-29 16:55:47 -07:00 (128 lines, 3.5 KiB, YAML)
services:
  # This doesn't work...
  # (disabled image-based Intel IPEX-LLM setup, kept for reference; the
  # locally-built `ollama` service below is used instead)
  # ollama-intel:
  #   image: intelanalytics/ipex-llm-inference-cpp-xpu:latest
  #   container_name: ollama-intel
  #   restart: unless-stopped
  #   env_file:
  #     - .env
  #   devices:
  #     - /dev/dri:/dev/dri
  #   volumes:
  #     - ./cache:/root/.cache   # Cache hub models and neo_compiler_cache
  #     - ./ollama:/root/.ollama # Cache the ollama models
  #   ports:
  #     - 11434:11434
  #   environment:
  #     - OLLAMA_HOST=0.0.0.0
  #     - DEVICE=Arc
  #     - OLLAMA_INTEL_GPU=true
  #     - OLLAMA_NUM_GPU=999
  #     - ZES_ENABLE_SYSMAN=1
  #     - ONEAPI_DEVICE_SELECTOR=level_zero:0
  #     - TZ=America/Los_Angeles
  #   command: sh -c 'mkdir -p /llm/ollama && cd /llm/ollama && init-ollama && exec ./ollama serve'
ollama:
build:
context: .
dockerfile: Dockerfile
target: ollama
container_name: ollama
restart: "always"
env_file:
- .env
environment:
- OLLAMA_HOST=0.0.0.0
- ONEAPI_DEVICE_SELECTOR=level_zero:0
devices:
- /dev/dri:/dev/dri
ports:
- 11434:11434 # ollama serve port
networks:
- internal
volumes:
- ./cache:/root/.cache # Cache hub models and neo_compiler_cache
- ./ollama:/root/.ollama # Cache the ollama models
cap_add: # used for running ze-monitor within container
- CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
- CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
- CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
  # Disabled variant built from the same Dockerfile with the
  # `ollama-ov-server` target, published on host port 11435.
  # ollama-ov-server:
  #   build:
  #     context: .
  #     dockerfile: Dockerfile
  #     target: ollama-ov-server
  #   container_name: ollama-ov-server
  #   restart: "no"
  #   env_file:
  #     - .env
  #   environment:
  #     - OLLAMA_HOST=0.0.0.0
  #     - ONEAPI_DEVICE_SELECTOR=level_zero:0
  #   devices:
  #     - /dev/dri:/dev/dri
  #   ports:
  #     - 11435:11434 # ollama serve port
  #   networks:
  #     - internal
  #   volumes:
  #     - ./cache:/root/.cache   # Cache hub models and neo_compiler_cache
  #     - ./ollama:/root/.ollama # Cache the ollama models
vllm:
build:
context: .
dockerfile: Dockerfile.xpu
target: vllm-openai
container_name: vllm-openai
restart: "no"
shm_size: 10.24gb
env_file:
- .env
environment:
- OLLAMA_HOST=0.0.0.0
# - ONEAPI_DEVICE_SELECTOR=level_zero:0
- ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
- ZE_AFFINITY_MASK=0.0
- CCL_LOG_LEVEL=INFO
devices:
- /dev:/dev
# group_add:
# - render
# - video
ports:
- 11438:8000 # ollama serve port
networks:
- internal
volumes:
- ./cache:/root/.cache # Cache hub models and neo_compiler_cache
- ./ollama:/root/.ollama # Cache the ollama models
- /sys:/sys # Required so oneAPI can read PCI paths for Battlemage
privileged: true
cap_add: # used for running ze-monitor within container
- CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
- CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
- CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
phoenix:
image: arizephoenix/phoenix:latest
container_name: phoenix
restart: "always"
env_file:
- .env
volumes:
- ./db:/opt/phoenix/data
ports:
- 6006:6006 # Phoenix UI port
networks:
  internal:
    driver: bridge

volumes:
  # NOTE(review): redis_data is not mounted by any service visible in this
  # file — confirm it is still needed before removing.
  redis_data:
    driver: local