services:
  # This doesn't work...
  # ollama-intel:
  #   image: intelanalytics/ipex-llm-inference-cpp-xpu:latest
  #   container_name: ollama-intel
  #   restart: unless-stopped
  #   env_file:
  #     - .env
  #   devices:
  #     - /dev/dri:/dev/dri
  #   volumes:
  #     - ./cache:/root/.cache # Cache hub models and neo_compiler_cache
  #     - ./ollama:/root/.ollama # Cache the ollama models
  #   ports:
  #     - 11434:11434
  #   environment:
  #     - OLLAMA_HOST=0.0.0.0
  #     - DEVICE=Arc
  #     - OLLAMA_INTEL_GPU=true
  #     - OLLAMA_NUM_GPU=999
  #     - ZES_ENABLE_SYSMAN=1
  #     - ONEAPI_DEVICE_SELECTOR=level_zero:0
  #     - TZ=America/Los_Angeles
  #   command: sh -c 'mkdir -p /llm/ollama && cd /llm/ollama && init-ollama && exec ./ollama serve'

  ollama:
    build:
      context: .
      dockerfile: Dockerfile
      target: ollama
    container_name: ollama
    restart: "always"
    env_file:
      - .env
    environment:
      - OLLAMA_HOST=0.0.0.0
      - ONEAPI_DEVICE_SELECTOR=level_zero:0
    devices:
      - /dev/dri:/dev/dri
    ports:
      - 11434:11434 # ollama serve port
    networks:
      - internal
    volumes:
      - ./cache:/root/.cache # Cache hub models and neo_compiler_cache
      - ./ollama:/root/.ollama # Cache the ollama models
    cap_add: # used for running ze-monitor within container
      - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
      - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
      - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check

  # ollama-ov-server:
  #   build:
  #     context: .
  #     dockerfile: Dockerfile
  #     target: ollama-ov-server
  #   container_name: ollama-ov-server
  #   restart: "no"
  #   env_file:
  #     - .env
  #   environment:
  #     - OLLAMA_HOST=0.0.0.0
  #     - ONEAPI_DEVICE_SELECTOR=level_zero:0
  #   devices:
  #     - /dev/dri:/dev/dri
  #   ports:
  #     - 11435:11434 # ollama serve port
  #   networks:
  #     - internal
  #   volumes:
  #     - ./cache:/root/.cache # Cache hub models and neo_compiler_cache
  #     - ./ollama:/root/.ollama # Cache the ollama models

  ollama-context-proxy:
    build:
      context: ./ollama-context-proxy
      dockerfile: Dockerfile
    container_name: ollama-context-proxy
    restart: "always"
    env_file:
      - .env
    environment:
      - OLLAMA_BASE_URL=http://ollama:11434
    volumes:
      - ./ollama-context-proxy/venv:/opt/ollama-context-proxy/venv:rw # Live mount for python venv
      - ./ollama-context-proxy/ollama-context-proxy.py:/opt/ollama-context-proxy/ollama-context-proxy.py:rw
      - ./ollama-context-proxy/requirements.txt:/opt/ollama-context-proxy/requirements.txt:rw
      - ./ollama-context-proxy/test-proxy.py:/opt/ollama-context-proxy/test-proxy.py:rw
    ports:
      - 11436:11435 # ollama-context-proxy port
    networks:
      - internal

  vllm:
    build:
      context: .
      dockerfile: Dockerfile.xpu
      target: vllm-openai
    container_name: vllm-openai
    restart: "no"
    shm_size: 10.24gb
    env_file:
      - .env
    environment:
      - OLLAMA_HOST=0.0.0.0
      # - ONEAPI_DEVICE_SELECTOR=level_zero:0
      - ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
      - ZE_AFFINITY_MASK=0.0
      - CCL_LOG_LEVEL=INFO
    devices:
      - /dev:/dev
    # group_add:
    #   - render
    #   - video
    ports:
      - 11438:8000 # vLLM OpenAI-compatible API port
    networks:
      - internal
    volumes:
      - ./cache:/root/.cache # Cache hub models and neo_compiler_cache
      - ./ollama:/root/.ollama # Cache the ollama models
      - /sys:/sys # Required so oneAPI can read PCI paths for Battlemage
    privileged: true
    cap_add: # used for running ze-monitor within container
      - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
      - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
      - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check

  phoenix:
    image: arizephoenix/phoenix:latest
    container_name: phoenix
    restart: "always"
    env_file:
      - .env
    volumes:
      - ./db:/opt/phoenix/data
    ports:
      - 6006:6006 # Phoenix UI port

networks:
  internal:
    driver: bridge

volumes:
  redis_data:
    driver: local
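
# Rough usage sketch (assumptions: images build cleanly, the host port mappings
# above are kept, and the proxy/vLLM endpoints behave as their ports suggest --
# the exact routes exposed by ollama-context-proxy and the custom vLLM image are
# not confirmed by this file):
#   docker compose up -d ollama ollama-context-proxy vllm phoenix
#   curl http://localhost:11434/api/tags    # Ollama API, direct
#   curl http://localhost:11436/api/tags    # same API via ollama-context-proxy (assumed passthrough)
#   curl http://localhost:11438/v1/models   # vLLM OpenAI-compatible endpoint (assumed on container port 8000)
#   # Phoenix UI: http://localhost:6006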