services:
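  # Overview of the stack (Intel GPU inference via oneAPI/Level Zero):
  #   ollama               -> host port 11434 (Ollama API)
  #   ollama-context-proxy -> host port 11436 (fronts ollama:11434)
  #   vllm                 -> host port 11438 (OpenAI-compatible API, container port 8000)
  #   phoenix              -> host port 6006  (Phoenix UI for tracing)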
  # Note: this prebuilt IPEX-LLM image doesn't work on this setup; kept
  # commented out for reference.
  # ollama-intel:
  #   image: intelanalytics/ipex-llm-inference-cpp-xpu:latest
  #   container_name: ollama-intel
  #   restart: unless-stopped
  #   env_file:
  #     - .env
  #   devices:
  #     - /dev/dri:/dev/dri
  #   volumes:
  #     - ./cache:/root/.cache # Cache hub models and neo_compiler_cache
  #     - ./ollama:/root/.ollama # Cache the ollama models
  #   ports:
  #     - 11434:11434
  #   environment:
  #     - OLLAMA_HOST=0.0.0.0
  #     - DEVICE=Arc
  #     - OLLAMA_INTEL_GPU=true
  #     - OLLAMA_NUM_GPU=999
  #     - ZES_ENABLE_SYSMAN=1
  #     - ONEAPI_DEVICE_SELECTOR=level_zero:0
  #     - TZ=America/Los_Angeles
  #   command: sh -c 'mkdir -p /llm/ollama && cd /llm/ollama && init-ollama && exec ./ollama serve'

  ollama:
    build:
      context: .
      dockerfile: Dockerfile
      target: ollama
    container_name: ollama
    restart: "always"
    env_file:
      - .env
    environment:
      - OLLAMA_HOST=0.0.0.0
      - ONEAPI_DEVICE_SELECTOR=level_zero:0
    devices:
      - /dev/dri:/dev/dri
    ports:
      - 11434:11434 # ollama serve port
    networks:
      - internal
    volumes:
      - ./cache:/root/.cache # Cache hub models and neo_compiler_cache
      - ./ollama:/root/.ollama # Cache the ollama models
    cap_add: # used for running ze-monitor within container
      - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
      - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
      - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
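
  # A minimal smoke test for the ollama service, assuming only that the host
  # can reach the published port (no particular model is assumed):
  #   docker compose up -d ollama
  #   curl http://localhost:11434/api/tags   # lists models this instance has pulled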

  # ollama-ov-server:
  #   build:
  #     context: .
  #     dockerfile: Dockerfile
  #     target: ollama-ov-server
  #   container_name: ollama-ov-server
  #   restart: "no"
  #   env_file:
  #     - .env
  #   environment:
  #     - OLLAMA_HOST=0.0.0.0
  #     - ONEAPI_DEVICE_SELECTOR=level_zero:0
  #   devices:
  #     - /dev/dri:/dev/dri
  #   ports:
  #     - 11435:11434 # ollama serve port
  #   networks:
  #     - internal
  #   volumes:
  #     - ./cache:/root/.cache # Cache hub models and neo_compiler_cache
  #     - ./ollama:/root/.ollama # Cache the ollama models

  ollama-context-proxy:
    build:
      context: ./ollama-context-proxy
      dockerfile: Dockerfile
    container_name: ollama-context-proxy
    restart: "always"
    env_file:
      - .env
    environment:
      - OLLAMA_HOST=http://ollama:11434
    ports:
      - 11436:11434 # ollama-context-proxy port
    networks:
      - internal
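
  # The proxy publishes host port 11436 and forwards to ollama:11434 (per
  # OLLAMA_HOST above), so the same Ollama API calls should work through it:
  #   curl http://localhost:11436/api/tags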

  vllm:
    build:
      context: .
      dockerfile: Dockerfile.xpu
      target: vllm-openai
    container_name: vllm-openai
    restart: "no"
    shm_size: 10.24gb
    env_file:
      - .env
    environment:
      - OLLAMA_HOST=0.0.0.0
      # - ONEAPI_DEVICE_SELECTOR=level_zero:0
      - ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
      - ZE_AFFINITY_MASK=0.0
      - CCL_LOG_LEVEL=INFO
    devices:
      - /dev:/dev
    # group_add:
    #   - render
    #   - video
    ports:
      - 11438:8000 # vLLM OpenAI-compatible API port (container port 8000)
    networks:
      - internal
    volumes:
      - ./cache:/root/.cache # Cache hub models and neo_compiler_cache
      - ./ollama:/root/.ollama # Cache the ollama models
      - /sys:/sys # Required so oneAPI can read PCI paths for Battlemage
    privileged: true
    cap_add: # used for running ze-monitor within container
      - CAP_DAC_READ_SEARCH # Bypass all filesystem read access checks
      - CAP_PERFMON # Access to perf_events (vs. overloaded CAP_SYS_ADMIN)
      - CAP_SYS_PTRACE # PTRACE_MODE_READ_REALCREDS ptrace access mode check
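
  # vLLM exposes an OpenAI-compatible API on container port 8000, published
  # here as 11438. A minimal check, plus a sketch of a completion request (the
  # model name is a placeholder for whatever this server was launched with):
  #   curl http://localhost:11438/v1/models
  #   curl http://localhost:11438/v1/completions \
  #     -H "Content-Type: application/json" \
  #     -d '{"model": "<served-model-name>", "prompt": "Hello", "max_tokens": 16}'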

  phoenix:
    image: arizephoenix/phoenix:latest
    container_name: phoenix
    restart: "always"
    env_file:
      - .env
    volumes:
      - ./db:/opt/phoenix/data
    ports:
      - 6006:6006 # Phoenix UI port
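  # Phoenix persists its data under ./db and, once the container is up, serves
  # its UI at http://localhost:6006.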

networks:
  internal:
    driver: bridge

volumes:
  redis_data: # Note: declared but not referenced by any service in this file
    driver: local