backstory/llm-proxy.md

Environment Configuration Examples

==== Development (Ollama only) ====

```bash
export OLLAMA_HOST="http://localhost:11434"
export DEFAULT_LLM_PROVIDER="ollama"
```

==== Production with OpenAI ====

```bash
export OPENAI_API_KEY="sk-your-openai-key-here"
export DEFAULT_LLM_PROVIDER="openai"
```

==== Production with Anthropic ====

```bash
export ANTHROPIC_API_KEY="sk-ant-your-anthropic-key-here"
export DEFAULT_LLM_PROVIDER="anthropic"
```

==== Production with multiple providers ====

```bash
export OPENAI_API_KEY="sk-your-openai-key-here"
export ANTHROPIC_API_KEY="sk-ant-your-anthropic-key-here"
export GEMINI_API_KEY="your-gemini-key-here"
export OLLAMA_HOST="http://ollama-server:11434"
export DEFAULT_LLM_PROVIDER="openai"
```
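
When several providers are driven by environment variables like this, it is easy to start the service with one of them missing. The snippet below is only a sketch of a preflight check and not part of llm_proxy itself; the `PROVIDER_ENV_VARS` table and the `report_configured_providers` helper are names invented here for illustration.

```python
import os

# Map of provider name -> environment variable that enables it
# (variable names taken from the examples above).
PROVIDER_ENV_VARS = {
    "openai": "OPENAI_API_KEY",
    "anthropic": "ANTHROPIC_API_KEY",
    "gemini": "GEMINI_API_KEY",
    "ollama": "OLLAMA_HOST",
}


def report_configured_providers() -> list[str]:
    """Return the provider names whose environment variables are set."""
    configured = [name for name, var in PROVIDER_ENV_VARS.items() if os.getenv(var)]
    default = os.getenv("DEFAULT_LLM_PROVIDER")
    if default and default not in configured:
        raise RuntimeError(
            f"DEFAULT_LLM_PROVIDER is {default!r}, but that provider is not configured"
        )
    return configured


if __name__ == "__main__":
    print("Configured providers:", report_configured_providers())
```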

==== Docker Compose Example ====

docker-compose.yml

```yaml
version: '3.8'

services:
  api:
    build: .
    ports:
      - "8000:8000"
    environment:
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
      - DEFAULT_LLM_PROVIDER=openai
    depends_on:
      - ollama

  ollama:
    image: ollama/ollama
    ports:
      - "11434:11434"
    volumes:
      - ollama_data:/root/.ollama
    environment:
      - OLLAMA_HOST=0.0.0.0

volumes:
  ollama_data:
```

==== Kubernetes ConfigMap ====

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: llm-config
data:
  DEFAULT_LLM_PROVIDER: "openai"
  OLLAMA_HOST: "http://ollama-service:11434"
```


```yaml
apiVersion: v1
kind: Secret
metadata:
  name: llm-secrets
type: Opaque
stringData:
  OPENAI_API_KEY: "sk-your-key-here"
  ANTHROPIC_API_KEY: "sk-ant-your-key-here"
```

==== Python configuration example ====

config.py

```python
import os

from llm_proxy import get_llm, LLMProvider


def configure_llm_for_environment():
    """Configure LLM providers based on the deployment environment."""
    llm = get_llm()

    # Development: use a local Ollama instance
    if os.getenv('ENVIRONMENT') == 'development':
        llm.configure_provider(LLMProvider.OLLAMA, host='http://localhost:11434')
        llm.set_default_provider(LLMProvider.OLLAMA)

    # Staging: use OpenAI with retries and a request timeout
    elif os.getenv('ENVIRONMENT') == 'staging':
        llm.configure_provider(
            LLMProvider.OPENAI,
            api_key=os.getenv('OPENAI_API_KEY'),
            max_retries=3,
            timeout=30,
        )
        llm.set_default_provider(LLMProvider.OPENAI)

    # Production: use multiple providers with fallback
    elif os.getenv('ENVIRONMENT') == 'production':
        # Primary: Anthropic
        llm.configure_provider(
            LLMProvider.ANTHROPIC,
            api_key=os.getenv('ANTHROPIC_API_KEY'),
        )

        # Fallback: OpenAI
        llm.configure_provider(
            LLMProvider.OPENAI,
            api_key=os.getenv('OPENAI_API_KEY'),
        )

        # Set the primary provider
        llm.set_default_provider(LLMProvider.ANTHROPIC)
```
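
To exercise this locally, something along these lines should work, assuming `config.py` is importable as `config` and `ENVIRONMENT` is set to the case you want to test:

```python
# Quick manual check of config.py -- assumes it is importable as `config`.
import os

os.environ.setdefault("ENVIRONMENT", "development")

from config import configure_llm_for_environment
from llm_proxy import get_llm

configure_llm_for_environment()
print("Default provider:", get_llm().default_provider.value)
```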

==== Usage Examples ====

All examples below assume `from llm_proxy import get_llm, LLMProvider`.

Example 1: Basic usage with the default provider

```python
async def basic_example():
    llm = get_llm()

    response = await llm.chat(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello!"}]
    )
    print(response.content)
```

Example 2: Specify provider explicitly

```python
async def provider_specific_example():
    llm = get_llm()

    # Use OpenAI specifically
    response = await llm.chat(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello!"}],
        provider=LLMProvider.OPENAI
    )

    # Use Anthropic specifically
    response2 = await llm.chat(
        model="claude-3-sonnet-20240229",
        messages=[{"role": "user", "content": "Hello!"}],
        provider=LLMProvider.ANTHROPIC
    )
```

Example 3: Streaming with provider fallback

```python
async def streaming_with_fallback():
    llm = get_llm()

    try:
        async for chunk in llm.chat(
            model="claude-3-sonnet-20240229",
            messages=[{"role": "user", "content": "Write a story"}],
            provider=LLMProvider.ANTHROPIC,
            stream=True
        ):
            print(chunk.content, end='', flush=True)
    except Exception as e:
        print(f"Primary provider failed: {e}")
        # Fallback to OpenAI
        async for chunk in llm.chat(
            model="gpt-4",
            messages=[{"role": "user", "content": "Write a story"}],
            provider=LLMProvider.OPENAI,
            stream=True
        ):
            print(chunk.content, end='', flush=True)
```
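
If the same try-then-fall-back pattern is needed in several places, it can be lifted into a small helper. The sketch below is one possible shape rather than part of the proxy API; the list of `(provider, model)` pairs and the `stream_with_fallback` name are conventions invented here:

```python
async def stream_with_fallback(llm, prompt: str, fallbacks):
    """Try each (LLMProvider, model) pair in order until one streams successfully.

    The (provider, model) pair convention is specific to this sketch,
    not something llm_proxy defines.
    """
    last_error = None
    for provider, model in fallbacks:
        try:
            async for chunk in llm.chat(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                provider=provider,
                stream=True
            ):
                print(chunk.content, end='', flush=True)
            return
        except Exception as e:
            last_error = e
            print(f"\n{provider.value} failed: {e}; trying the next provider...")
    raise RuntimeError("All providers failed") from last_error
```

It could then be called as, for example, `await stream_with_fallback(get_llm(), "Write a story", [(LLMProvider.ANTHROPIC, "claude-3-sonnet-20240229"), (LLMProvider.OPENAI, "gpt-4")])`.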

Example 4: Load balancing between providers

```python
import random

async def load_balanced_request():
    llm = get_llm()
    available_providers = [LLMProvider.OPENAI, LLMProvider.ANTHROPIC]

    # Simple random load balancing
    provider = random.choice(available_providers)

    # Adjust model based on provider
    model_mapping = {
        LLMProvider.OPENAI: "gpt-4",
        LLMProvider.ANTHROPIC: "claude-3-sonnet-20240229"
    }

    response = await llm.chat(
        model=model_mapping[provider],
        messages=[{"role": "user", "content": "Hello!"}],
        provider=provider
    )

    print(f"Response from {provider.value}: {response.content}")
```

==== FastAPI Startup Configuration ====

```python
from fastapi import FastAPI

from config import configure_llm_for_environment
from llm_proxy import get_llm

app = FastAPI()


@app.on_event("startup")
async def startup_event():
    """Configure LLM providers on application startup."""
    configure_llm_for_environment()

    # Verify configuration
    llm = get_llm()
    print(f"Configured providers: {[p.value for p in llm._initialized_providers]}")
    print(f"Default provider: {llm.default_provider.value}")
```