# Environment Configuration Examples

```bash
# ==== Development (Ollama only) ====
export OLLAMA_HOST="http://localhost:11434"
export DEFAULT_LLM_PROVIDER="ollama"

# ==== Production with OpenAI ====
export OPENAI_API_KEY="sk-your-openai-key-here"
export DEFAULT_LLM_PROVIDER="openai"

# ==== Production with Anthropic ====
export ANTHROPIC_API_KEY="sk-ant-your-anthropic-key-here"
export DEFAULT_LLM_PROVIDER="anthropic"

# ==== Production with multiple providers ====
export OPENAI_API_KEY="sk-your-openai-key-here"
export ANTHROPIC_API_KEY="sk-ant-your-anthropic-key-here"
export GEMINI_API_KEY="your-gemini-key-here"
export OLLAMA_HOST="http://ollama-server:11434"
export DEFAULT_LLM_PROVIDER="openai"
```
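
How the proxy consumes these variables depends on its internals; as a minimal sketch (assuming the `LLMProvider` enum values match the lowercase names used above, which is an assumption rather than a documented guarantee), the default provider could be resolved like this:

```python
# Hypothetical sketch: resolving DEFAULT_LLM_PROVIDER into the proxy's default
# provider. Assumes LLMProvider enum values are the lowercase names used above
# ("ollama", "openai", "anthropic", ...); adjust if the actual enum differs.
import os

from llm_proxy import get_llm, LLMProvider

provider_name = os.getenv("DEFAULT_LLM_PROVIDER", "ollama")
llm = get_llm()
llm.set_default_provider(LLMProvider(provider_name))
```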

```yaml
# ==== Docker Compose Example ====
# docker-compose.yml
version: '3.8'

services:
  api:
    build: .
    ports:
      - "8000:8000"
    environment:
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
      - DEFAULT_LLM_PROVIDER=openai
    depends_on:
      - ollama

  ollama:
    image: ollama/ollama
    ports:
      - "11434:11434"
    volumes:
      - ollama_data:/root/.ollama
    environment:
      - OLLAMA_HOST=0.0.0.0

volumes:
  ollama_data:
```

```yaml
# ==== Kubernetes ConfigMap ====
apiVersion: v1
kind: ConfigMap
metadata:
  name: llm-config
data:
  DEFAULT_LLM_PROVIDER: "openai"
  OLLAMA_HOST: "http://ollama-service:11434"
---
apiVersion: v1
kind: Secret
metadata:
  name: llm-secrets
type: Opaque
stringData:
  OPENAI_API_KEY: "sk-your-key-here"
  ANTHROPIC_API_KEY: "sk-ant-your-key-here"
```

```python
# ==== Python configuration example ====
# config.py
import os

from llm_proxy import get_llm, LLMProvider


def configure_llm_for_environment():
    """Configure LLM providers based on the deployment environment."""
    llm = get_llm()

    # Development: use local Ollama
    if os.getenv('ENVIRONMENT') == 'development':
        llm.configure_provider(LLMProvider.OLLAMA, host='http://localhost:11434')
        llm.set_default_provider(LLMProvider.OLLAMA)

    # Staging: use OpenAI with retry and timeout limits
    elif os.getenv('ENVIRONMENT') == 'staging':
        llm.configure_provider(
            LLMProvider.OPENAI,
            api_key=os.getenv('OPENAI_API_KEY'),
            max_retries=3,
            timeout=30,
        )
        llm.set_default_provider(LLMProvider.OPENAI)

    # Production: use multiple providers with fallback
    elif os.getenv('ENVIRONMENT') == 'production':
        # Primary: Anthropic
        llm.configure_provider(
            LLMProvider.ANTHROPIC,
            api_key=os.getenv('ANTHROPIC_API_KEY'),
        )
        # Fallback: OpenAI
        llm.configure_provider(
            LLMProvider.OPENAI,
            api_key=os.getenv('OPENAI_API_KEY'),
        )
        # Set the primary provider as default
        llm.set_default_provider(LLMProvider.ANTHROPIC)
```
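
Outside of a web server (for example a one-off script or a background worker), the same function can be called once before any requests are made. A minimal sketch, assuming `ENVIRONMENT` and the matching API keys from the sections above are already exported; the model name is an illustrative choice:

```python
# Minimal standalone usage sketch for config.py (assumes ENVIRONMENT and the
# relevant API keys are set in the environment before this script runs).
import asyncio

from config import configure_llm_for_environment
from llm_proxy import get_llm


async def main():
    configure_llm_for_environment()
    llm = get_llm()
    response = await llm.chat(
        model="gpt-4",  # illustrative; pick a model served by the default provider
        messages=[{"role": "user", "content": "Hello!"}],
    )
    print(response.content)


if __name__ == "__main__":
    asyncio.run(main())
```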

```python
# ==== Usage Examples ====

# Example 1: Basic usage with default provider
async def basic_example():
    llm = get_llm()
    response = await llm.chat(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello!"}]
    )
    print(response.content)


# Example 2: Specify provider explicitly
async def provider_specific_example():
    llm = get_llm()

    # Use OpenAI specifically
    response = await llm.chat(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello!"}],
        provider=LLMProvider.OPENAI
    )

    # Use Anthropic specifically
    response2 = await llm.chat(
        model="claude-3-sonnet-20240229",
        messages=[{"role": "user", "content": "Hello!"}],
        provider=LLMProvider.ANTHROPIC
    )


# Example 3: Streaming with provider fallback
async def streaming_with_fallback():
    llm = get_llm()
    try:
        async for chunk in llm.chat(
            model="claude-3-sonnet-20240229",
            messages=[{"role": "user", "content": "Write a story"}],
            provider=LLMProvider.ANTHROPIC,
            stream=True
        ):
            print(chunk.content, end='', flush=True)
    except Exception as e:
        print(f"Primary provider failed: {e}")
        # Fallback to OpenAI
        async for chunk in llm.chat(
            model="gpt-4",
            messages=[{"role": "user", "content": "Write a story"}],
            provider=LLMProvider.OPENAI,
            stream=True
        ):
            print(chunk.content, end='', flush=True)


# Example 4: Load balancing between providers
import random

async def load_balanced_request():
    llm = get_llm()
    available_providers = [LLMProvider.OPENAI, LLMProvider.ANTHROPIC]

    # Simple random load balancing
    provider = random.choice(available_providers)

    # Adjust model based on provider
    model_mapping = {
        LLMProvider.OPENAI: "gpt-4",
        LLMProvider.ANTHROPIC: "claude-3-sonnet-20240229"
    }

    response = await llm.chat(
        model=model_mapping[provider],
        messages=[{"role": "user", "content": "Hello!"}],
        provider=provider
    )
    print(f"Response from {provider.value}: {response.content}")
```

```python
# ==== FastAPI Startup Configuration ====
from fastapi import FastAPI

from config import configure_llm_for_environment
from llm_proxy import get_llm

app = FastAPI()


@app.on_event("startup")
async def startup_event():
    """Configure LLM providers on application startup"""
    configure_llm_for_environment()

    # Verify configuration
    llm = get_llm()
    print(f"Configured providers: {[p.value for p in llm._initialized_providers]}")
    print(f"Default provider: {llm.default_provider.value}")
```