Fixed context proxy

James Ketr 2025-08-01 10:22:33 -07:00
parent 8119cd8492
commit 2d2745a788
6 changed files with 296 additions and 20 deletions

.gitignore
View File

@@ -2,3 +2,4 @@ db/**
cache/**
ollama/**
.env
**/venv/**

View File

@@ -80,9 +80,14 @@ services:
env_file:
- .env
environment:
- OLLAMA_HOST=http://ollama:11434
- OLLAMA_BASE_URL=http://ollama:11434
volumes:
- ./ollama-context-proxy/venv:/opt/ollama-context-proxy/venv:rw # Live mount for python venv
- ./ollama-context-proxy/ollama-context-proxy.py:/opt/ollama-context-proxy/ollama-context-proxy.py:rw
- ./ollama-context-proxy/requirements.txt:/opt/ollama-context-proxy/requirements.txt:rw
- ./ollama-context-proxy/test-proxy.py:/opt/ollama-context-proxy/test-proxy.py:rw
ports:
- 11436:11434 # ollama-context-proxy port
- 11436:11435 # ollama-context-proxy port
networks:
- internal
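
With this mapping the proxy's in-container port 11435 is published on host port 11436, so host-side clients go through the proxy rather than straight to Ollama. A minimal sketch of such a request with requests, mirroring the test-proxy.py added below (reaching the proxy as localhost and the llama2 model name are assumptions):

import requests

# "auto" lets the proxy pick num_ctx; host port 11436 maps to the proxy's 11435 per the compose file above.
resp = requests.post(
    "http://localhost:11436/proxy-context/auto/api/generate",
    json={"model": "llama2", "prompt": "Hello, world!", "stream": False},
    timeout=30,
)
print(resp.status_code, resp.text[:200])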

View File

@@ -38,7 +38,14 @@ RUN { \
echo 'echo "Container: ollama-context-proxy"'; \
echo 'set -e'; \
echo 'echo "Setting pip environment to /opt/ollama-context-proxy"'; \
echo 'if [[ ! -d /opt/ollama-context-proxy/venv/bin ]]; then'; \
echo ' echo "Creating virtual environment at /opt/ollama-context-proxy/venv"'; \
echo ' python3 -m venv --clear --system-site-packages /opt/ollama-context-proxy/venv'; \
echo 'fi'; \
echo 'echo "Activating virtual environment at /opt/ollama-context-proxy/venv"'; \
echo 'source /opt/ollama-context-proxy/venv/bin/activate'; \
echo 'echo "Installing requirements from /opt/ollama-context-proxy/requirements.txt"'; \
echo 'pip install -r /opt/ollama-context-proxy/requirements.txt >/dev/null || echo "Failed"'; \
echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama-context-proxy/)?shell$ ]]; then'; \
echo ' echo "Dropping to shell"'; \
echo ' shift'; \

ollama-context-proxy/ollama-context-proxy.py
View File

@@ -33,7 +33,7 @@ class OllamaContextProxy:
else:
# Fall back to host/port construction
if ollama_host is None:
ollama_host = "localhost"
ollama_host = "ollama"
self.ollama_base_url = f"http://{ollama_host}:{ollama_port}"
self.proxy_port = proxy_port
@@ -70,9 +70,52 @@ class OllamaContextProxy:
# Optional: Add a health check endpoint
app.router.add_get("/", self.health_check)
app.router.add_get("/health", self.health_check)
app.router.add_get("/debug/ollama", self.debug_ollama)
return app
async def debug_ollama(self, request: web.Request) -> web.Response:
"""Debug endpoint to test connectivity to Ollama"""
if not self.session:
return web.Response(
text="Error: HTTP session not initialized",
status=500,
content_type="text/plain",
)
test_url = f"{self.ollama_base_url}/api/tags"
try:
# Test basic connectivity to Ollama
self.logger.info(f"Testing Ollama connectivity to: {test_url}")
async with self.session.get(test_url) as response:
status = response.status
content_type = response.headers.get("content-type", "N/A")
body = await response.text()
return web.Response(
text=f"Ollama Debug Test\n"
f"=================\n"
f"Target URL: {test_url}\n"
f"Status: {status}\n"
f"Content-Type: {content_type}\n"
f"Body Length: {len(body)}\n"
f"Body Preview: {body[:500]}...\n"
f"\nProxy Base URL: {self.ollama_base_url}\n"
f"Available Contexts: {self.available_contexts}",
content_type="text/plain",
)
except Exception as e:
return web.Response(
text=f"Ollama Debug Test FAILED\n"
f"========================\n"
f"Error: {str(e)}\n"
f"Target URL: {test_url}\n"
f"Proxy Base URL: {self.ollama_base_url}",
status=502,
content_type="text/plain",
)
async def health_check(self, request: web.Request) -> web.Response:
"""Health check endpoint"""
return web.Response(
@@ -86,7 +129,9 @@ class OllamaContextProxy:
content_type="text/plain",
)
async def proxy_handler(self, request: web.Request) -> web.Response:
async def proxy_handler(
self, request: web.Request
) -> web.Response | web.StreamResponse:
"""Handle all proxy requests with context size extraction or auto-detection"""
# Extract context spec and remaining path
@@ -97,18 +142,32 @@ class OllamaContextProxy:
if remaining_path.startswith("/"):
remaining_path = remaining_path[1:]
# Get request data first (needed for auto-sizing)
# Get request data first (needed for auto-sizing) - read only once!
original_data = None
request_body = None
if request.content_type == "application/json":
try:
data = await request.json()
except json.JSONDecodeError:
data = await request.text()
original_data = await request.json()
# Convert back to bytes for forwarding
request_body = json.dumps(original_data).encode("utf-8")
except json.JSONDecodeError as e:
self.logger.error(f"Failed to parse JSON: {e}")
request_body = await request.read()
original_data = request_body.decode("utf-8", errors="ignore")
else:
data = await request.read()
request_body = await request.read()
original_data = request_body
# Use original_data for analysis, request_body for forwarding
data_for_analysis = original_data if original_data is not None else {}
data_for_forwarding = request_body if request_body is not None else b""
# Determine context size
if context_spec == "auto":
context_size = self._auto_determine_context_size(data, remaining_path)
context_size = self._auto_determine_context_size(
data_for_analysis, remaining_path
)
else:
context_size = int(context_spec)
@@ -130,22 +189,57 @@ class OllamaContextProxy:
else:
target_url = f"{self.ollama_base_url}/{remaining_path}"
self.logger.info(f"Routing to context {context_size} -> {target_url}")
# Enhanced debugging
self.logger.info("=== REQUEST DEBUG ===")
self.logger.info(f"Original request path: {request.path}")
self.logger.info(f"Context spec: {context_spec}")
self.logger.info(f"Remaining path: '{remaining_path}'")
self.logger.info(f"Target URL: {target_url}")
self.logger.info(f"Request method: {request.method}")
self.logger.info(f"Request headers: {dict(request.headers)}")
self.logger.info(f"Request query params: {dict(request.query)}")
self.logger.info(f"Content type: {request.content_type}")
if isinstance(data_for_analysis, dict):
self.logger.info(f"Request data keys: {list(data_for_analysis.keys())}")
else:
data_len = (
len(data_for_analysis)
if hasattr(data_for_analysis, "__len__")
else "N/A"
)
self.logger.info(
f"Request data type: {type(data_for_analysis)}, length: {data_len}"
)
self.logger.info(f"Selected context size: {context_size}")
# Inject context if needed
if self._should_inject_context(remaining_path) and isinstance(data, dict):
if "options" not in data:
data["options"] = {}
data["options"]["num_ctx"] = context_size
# Inject context if needed (modify the JSON data, not the raw bytes)
modified_data = False
if self._should_inject_context(remaining_path) and isinstance(
data_for_analysis, dict
):
if "options" not in data_for_analysis:
data_for_analysis["options"] = {}
data_for_analysis["options"]["num_ctx"] = context_size
self.logger.info(f"Injected num_ctx={context_size} for {remaining_path}")
# Re-encode the modified JSON
data_for_forwarding = json.dumps(data_for_analysis).encode("utf-8")
modified_data = True
# Prepare headers (exclude hop-by-hop headers)
headers = {
key: value
for key, value in request.headers.items()
if key.lower() not in ["host", "connection", "upgrade"]
if key.lower() not in ["host", "connection", "upgrade", "content-length"]
}
# Update Content-Length if we modified the data
if modified_data and isinstance(data_for_forwarding, bytes):
headers["Content-Length"] = str(len(data_for_forwarding))
# Debug the final data being sent
self.logger.debug(f"Final data being sent: {data_for_forwarding}")
self.logger.debug(f"Final headers: {headers}")
if not self.session:
raise RuntimeError("HTTP session not initialized")
try:
@@ -153,10 +247,28 @@ class OllamaContextProxy:
async with self.session.request(
method=request.method,
url=target_url,
data=json.dumps(data) if isinstance(data, dict) else data,
data=data_for_forwarding,
headers=headers,
params=request.query,
) as response:
# Enhanced response debugging
self.logger.info("=== RESPONSE DEBUG ===")
self.logger.info(f"Response status: {response.status}")
self.logger.info(f"Response headers: {dict(response.headers)}")
self.logger.info(
f"Response content-type: {response.headers.get('content-type', 'N/A')}"
)
# Log response body for non-streaming 404s
if response.status == 404:
error_body = await response.text()
self.logger.error(f"404 Error body: {error_body}")
return web.Response(
text=f"Ollama 404 Error - URL: {target_url}\nError: {error_body}",
status=404,
content_type="text/plain",
)
# Handle streaming responses (for generate/chat endpoints)
if response.headers.get("content-type", "").startswith(
"application/x-ndjson"
@@ -323,13 +435,19 @@ async def main():
)
# Get default host from OLLAMA_BASE_URL if available
default_host = "localhost"
default_host = "ollama" # Default to "ollama" for Docker environments
base_url = os.getenv("OLLAMA_BASE_URL")
if base_url:
# Extract host from base URL for backward compatibility with CLI args
parsed = urllib.parse.urlparse(base_url)
if parsed.hostname:
default_host = parsed.hostname
else:
# If no OLLAMA_BASE_URL, check if we're likely in a Docker environment
if os.path.exists("/.dockerenv"):
default_host = "ollama"
else:
default_host = "localhost"
parser.add_argument(
"--ollama-host",

ollama-context-proxy/requirements.txt
View File

@@ -2,11 +2,16 @@ aiohappyeyeballs==2.6.1
aiohttp==3.12.15
aiosignal==1.4.0
attrs==25.3.0
certifi==2025.7.14
charset-normalizer==3.4.2
frozenlist==1.7.0
idna==3.10
multidict==6.6.3
propcache==0.3.2
requests==2.32.4
ruff==0.12.7
setuptools==68.1.2
typing_extensions==4.14.1
urllib3==2.5.0
wheel==0.42.0
yarl==1.20.1

ollama-context-proxy/test-proxy.py
View File

@@ -0,0 +1,140 @@
#!/usr/bin/env python3
"""
Simple test script for debugging the Ollama Context Proxy
"""
import requests
import sys

def test_direct_ollama(base_url="http://localhost:11434"):
    """Test direct connection to Ollama"""
    print("=== Testing Direct Ollama Connection ===")
    print(f"URL: {base_url}")
    try:
        response = requests.get(f"{base_url}/api/tags", timeout=5)
        print(f"Status: {response.status_code}")
        print(f"Headers: {dict(response.headers)}")
        if response.status_code == 200:
            print("✅ Direct Ollama connection OK")
            tags_data = response.json()
            print(f"Available models: {[model['name'] for model in tags_data.get('models', [])]}")
        else:
            print(f"❌ Direct Ollama connection failed: {response.text}")
        return response.status_code == 200
    except Exception as e:
        print(f"❌ Direct Ollama connection error: {e}")
        return False

def test_proxy_health(proxy_url="http://localhost:11435"):
    """Test proxy health endpoint"""
    print("\n=== Testing Proxy Health ===")
    print(f"URL: {proxy_url}")
    try:
        response = requests.get(f"{proxy_url}/health", timeout=5)
        print(f"Status: {response.status_code}")
        print(f"Response: {response.text}")
        if response.status_code == 200:
            print("✅ Proxy health check OK")
        else:
            print("❌ Proxy health check failed")
        return response.status_code == 200
    except Exception as e:
        print(f"❌ Proxy health check error: {e}")
        return False

def test_proxy_debug(proxy_url="http://localhost:11435"):
    """Test proxy debug endpoint"""
    print("\n=== Testing Proxy Debug ===")
    print(f"URL: {proxy_url}/debug/ollama")
    try:
        response = requests.get(f"{proxy_url}/debug/ollama", timeout=10)
        print(f"Status: {response.status_code}")
        print(f"Response: {response.text}")
        if response.status_code == 200:
            print("✅ Proxy debug check OK")
        else:
            print("❌ Proxy debug check failed")
        return response.status_code == 200
    except Exception as e:
        print(f"❌ Proxy debug check error: {e}")
        return False

def test_proxy_request(proxy_url="http://localhost:11435", model="llama2"):
    """Test actual proxy request"""
    print("\n=== Testing Proxy Request ===")
    print(f"URL: {proxy_url}/proxy-context/auto/api/generate")
    payload = {
        "model": model,
        "prompt": "Hello, world!",
        "stream": False
    }
    try:
        response = requests.post(
            f"{proxy_url}/proxy-context/auto/api/generate",
            json=payload,
            timeout=30
        )
        print(f"Status: {response.status_code}")
        print(f"Headers: {dict(response.headers)}")
        print(f"Response preview: {response.text[:500]}...")
        if response.status_code == 200:
            print("✅ Proxy request OK")
        else:
            print("❌ Proxy request failed")
        return response.status_code == 200
    except Exception as e:
        print(f"❌ Proxy request error: {e}")
        return False

def main():
    if len(sys.argv) > 1:
        if sys.argv[1] == "--help":
            print("Usage: python3 test-proxy.py [ollama_url] [proxy_url] [model]")
            print(" ollama_url: Default http://localhost:11434")
            print(" proxy_url: Default http://localhost:11435")
            print(" model: Default llama2")
            return
    ollama_url = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:11434"
    proxy_url = sys.argv[2] if len(sys.argv) > 2 else "http://localhost:11435"
    model = sys.argv[3] if len(sys.argv) > 3 else "llama2"
    print("Ollama Context Proxy Debug Test")
    print("===============================")
    print(f"Ollama URL: {ollama_url}")
    print(f"Proxy URL: {proxy_url}")
    print(f"Test Model: {model}")
    # Run tests
    tests = [
        ("Direct Ollama", lambda: test_direct_ollama(ollama_url)),
        ("Proxy Health", lambda: test_proxy_health(proxy_url)),
        ("Proxy Debug", lambda: test_proxy_debug(proxy_url)),
        ("Proxy Request", lambda: test_proxy_request(proxy_url, model)),
    ]
    results = []
    for test_name, test_func in tests:
        success = test_func()
        results.append((test_name, success))
    print("\n=== Summary ===")
    for test_name, success in results:
        status = "✅ PASS" if success else "❌ FAIL"
        print(f"{test_name}: {status}")
    all_passed = all(success for _, success in results)
    if all_passed:
        print("\n🎉 All tests passed!")
    else:
        print("\n⚠️ Some tests failed. Check the output above for details.")
        sys.exit(1)

if __name__ == "__main__":
    main()
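
Run from the host against this compose setup, the script would be pointed at the published proxy port rather than its in-container default, e.g. python3 test-proxy.py http://localhost:11434 http://localhost:11436 llama2 (this assumes Ollama's own API is also reachable on localhost:11434 and that a llama2 model has been pulled).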