Fixed context proxy
This commit is contained in:
parent 8119cd8492
commit 2d2745a788

.gitignore (vendored): 1 change
@@ -2,3 +2,4 @@ db/**
cache/**
ollama/**
.env
**/venv/**
@@ -80,9 +80,14 @@ services:
    env_file:
      - .env
    environment:
      - OLLAMA_HOST=http://ollama:11434
      - OLLAMA_BASE_URL=http://ollama:11434
    volumes:
      - ./ollama-context-proxy/venv:/opt/ollama-context-proxy/venv:rw # Live mount for python venv
      - ./ollama-context-proxy/ollama-context-proxy.py:/opt/ollama-context-proxy/ollama-context-proxy.py:rw
      - ./ollama-context-proxy/requirements.txt:/opt/ollama-context-proxy/requirements.txt:rw
      - ./ollama-context-proxy/test-proxy.py:/opt/ollama-context-proxy/test-proxy.py:rw
    ports:
      - 11436:11434 # ollama-context-proxy port
      - 11436:11435 # ollama-context-proxy port
    networks:
      - internal
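(For reference: a client on the host reaches the proxy through the published port, 11436 above, and encodes the desired context size, or "auto", in the request path. A minimal sketch, assuming the stack is up, the final 11436:11435 mapping, and a pulled model named llama2; all of these are assumptions taken from this compose hunk and the test-proxy.py script added below:)

import requests

# Host port, path shape, and model name are assumptions from the
# compose mapping above and from test-proxy.py below.
resp = requests.post(
    "http://localhost:11436/proxy-context/auto/api/generate",
    json={"model": "llama2", "prompt": "Hello", "stream": False},
    timeout=30,
)
print(resp.status_code, resp.text[:200])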
@@ -38,7 +38,14 @@ RUN { \
    echo 'echo "Container: ollama-context-proxy"'; \
    echo 'set -e'; \
    echo 'echo "Setting pip environment to /opt/ollama-context-proxy"'; \
    echo 'if [[ ! -d /opt/ollama-context-proxy/venv/bin ]]; then'; \
    echo '  echo "Creating virtual environment at /opt/ollama-context-proxy/venv"'; \
    echo '  python3 -m venv --clear --system-site-packages /opt/ollama-context-proxy/venv'; \
    echo 'fi'; \
    echo 'echo "Activating virtual environment at /opt/ollama-context-proxy/venv"'; \
    echo 'source /opt/ollama-context-proxy/venv/bin/activate'; \
    echo 'echo "Installing requirements from /opt/ollama-context-proxy/requirements.txt"'; \
    echo 'pip install -r /opt/ollama-context-proxy/requirements.txt >/dev/null || echo "Failed"'; \
    echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama-context-proxy/)?shell$ ]]; then'; \
    echo '  echo "Dropping to shell"'; \
    echo '  shift'; \
@@ -33,7 +33,7 @@ class OllamaContextProxy:
        else:
            # Fall back to host/port construction
            if ollama_host is None:
                ollama_host = "localhost"
                ollama_host = "ollama"
            self.ollama_base_url = f"http://{ollama_host}:{ollama_port}"

        self.proxy_port = proxy_port
@@ -70,9 +70,52 @@ class OllamaContextProxy:
        # Optional: Add a health check endpoint
        app.router.add_get("/", self.health_check)
        app.router.add_get("/health", self.health_check)
        app.router.add_get("/debug/ollama", self.debug_ollama)

        return app

    async def debug_ollama(self, request: web.Request) -> web.Response:
        """Debug endpoint to test connectivity to Ollama"""
        if not self.session:
            return web.Response(
                text="Error: HTTP session not initialized",
                status=500,
                content_type="text/plain",
            )

        test_url = f"{self.ollama_base_url}/api/tags"
        try:
            # Test basic connectivity to Ollama
            self.logger.info(f"Testing Ollama connectivity to: {test_url}")

            async with self.session.get(test_url) as response:
                status = response.status
                content_type = response.headers.get("content-type", "N/A")
                body = await response.text()

                return web.Response(
                    text=f"Ollama Debug Test\n"
                    f"=================\n"
                    f"Target URL: {test_url}\n"
                    f"Status: {status}\n"
                    f"Content-Type: {content_type}\n"
                    f"Body Length: {len(body)}\n"
                    f"Body Preview: {body[:500]}...\n"
                    f"\nProxy Base URL: {self.ollama_base_url}\n"
                    f"Available Contexts: {self.available_contexts}",
                    content_type="text/plain",
                )
        except Exception as e:
            return web.Response(
                text=f"Ollama Debug Test FAILED\n"
                f"========================\n"
                f"Error: {str(e)}\n"
                f"Target URL: {test_url}\n"
                f"Proxy Base URL: {self.ollama_base_url}",
                status=502,
                content_type="text/plain",
            )

    async def health_check(self, request: web.Request) -> web.Response:
        """Health check endpoint"""
        return web.Response(
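(The two diagnostic routes registered above can be exercised directly once the container is rebuilt; test-proxy.py below automates this, but a one-off check is short. A sketch, assuming the proxy is published on host port 11436 as in the compose hunk above:)

import requests

# /health and /debug/ollama are the routes added in this hunk;
# the host port is an assumption from the compose mapping.
for path in ("/health", "/debug/ollama"):
    r = requests.get(f"http://localhost:11436{path}", timeout=10)
    print(path, r.status_code, r.text[:200])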
@@ -86,7 +129,9 @@ class OllamaContextProxy:
            content_type="text/plain",
        )

    async def proxy_handler(self, request: web.Request) -> web.Response:
    async def proxy_handler(
        self, request: web.Request
    ) -> web.Response | web.StreamResponse:
        """Handle all proxy requests with context size extraction or auto-detection"""

        # Extract context spec and remaining path
@@ -97,18 +142,32 @@ class OllamaContextProxy:
        if remaining_path.startswith("/"):
            remaining_path = remaining_path[1:]

        # Get request data first (needed for auto-sizing)
        # Get request data first (needed for auto-sizing) - read only once!
        original_data = None
        request_body = None

        if request.content_type == "application/json":
            try:
                data = await request.json()
            except json.JSONDecodeError:
                data = await request.text()
                original_data = await request.json()
                # Convert back to bytes for forwarding
                request_body = json.dumps(original_data).encode("utf-8")
            except json.JSONDecodeError as e:
                self.logger.error(f"Failed to parse JSON: {e}")
                request_body = await request.read()
                original_data = request_body.decode("utf-8", errors="ignore")
        else:
            data = await request.read()
            request_body = await request.read()
            original_data = request_body

        # Use original_data for analysis, request_body for forwarding
        data_for_analysis = original_data if original_data is not None else {}
        data_for_forwarding = request_body if request_body is not None else b""

        # Determine context size
        if context_spec == "auto":
            context_size = self._auto_determine_context_size(data, remaining_path)
            context_size = self._auto_determine_context_size(
                data_for_analysis, remaining_path
            )
        else:
            context_size = int(context_spec)
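(The heart of this hunk is reading the request body exactly once and deriving both the analysis view and the forwarding bytes from that single read; aiohttp caches the body after the first read, so the fallback read in the except branch still sees the data. A standalone sketch of the same pattern, with names invented for illustration:)

import json
from aiohttp import web

async def read_once(request: web.Request):
    """Return (parsed_view, raw_bytes) without consuming the body twice."""
    if request.content_type == "application/json":
        try:
            parsed = await request.json()
            return parsed, json.dumps(parsed).encode("utf-8")
        except json.JSONDecodeError:
            body = await request.read()  # served from aiohttp's cached body
            return body.decode("utf-8", errors="ignore"), body
    body = await request.read()
    return body, body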
@@ -130,22 +189,57 @@ class OllamaContextProxy:
        else:
            target_url = f"{self.ollama_base_url}/{remaining_path}"

        self.logger.info(f"Routing to context {context_size} -> {target_url}")
        # Enhanced debugging
        self.logger.info("=== REQUEST DEBUG ===")
        self.logger.info(f"Original request path: {request.path}")
        self.logger.info(f"Context spec: {context_spec}")
        self.logger.info(f"Remaining path: '{remaining_path}'")
        self.logger.info(f"Target URL: {target_url}")
        self.logger.info(f"Request method: {request.method}")
        self.logger.info(f"Request headers: {dict(request.headers)}")
        self.logger.info(f"Request query params: {dict(request.query)}")
        self.logger.info(f"Content type: {request.content_type}")
        if isinstance(data_for_analysis, dict):
            self.logger.info(f"Request data keys: {list(data_for_analysis.keys())}")
        else:
            data_len = (
                len(data_for_analysis)
                if hasattr(data_for_analysis, "__len__")
                else "N/A"
            )
            self.logger.info(
                f"Request data type: {type(data_for_analysis)}, length: {data_len}"
            )
        self.logger.info(f"Selected context size: {context_size}")

        # Inject context if needed
        if self._should_inject_context(remaining_path) and isinstance(data, dict):
            if "options" not in data:
                data["options"] = {}
            data["options"]["num_ctx"] = context_size
        # Inject context if needed (modify the JSON data, not the raw bytes)
        modified_data = False
        if self._should_inject_context(remaining_path) and isinstance(
            data_for_analysis, dict
        ):
            if "options" not in data_for_analysis:
                data_for_analysis["options"] = {}
            data_for_analysis["options"]["num_ctx"] = context_size
            self.logger.info(f"Injected num_ctx={context_size} for {remaining_path}")
            # Re-encode the modified JSON
            data_for_forwarding = json.dumps(data_for_analysis).encode("utf-8")
            modified_data = True

        # Prepare headers (exclude hop-by-hop headers)
        headers = {
            key: value
            for key, value in request.headers.items()
            if key.lower() not in ["host", "connection", "upgrade"]
            if key.lower() not in ["host", "connection", "upgrade", "content-length"]
        }

        # Update Content-Length if we modified the data
        if modified_data and isinstance(data_for_forwarding, bytes):
            headers["Content-Length"] = str(len(data_for_forwarding))

        # Debug the final data being sent
        self.logger.debug(f"Final data being sent: {data_for_forwarding}")
        self.logger.debug(f"Final headers: {headers}")

        if not self.session:
            raise RuntimeError("HTTP session not initialized")
        try:
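(Recomputing Content-Length after mutating the body matters: forwarding the client's original header with a re-encoded payload would make Ollama read too few or too many bytes. A toy illustration with a hypothetical payload:)

import json

body = {"model": "llama2", "prompt": "Hello"}
raw = json.dumps(body).encode("utf-8")
body.setdefault("options", {})["num_ctx"] = 8192  # the same injection as above
new_raw = json.dumps(body).encode("utf-8")
# The length changes, so a stale Content-Length would corrupt the request.
print(len(raw), "->", len(new_raw))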
@@ -153,10 +247,28 @@ class OllamaContextProxy:
            async with self.session.request(
                method=request.method,
                url=target_url,
                data=json.dumps(data) if isinstance(data, dict) else data,
                data=data_for_forwarding,
                headers=headers,
                params=request.query,
            ) as response:
                # Enhanced response debugging
                self.logger.info("=== RESPONSE DEBUG ===")
                self.logger.info(f"Response status: {response.status}")
                self.logger.info(f"Response headers: {dict(response.headers)}")
                self.logger.info(
                    f"Response content-type: {response.headers.get('content-type', 'N/A')}"
                )

                # Log response body for non-streaming 404s
                if response.status == 404:
                    error_body = await response.text()
                    self.logger.error(f"404 Error body: {error_body}")
                    return web.Response(
                        text=f"Ollama 404 Error - URL: {target_url}\nError: {error_body}",
                        status=404,
                        content_type="text/plain",
                    )

                # Handle streaming responses (for generate/chat endpoints)
                if response.headers.get("content-type", "").startswith(
                    "application/x-ndjson"
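(The hunk is truncated here, but the branch it opens handles ndjson streaming, which is why proxy_handler now also returns web.StreamResponse. For orientation, a generic aiohttp relay of such a stream looks roughly like this; a sketch of the standard pattern, not this file's exact code:)

from aiohttp import web

async def relay_stream(request: web.Request, upstream) -> web.StreamResponse:
    # Stream the upstream ndjson body back to the client chunk by chunk.
    out = web.StreamResponse(
        status=upstream.status,
        headers={"Content-Type": "application/x-ndjson"},
    )
    await out.prepare(request)
    async for chunk in upstream.content.iter_chunked(8192):
        await out.write(chunk)
    await out.write_eof()
    return out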
@@ -323,13 +435,19 @@ async def main():
    )

    # Get default host from OLLAMA_BASE_URL if available
    default_host = "localhost"
    default_host = "ollama"  # Default to "ollama" for Docker environments
    base_url = os.getenv("OLLAMA_BASE_URL")
    if base_url:
        # Extract host from base URL for backward compatibility with CLI args
        parsed = urllib.parse.urlparse(base_url)
        if parsed.hostname:
            default_host = parsed.hostname
    else:
        # If no OLLAMA_BASE_URL, check if we're likely in a Docker environment
        if os.path.exists("/.dockerenv"):
            default_host = "ollama"
        else:
            default_host = "localhost"

    parser.add_argument(
        "--ollama-host",
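(The host extraction above leans on urllib's URL parsing; standard-library behaviour, quick to verify:)

import urllib.parse

parsed = urllib.parse.urlparse("http://ollama:11434")
print(parsed.hostname)  # "ollama"
print(parsed.port)      # 11434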
@@ -2,11 +2,16 @@ aiohappyeyeballs==2.6.1
aiohttp==3.12.15
aiosignal==1.4.0
attrs==25.3.0
certifi==2025.7.14
charset-normalizer==3.4.2
frozenlist==1.7.0
idna==3.10
multidict==6.6.3
propcache==0.3.2
requests==2.32.4
ruff==0.12.7
setuptools==68.1.2
typing_extensions==4.14.1
urllib3==2.5.0
wheel==0.42.0
yarl==1.20.1
yarl==1.20.1
ollama-context-proxy/test-proxy.py (new file, 140 lines)
@@ -0,0 +1,140 @@
#!/usr/bin/env python3
"""
Simple test script for debugging the Ollama Context Proxy
"""

import requests
import sys


def test_direct_ollama(base_url="http://localhost:11434"):
    """Test direct connection to Ollama"""
    print("=== Testing Direct Ollama Connection ===")
    print(f"URL: {base_url}")

    try:
        response = requests.get(f"{base_url}/api/tags", timeout=5)
        print(f"Status: {response.status_code}")
        print(f"Headers: {dict(response.headers)}")
        if response.status_code == 200:
            print("✅ Direct Ollama connection OK")
            tags_data = response.json()
            print(f"Available models: {[model['name'] for model in tags_data.get('models', [])]}")
        else:
            print(f"❌ Direct Ollama connection failed: {response.text}")
        return response.status_code == 200
    except Exception as e:
        print(f"❌ Direct Ollama connection error: {e}")
        return False


def test_proxy_health(proxy_url="http://localhost:11435"):
    """Test proxy health endpoint"""
    print("\n=== Testing Proxy Health ===")
    print(f"URL: {proxy_url}")

    try:
        response = requests.get(f"{proxy_url}/health", timeout=5)
        print(f"Status: {response.status_code}")
        print(f"Response: {response.text}")
        if response.status_code == 200:
            print("✅ Proxy health check OK")
        else:
            print("❌ Proxy health check failed")
        return response.status_code == 200
    except Exception as e:
        print(f"❌ Proxy health check error: {e}")
        return False


def test_proxy_debug(proxy_url="http://localhost:11435"):
    """Test proxy debug endpoint"""
    print("\n=== Testing Proxy Debug ===")
    print(f"URL: {proxy_url}/debug/ollama")

    try:
        response = requests.get(f"{proxy_url}/debug/ollama", timeout=10)
        print(f"Status: {response.status_code}")
        print(f"Response: {response.text}")
        if response.status_code == 200:
            print("✅ Proxy debug check OK")
        else:
            print("❌ Proxy debug check failed")
        return response.status_code == 200
    except Exception as e:
        print(f"❌ Proxy debug check error: {e}")
        return False


def test_proxy_request(proxy_url="http://localhost:11435", model="llama2"):
    """Test actual proxy request"""
    print("\n=== Testing Proxy Request ===")
    print(f"URL: {proxy_url}/proxy-context/auto/api/generate")

    payload = {
        "model": model,
        "prompt": "Hello, world!",
        "stream": False
    }

    try:
        response = requests.post(
            f"{proxy_url}/proxy-context/auto/api/generate",
            json=payload,
            timeout=30
        )
        print(f"Status: {response.status_code}")
        print(f"Headers: {dict(response.headers)}")
        print(f"Response preview: {response.text[:500]}...")

        if response.status_code == 200:
            print("✅ Proxy request OK")
        else:
            print("❌ Proxy request failed")
        return response.status_code == 200
    except Exception as e:
        print(f"❌ Proxy request error: {e}")
        return False


def main():
    if len(sys.argv) > 1:
        if sys.argv[1] == "--help":
            print("Usage: python3 test-proxy.py [ollama_url] [proxy_url] [model]")
            print("  ollama_url: Default http://localhost:11434")
            print("  proxy_url:  Default http://localhost:11435")
            print("  model:      Default llama2")
            return

    ollama_url = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:11434"
    proxy_url = sys.argv[2] if len(sys.argv) > 2 else "http://localhost:11435"
    model = sys.argv[3] if len(sys.argv) > 3 else "llama2"

    print("Ollama Context Proxy Debug Test")
    print("===============================")
    print(f"Ollama URL: {ollama_url}")
    print(f"Proxy URL: {proxy_url}")
    print(f"Test Model: {model}")

    # Run tests
    tests = [
        ("Direct Ollama", lambda: test_direct_ollama(ollama_url)),
        ("Proxy Health", lambda: test_proxy_health(proxy_url)),
        ("Proxy Debug", lambda: test_proxy_debug(proxy_url)),
        ("Proxy Request", lambda: test_proxy_request(proxy_url, model)),
    ]

    results = []
    for test_name, test_func in tests:
        success = test_func()
        results.append((test_name, success))

    print("\n=== Summary ===")
    for test_name, success in results:
        status = "✅ PASS" if success else "❌ FAIL"
        print(f"{test_name}: {status}")

    all_passed = all(success for _, success in results)
    if all_passed:
        print("\n🎉 All tests passed!")
    else:
        print("\n⚠️ Some tests failed. Check the output above for details.")
        sys.exit(1)


if __name__ == "__main__":
    main()