Fixed context proxy

parent 8119cd8492
commit 2d2745a788
.gitignore (vendored, +1 line)
@@ -2,3 +2,4 @@ db/**
 cache/**
 ollama/**
 .env
+**/venv/**
docker-compose.yml
@@ -80,9 +80,14 @@ services:
     env_file:
       - .env
     environment:
-      - OLLAMA_HOST=http://ollama:11434
+      - OLLAMA_BASE_URL=http://ollama:11434
+    volumes:
+      - ./ollama-context-proxy/venv:/opt/ollama-context-proxy/venv:rw # Live mount for python venv
+      - ./ollama-context-proxy/ollama-context-proxy.py:/opt/ollama-context-proxy/ollama-context-proxy.py:rw
+      - ./ollama-context-proxy/requirements.txt:/opt/ollama-context-proxy/requirements.txt:rw
+      - ./ollama-context-proxy/test-proxy.py:/opt/ollama-context-proxy/test-proxy.py:rw
     ports:
-      - 11436:11434 # ollama-context-proxy port
+      - 11436:11435 # ollama-context-proxy port
     networks:
       - internal
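The port fix matters because the proxy listens on 11435 inside the container while Ollama itself uses 11434; the compose file publishes the proxy on host port 11436. A minimal reachability check from the host, assuming the stack is up (endpoint and ports are the ones in this commit):

    import requests

    # /health is registered by the proxy (see the route table below);
    # 11436 is the host port published in the compose file.
    r = requests.get("http://localhost:11436/health", timeout=5)
    print(r.status_code, r.text)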
ollama-context-proxy/Dockerfile
@@ -38,7 +38,14 @@ RUN { \
     echo 'echo "Container: ollama-context-proxy"'; \
     echo 'set -e'; \
     echo 'echo "Setting pip environment to /opt/ollama-context-proxy"'; \
+    echo 'if [[ ! -d /opt/job_hunter/venv/bin ]]; then'; \
+    echo ' echo "Creating virtual environment at /opt/ollama-context-proxy/venv"'; \
+    echo ' python3 -m venv --clear --system-site-packages /opt/ollama-context-proxy/venv'; \
+    echo 'fi'; \
+    echo 'echo "Activating virtual environment at /opt/ollama-context-proxy/venv"'; \
     echo 'source /opt/ollama-context-proxy/venv/bin/activate'; \
+    echo 'echo "Installing requirements from /opt/ollama-context-proxy/requirements.txt"'; \
+    echo 'pip install -r /opt/ollama-context-proxy/requirements.txt >/dev/null || echo "Failed"'; \
     echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama-context-proxy/)?shell$ ]]; then'; \
     echo ' echo "Dropping to shell"'; \
     echo ' shift'; \
ollama-context-proxy/ollama-context-proxy.py
@@ -33,7 +33,7 @@ class OllamaContextProxy:
         else:
             # Fall back to host/port construction
             if ollama_host is None:
-                ollama_host = "localhost"
+                ollama_host = "ollama"
             self.ollama_base_url = f"http://{ollama_host}:{ollama_port}"

         self.proxy_port = proxy_port
@@ -70,9 +70,52 @@ class OllamaContextProxy:
         # Optional: Add a health check endpoint
         app.router.add_get("/", self.health_check)
         app.router.add_get("/health", self.health_check)
+        app.router.add_get("/debug/ollama", self.debug_ollama)

         return app

+    async def debug_ollama(self, request: web.Request) -> web.Response:
+        """Debug endpoint to test connectivity to Ollama"""
+        if not self.session:
+            return web.Response(
+                text="Error: HTTP session not initialized",
+                status=500,
+                content_type="text/plain",
+            )
+
+        test_url = f"{self.ollama_base_url}/api/tags"
+        try:
+            # Test basic connectivity to Ollama
+            self.logger.info(f"Testing Ollama connectivity to: {test_url}")
+
+            async with self.session.get(test_url) as response:
+                status = response.status
+                content_type = response.headers.get("content-type", "N/A")
+                body = await response.text()
+
+                return web.Response(
+                    text=f"Ollama Debug Test\n"
+                    f"=================\n"
+                    f"Target URL: {test_url}\n"
+                    f"Status: {status}\n"
+                    f"Content-Type: {content_type}\n"
+                    f"Body Length: {len(body)}\n"
+                    f"Body Preview: {body[:500]}...\n"
+                    f"\nProxy Base URL: {self.ollama_base_url}\n"
+                    f"Available Contexts: {self.available_contexts}",
+                    content_type="text/plain",
+                )
+        except Exception as e:
+            return web.Response(
+                text=f"Ollama Debug Test FAILED\n"
+                f"========================\n"
+                f"Error: {str(e)}\n"
+                f"Target URL: {test_url}\n"
+                f"Proxy Base URL: {self.ollama_base_url}",
+                status=502,
+                content_type="text/plain",
+            )
+
     async def health_check(self, request: web.Request) -> web.Response:
         """Health check endpoint"""
         return web.Response(
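The new /debug/ollama route gives a one-call check of proxy-to-Ollama connectivity. A short sketch of hitting it from the host, assuming the compose port mapping above (success returns a plain-text report, failure a 502 with the error):

    import requests

    # The proxy probes {ollama_base_url}/api/tags and reports what it saw.
    r = requests.get("http://localhost:11436/debug/ollama", timeout=10)
    print(r.status_code)
    print(r.text)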
@@ -86,7 +129,9 @@ class OllamaContextProxy:
             content_type="text/plain",
         )

-    async def proxy_handler(self, request: web.Request) -> web.Response:
+    async def proxy_handler(
+        self, request: web.Request
+    ) -> web.Response | web.StreamResponse:
         """Handle all proxy requests with context size extraction or auto-detection"""

         # Extract context spec and remaining path
@@ -97,18 +142,32 @@ class OllamaContextProxy:
         if remaining_path.startswith("/"):
             remaining_path = remaining_path[1:]

-        # Get request data first (needed for auto-sizing)
+        # Get request data first (needed for auto-sizing) - read only once!
+        original_data = None
+        request_body = None
+
         if request.content_type == "application/json":
             try:
-                data = await request.json()
-            except json.JSONDecodeError:
-                data = await request.text()
+                original_data = await request.json()
+                # Convert back to bytes for forwarding
+                request_body = json.dumps(original_data).encode("utf-8")
+            except json.JSONDecodeError as e:
+                self.logger.error(f"Failed to parse JSON: {e}")
+                request_body = await request.read()
+                original_data = request_body.decode("utf-8", errors="ignore")
         else:
-            data = await request.read()
+            request_body = await request.read()
+            original_data = request_body
+
+        # Use original_data for analysis, request_body for forwarding
+        data_for_analysis = original_data if original_data is not None else {}
+        data_for_forwarding = request_body if request_body is not None else b""

         # Determine context size
         if context_spec == "auto":
-            context_size = self._auto_determine_context_size(data, remaining_path)
+            context_size = self._auto_determine_context_size(
+                data_for_analysis, remaining_path
+            )
         else:
             context_size = int(context_spec)
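The context_spec URL segment is either the literal auto or, given the int(context_spec) branch, a number pinning the context size. A hedged usage sketch (the auto form appears verbatim in test-proxy.py below; the numeric form is inferred from the parsing code, and the model name is a placeholder):

    import requests

    payload = {"model": "llama2", "prompt": "Hello, world!", "stream": False}

    # Let the proxy auto-size the context from the request:
    requests.post(
        "http://localhost:11436/proxy-context/auto/api/generate",
        json=payload, timeout=30,
    )

    # Pin the context size explicitly (numeric spec, inferred):
    requests.post(
        "http://localhost:11436/proxy-context/8192/api/generate",
        json=payload, timeout=30,
    )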
@@ -130,22 +189,57 @@ class OllamaContextProxy:
         else:
             target_url = f"{self.ollama_base_url}/{remaining_path}"

-        self.logger.info(f"Routing to context {context_size} -> {target_url}")
+        # Enhanced debugging
+        self.logger.info("=== REQUEST DEBUG ===")
+        self.logger.info(f"Original request path: {request.path}")
+        self.logger.info(f"Context spec: {context_spec}")
+        self.logger.info(f"Remaining path: '{remaining_path}'")
+        self.logger.info(f"Target URL: {target_url}")
+        self.logger.info(f"Request method: {request.method}")
+        self.logger.info(f"Request headers: {dict(request.headers)}")
+        self.logger.info(f"Request query params: {dict(request.query)}")
+        self.logger.info(f"Content type: {request.content_type}")
+        if isinstance(data_for_analysis, dict):
+            self.logger.info(f"Request data keys: {list(data_for_analysis.keys())}")
+        else:
+            data_len = (
+                len(data_for_analysis)
+                if hasattr(data_for_analysis, "__len__")
+                else "N/A"
+            )
+            self.logger.info(
+                f"Request data type: {type(data_for_analysis)}, length: {data_len}"
+            )
+        self.logger.info(f"Selected context size: {context_size}")

-        # Inject context if needed
-        if self._should_inject_context(remaining_path) and isinstance(data, dict):
-            if "options" not in data:
-                data["options"] = {}
-            data["options"]["num_ctx"] = context_size
+        # Inject context if needed (modify the JSON data, not the raw bytes)
+        modified_data = False
+        if self._should_inject_context(remaining_path) and isinstance(
+            data_for_analysis, dict
+        ):
+            if "options" not in data_for_analysis:
+                data_for_analysis["options"] = {}
+            data_for_analysis["options"]["num_ctx"] = context_size
             self.logger.info(f"Injected num_ctx={context_size} for {remaining_path}")
+            # Re-encode the modified JSON
+            data_for_forwarding = json.dumps(data_for_analysis).encode("utf-8")
+            modified_data = True

         # Prepare headers (exclude hop-by-hop headers)
         headers = {
             key: value
             for key, value in request.headers.items()
-            if key.lower() not in ["host", "connection", "upgrade"]
+            if key.lower() not in ["host", "connection", "upgrade", "content-length"]
         }

+        # Update Content-Length if we modified the data
+        if modified_data and isinstance(data_for_forwarding, bytes):
+            headers["Content-Length"] = str(len(data_for_forwarding))
+
+        # Debug the final data being sent
+        self.logger.debug(f"Final data being sent: {data_for_forwarding}")
+        self.logger.debug(f"Final headers: {headers}")
+
         if not self.session:
            raise RuntimeError("HTTP session not initialized")
         try:
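Condensed, the injection path now mutates the parsed JSON, re-encodes it, and recomputes Content-Length (the client's stale value is dropped by the header filter). A minimal illustration of the transformation, using placeholder values:

    import json

    body = {"model": "llama2", "prompt": "Hello"}           # client payload
    body.setdefault("options", {})["num_ctx"] = 8192        # what the proxy injects
    data_for_forwarding = json.dumps(body).encode("utf-8")  # re-encoded bytes
    headers = {"Content-Length": str(len(data_for_forwarding))}
    # -> b'{"model": "llama2", "prompt": "Hello", "options": {"num_ctx": 8192}}'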
@@ -153,10 +247,28 @@ class OllamaContextProxy:
             async with self.session.request(
                 method=request.method,
                 url=target_url,
-                data=json.dumps(data) if isinstance(data, dict) else data,
+                data=data_for_forwarding,
                 headers=headers,
                 params=request.query,
             ) as response:
+                # Enhanced response debugging
+                self.logger.info("=== RESPONSE DEBUG ===")
+                self.logger.info(f"Response status: {response.status}")
+                self.logger.info(f"Response headers: {dict(response.headers)}")
+                self.logger.info(
+                    f"Response content-type: {response.headers.get('content-type', 'N/A')}"
+                )
+
+                # Log response body for non-streaming 404s
+                if response.status == 404:
+                    error_body = await response.text()
+                    self.logger.error(f"404 Error body: {error_body}")
+                    return web.Response(
+                        text=f"Ollama 404 Error - URL: {target_url}\nError: {error_body}",
+                        status=404,
+                        content_type="text/plain",
+                    )
+
                 # Handle streaming responses (for generate/chat endpoints)
                 if response.headers.get("content-type", "").startswith(
                     "application/x-ndjson"
@@ -323,13 +435,19 @@ async def main():
     )

     # Get default host from OLLAMA_BASE_URL if available
-    default_host = "localhost"
+    default_host = "ollama"  # Default to "ollama" for Docker environments
     base_url = os.getenv("OLLAMA_BASE_URL")
     if base_url:
         # Extract host from base URL for backward compatibility with CLI args
         parsed = urllib.parse.urlparse(base_url)
         if parsed.hostname:
             default_host = parsed.hostname
+    else:
+        # If no OLLAMA_BASE_URL, check if we're likely in a Docker environment
+        if os.path.exists("/.dockerenv"):
+            default_host = "ollama"
+        else:
+            default_host = "localhost"

     parser.add_argument(
         "--ollama-host",
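With this change the default upstream host resolves in three steps, matching the OLLAMA_BASE_URL variable the compose file now sets. A condensed restatement of the same logic:

    import os
    import urllib.parse

    default_host = "ollama"                    # Docker-friendly default
    base_url = os.getenv("OLLAMA_BASE_URL")    # compose sets http://ollama:11434
    if base_url:
        host = urllib.parse.urlparse(base_url).hostname
        if host:
            default_host = host
    elif not os.path.exists("/.dockerenv"):    # outside Docker, prefer localhost
        default_host = "localhost"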
ollama-context-proxy/requirements.txt
@@ -2,11 +2,16 @@ aiohappyeyeballs==2.6.1
 aiohttp==3.12.15
 aiosignal==1.4.0
 attrs==25.3.0
+certifi==2025.7.14
+charset-normalizer==3.4.2
 frozenlist==1.7.0
 idna==3.10
 multidict==6.6.3
 propcache==0.3.2
+requests==2.32.4
+ruff==0.12.7
 setuptools==68.1.2
 typing_extensions==4.14.1
+urllib3==2.5.0
 wheel==0.42.0
 yarl==1.20.1
ollama-context-proxy/test-proxy.py (new file, 140 lines)
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+"""
+Simple test script for debugging the Ollama Context Proxy
+"""
+
+import requests
+import sys
+
+
+def test_direct_ollama(base_url="http://localhost:11434"):
+    """Test direct connection to Ollama"""
+    print("=== Testing Direct Ollama Connection ===")
+    print(f"URL: {base_url}")
+
+    try:
+        response = requests.get(f"{base_url}/api/tags", timeout=5)
+        print(f"Status: {response.status_code}")
+        print(f"Headers: {dict(response.headers)}")
+        if response.status_code == 200:
+            print("✅ Direct Ollama connection OK")
+            tags_data = response.json()
+            print(f"Available models: {[model['name'] for model in tags_data.get('models', [])]}")
+        else:
+            print(f"❌ Direct Ollama connection failed: {response.text}")
+        return response.status_code == 200
+    except Exception as e:
+        print(f"❌ Direct Ollama connection error: {e}")
+        return False
+
+
+def test_proxy_health(proxy_url="http://localhost:11435"):
+    """Test proxy health endpoint"""
+    print("\n=== Testing Proxy Health ===")
+    print(f"URL: {proxy_url}")
+
+    try:
+        response = requests.get(f"{proxy_url}/health", timeout=5)
+        print(f"Status: {response.status_code}")
+        print(f"Response: {response.text}")
+        if response.status_code == 200:
+            print("✅ Proxy health check OK")
+        else:
+            print("❌ Proxy health check failed")
+        return response.status_code == 200
+    except Exception as e:
+        print(f"❌ Proxy health check error: {e}")
+        return False
+
+
+def test_proxy_debug(proxy_url="http://localhost:11435"):
+    """Test proxy debug endpoint"""
+    print("\n=== Testing Proxy Debug ===")
+    print(f"URL: {proxy_url}/debug/ollama")
+
+    try:
+        response = requests.get(f"{proxy_url}/debug/ollama", timeout=10)
+        print(f"Status: {response.status_code}")
+        print(f"Response: {response.text}")
+        if response.status_code == 200:
+            print("✅ Proxy debug check OK")
+        else:
+            print("❌ Proxy debug check failed")
+        return response.status_code == 200
+    except Exception as e:
+        print(f"❌ Proxy debug check error: {e}")
+        return False
+
+
+def test_proxy_request(proxy_url="http://localhost:11435", model="llama2"):
+    """Test actual proxy request"""
+    print("\n=== Testing Proxy Request ===")
+    print(f"URL: {proxy_url}/proxy-context/auto/api/generate")
+
+    payload = {
+        "model": model,
+        "prompt": "Hello, world!",
+        "stream": False
+    }
+
+    try:
+        response = requests.post(
+            f"{proxy_url}/proxy-context/auto/api/generate",
+            json=payload,
+            timeout=30
+        )
+        print(f"Status: {response.status_code}")
+        print(f"Headers: {dict(response.headers)}")
+        print(f"Response preview: {response.text[:500]}...")
+
+        if response.status_code == 200:
+            print("✅ Proxy request OK")
+        else:
+            print("❌ Proxy request failed")
+        return response.status_code == 200
+    except Exception as e:
+        print(f"❌ Proxy request error: {e}")
+        return False
+
+
+def main():
+    if len(sys.argv) > 1:
+        if sys.argv[1] == "--help":
+            print("Usage: python3 test-proxy.py [ollama_url] [proxy_url] [model]")
+            print("  ollama_url: Default http://localhost:11434")
+            print("  proxy_url: Default http://localhost:11435")
+            print("  model: Default llama2")
+            return
+
+    ollama_url = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:11434"
+    proxy_url = sys.argv[2] if len(sys.argv) > 2 else "http://localhost:11435"
+    model = sys.argv[3] if len(sys.argv) > 3 else "llama2"
+
+    print("Ollama Context Proxy Debug Test")
+    print("===============================")
+    print(f"Ollama URL: {ollama_url}")
+    print(f"Proxy URL: {proxy_url}")
+    print(f"Test Model: {model}")
+
+    # Run tests
+    tests = [
+        ("Direct Ollama", lambda: test_direct_ollama(ollama_url)),
+        ("Proxy Health", lambda: test_proxy_health(proxy_url)),
+        ("Proxy Debug", lambda: test_proxy_debug(proxy_url)),
+        ("Proxy Request", lambda: test_proxy_request(proxy_url, model)),
+    ]
+
+    results = []
+    for test_name, test_func in tests:
+        success = test_func()
+        results.append((test_name, success))
+
+    print("\n=== Summary ===")
+    for test_name, success in results:
+        status = "✅ PASS" if success else "❌ FAIL"
+        print(f"{test_name}: {status}")
+
+    all_passed = all(success for _, success in results)
+    if all_passed:
+        print("\n🎉 All tests passed!")
+    else:
+        print("\n⚠️ Some tests failed. Check the output above for details.")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
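Per its own --help text, the script takes positional URLs and a model name. Note that its default proxy URL uses port 11435, the container-internal listen port; run from the host with the compose mapping in this commit, the published port would be passed explicitly, e.g.:

    python3 test-proxy.py http://localhost:11434 http://localhost:11436 llama2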