diff --git a/.gitignore b/.gitignore
index fca52c8..004fdaf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ db/**
 cache/**
 ollama/**
 .env
+**/venv/**
diff --git a/docker-compose.yml b/docker-compose.yml
index 0d9e269..d1d3fda 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -80,9 +80,14 @@ services:
     env_file:
       - .env
     environment:
-      - OLLAMA_HOST=http://ollama:11434
+      - OLLAMA_BASE_URL=http://ollama:11434
+    volumes:
+      - ./ollama-context-proxy/venv:/opt/ollama-context-proxy/venv:rw # Live mount for python venv
+      - ./ollama-context-proxy/ollama-context-proxy.py:/opt/ollama-context-proxy/ollama-context-proxy.py:rw
+      - ./ollama-context-proxy/requirements.txt:/opt/ollama-context-proxy/requirements.txt:rw
+      - ./ollama-context-proxy/test-proxy.py:/opt/ollama-context-proxy/test-proxy.py:rw
     ports:
-      - 11436:11434 # ollama-context-proxy port
+      - 11436:11435 # ollama-context-proxy port
     networks:
       - internal
diff --git a/ollama-context-proxy/Dockerfile b/ollama-context-proxy/Dockerfile
index ce65378..6a4035e 100644
--- a/ollama-context-proxy/Dockerfile
+++ b/ollama-context-proxy/Dockerfile
@@ -38,7 +38,14 @@ RUN { \
     echo 'echo "Container: ollama-context-proxy"'; \
     echo 'set -e'; \
     echo 'echo "Setting pip environment to /opt/ollama-context-proxy"'; \
+    echo 'if [[ ! -d /opt/ollama-context-proxy/venv/bin ]]; then'; \
+    echo '  echo "Creating virtual environment at /opt/ollama-context-proxy/venv"'; \
+    echo '  python3 -m venv --clear --system-site-packages /opt/ollama-context-proxy/venv'; \
+    echo 'fi'; \
+    echo 'echo "Activating virtual environment at /opt/ollama-context-proxy/venv"'; \
     echo 'source /opt/ollama-context-proxy/venv/bin/activate'; \
+    echo 'echo "Installing requirements from /opt/ollama-context-proxy/requirements.txt"'; \
+    echo 'pip install -r /opt/ollama-context-proxy/requirements.txt >/dev/null || echo "Failed"'; \
     echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama-context-proxy/)?shell$ ]]; then'; \
     echo '  echo "Dropping to shell"'; \
     echo '  shift'; \
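A note on the entrypoint change: the guard-then-create pattern makes the live-mounted venv idempotent across container restarts instead of being wiped and rebuilt every start. As a minimal sketch (not part of this diff; `ensure_venv` and the use of the stdlib `venv` module are illustrative), the same bootstrap logic in Python:

```python
# Illustrative only, not in this PR: the entrypoint's idempotent venv
# bootstrap restated with the stdlib venv module. ensure_venv is a
# hypothetical name; the paths come from the Dockerfile above.
import subprocess
import venv
from pathlib import Path

VENV = Path("/opt/ollama-context-proxy/venv")
REQS = Path("/opt/ollama-context-proxy/requirements.txt")

def ensure_venv() -> None:
    # Create the venv only when the live-mounted directory is empty,
    # mirroring the `[[ ! -d .../venv/bin ]]` guard in the entrypoint.
    if not (VENV / "bin").is_dir():
        venv.EnvBuilder(clear=True, system_site_packages=True, with_pip=True).create(VENV)
    # Re-running pip on every start keeps the mounted venv in sync with
    # requirements.txt, like the entrypoint's `pip install -r` line.
    subprocess.run([str(VENV / "bin" / "pip"), "install", "-r", str(REQS)], check=True)

if __name__ == "__main__":
    ensure_venv()
```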
diff --git a/ollama-context-proxy/ollama-context-proxy.py b/ollama-context-proxy/ollama-context-proxy.py
index f0d9a3c..867ecbb 100644
--- a/ollama-context-proxy/ollama-context-proxy.py
+++ b/ollama-context-proxy/ollama-context-proxy.py
@@ -33,7 +33,7 @@ class OllamaContextProxy:
         else:
             # Fall back to host/port construction
             if ollama_host is None:
-                ollama_host = "localhost"
+                ollama_host = "ollama"
             self.ollama_base_url = f"http://{ollama_host}:{ollama_port}"
 
         self.proxy_port = proxy_port
@@ -70,9 +70,52 @@ class OllamaContextProxy:
         # Optional: Add a health check endpoint
         app.router.add_get("/", self.health_check)
         app.router.add_get("/health", self.health_check)
+        app.router.add_get("/debug/ollama", self.debug_ollama)
 
         return app
 
+    async def debug_ollama(self, request: web.Request) -> web.Response:
+        """Debug endpoint to test connectivity to Ollama"""
+        if not self.session:
+            return web.Response(
+                text="Error: HTTP session not initialized",
+                status=500,
+                content_type="text/plain",
+            )
+
+        test_url = f"{self.ollama_base_url}/api/tags"
+        try:
+            # Test basic connectivity to Ollama
+            self.logger.info(f"Testing Ollama connectivity to: {test_url}")
+
+            async with self.session.get(test_url) as response:
+                status = response.status
+                content_type = response.headers.get("content-type", "N/A")
+                body = await response.text()
+
+                return web.Response(
+                    text=f"Ollama Debug Test\n"
+                    f"=================\n"
+                    f"Target URL: {test_url}\n"
+                    f"Status: {status}\n"
+                    f"Content-Type: {content_type}\n"
+                    f"Body Length: {len(body)}\n"
+                    f"Body Preview: {body[:500]}...\n"
+                    f"\nProxy Base URL: {self.ollama_base_url}\n"
+                    f"Available Contexts: {self.available_contexts}",
+                    content_type="text/plain",
+                )
+        except Exception as e:
+            return web.Response(
+                text=f"Ollama Debug Test FAILED\n"
+                f"========================\n"
+                f"Error: {str(e)}\n"
+                f"Target URL: {test_url}\n"
+                f"Proxy Base URL: {self.ollama_base_url}",
+                status=502,
+                content_type="text/plain",
+            )
+
     async def health_check(self, request: web.Request) -> web.Response:
         """Health check endpoint"""
         return web.Response(
@@ -86,7 +129,9 @@ class OllamaContextProxy:
             content_type="text/plain",
         )
 
-    async def proxy_handler(self, request: web.Request) -> web.Response:
+    async def proxy_handler(
+        self, request: web.Request
+    ) -> web.Response | web.StreamResponse:
         """Handle all proxy requests with context size extraction or auto-detection"""
 
         # Extract context spec and remaining path
@@ -97,18 +142,32 @@ class OllamaContextProxy:
         if remaining_path.startswith("/"):
             remaining_path = remaining_path[1:]
 
-        # Get request data first (needed for auto-sizing)
+        # Get request data first (needed for auto-sizing) - read only once!
+        original_data = None
+        request_body = None
+
         if request.content_type == "application/json":
             try:
-                data = await request.json()
-            except json.JSONDecodeError:
-                data = await request.text()
+                original_data = await request.json()
+                # Convert back to bytes for forwarding
+                request_body = json.dumps(original_data).encode("utf-8")
+            except json.JSONDecodeError as e:
+                self.logger.error(f"Failed to parse JSON: {e}")
+                request_body = await request.read()
+                original_data = request_body.decode("utf-8", errors="ignore")
         else:
-            data = await request.read()
+            request_body = await request.read()
+            original_data = request_body
+
+        # Use original_data for analysis, request_body for forwarding
+        data_for_analysis = original_data if original_data is not None else {}
+        data_for_forwarding = request_body if request_body is not None else b""
 
         # Determine context size
         if context_spec == "auto":
-            context_size = self._auto_determine_context_size(data, remaining_path)
+            context_size = self._auto_determine_context_size(
+                data_for_analysis, remaining_path
+            )
         else:
             context_size = int(context_spec)
 
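The read-once rework above parses the request body a single time and keeps two views of it: a dict for analysis and mutation, and bytes for forwarding, so the two can never drift apart. A minimal standalone sketch of the pattern (the handler name is assumed, not from this codebase):

```python
# Minimal sketch of the single-parse pattern used above: consume the body
# once, keep a parsed copy for inspection and a byte copy for forwarding.
import json
from aiohttp import web

async def read_once_handler(request: web.Request) -> web.Response:
    raw = await request.read()       # read the body exactly once
    try:
        parsed = json.loads(raw)     # dict form: safe to inspect and mutate
    except json.JSONDecodeError:
        parsed = None                # not JSON: forward the raw bytes as-is
    body = json.dumps(parsed).encode("utf-8") if parsed is not None else raw
    return web.Response(body=body)
```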
@@ -130,22 +189,57 @@ class OllamaContextProxy:
         else:
             target_url = f"{self.ollama_base_url}/{remaining_path}"
 
-        self.logger.info(f"Routing to context {context_size} -> {target_url}")
+        # Enhanced debugging
+        self.logger.info("=== REQUEST DEBUG ===")
+        self.logger.info(f"Original request path: {request.path}")
+        self.logger.info(f"Context spec: {context_spec}")
+        self.logger.info(f"Remaining path: '{remaining_path}'")
+        self.logger.info(f"Target URL: {target_url}")
+        self.logger.info(f"Request method: {request.method}")
+        self.logger.info(f"Request headers: {dict(request.headers)}")
+        self.logger.info(f"Request query params: {dict(request.query)}")
+        self.logger.info(f"Content type: {request.content_type}")
+        if isinstance(data_for_analysis, dict):
+            self.logger.info(f"Request data keys: {list(data_for_analysis.keys())}")
+        else:
+            data_len = (
+                len(data_for_analysis)
+                if hasattr(data_for_analysis, "__len__")
+                else "N/A"
+            )
+            self.logger.info(
+                f"Request data type: {type(data_for_analysis)}, length: {data_len}"
+            )
+        self.logger.info(f"Selected context size: {context_size}")
 
-        # Inject context if needed
-        if self._should_inject_context(remaining_path) and isinstance(data, dict):
-            if "options" not in data:
-                data["options"] = {}
-            data["options"]["num_ctx"] = context_size
+        # Inject context if needed (modify the JSON data, not the raw bytes)
+        modified_data = False
+        if self._should_inject_context(remaining_path) and isinstance(
+            data_for_analysis, dict
+        ):
+            if "options" not in data_for_analysis:
+                data_for_analysis["options"] = {}
+            data_for_analysis["options"]["num_ctx"] = context_size
             self.logger.info(f"Injected num_ctx={context_size} for {remaining_path}")
+            # Re-encode the modified JSON
+            data_for_forwarding = json.dumps(data_for_analysis).encode("utf-8")
+            modified_data = True
 
         # Prepare headers (exclude hop-by-hop headers)
         headers = {
             key: value
             for key, value in request.headers.items()
-            if key.lower() not in ["host", "connection", "upgrade"]
+            if key.lower() not in ["host", "connection", "upgrade", "content-length"]
         }
 
+        # Update Content-Length if we modified the data
+        if modified_data and isinstance(data_for_forwarding, bytes):
+            headers["Content-Length"] = str(len(data_for_forwarding))
+
+        # Debug the final data being sent
+        self.logger.debug(f"Final data being sent: {data_for_forwarding}")
+        self.logger.debug(f"Final headers: {headers}")
+
         if not self.session:
             raise RuntimeError("HTTP session not initialized")
 
         try:
@@ -153,10 +247,28 @@ class OllamaContextProxy:
             async with self.session.request(
                 method=request.method,
                 url=target_url,
-                data=json.dumps(data) if isinstance(data, dict) else data,
+                data=data_for_forwarding,
                 headers=headers,
                 params=request.query,
             ) as response:
+                # Enhanced response debugging
+                self.logger.info("=== RESPONSE DEBUG ===")
+                self.logger.info(f"Response status: {response.status}")
+                self.logger.info(f"Response headers: {dict(response.headers)}")
+                self.logger.info(
+                    f"Response content-type: {response.headers.get('content-type', 'N/A')}"
+                )
+
+                # Log response body for non-streaming 404s
+                if response.status == 404:
+                    error_body = await response.text()
+                    self.logger.error(f"404 Error body: {error_body}")
+                    return web.Response(
+                        text=f"Ollama 404 Error - URL: {target_url}\nError: {error_body}",
+                        status=404,
+                        content_type="text/plain",
+                    )
+
                 # Handle streaming responses (for generate/chat endpoints)
                 if response.headers.get("content-type", "").startswith(
                     "application/x-ndjson"
@@ -323,13 +435,19 @@ async def main():
     )
 
     # Get default host from OLLAMA_BASE_URL if available
-    default_host = "localhost"
+    default_host = "ollama"  # Default to "ollama" for Docker environments
    base_url = os.getenv("OLLAMA_BASE_URL")
     if base_url:
         # Extract host from base URL for backward compatibility with CLI args
         parsed = urllib.parse.urlparse(base_url)
         if parsed.hostname:
             default_host = parsed.hostname
+    else:
+        # If no OLLAMA_BASE_URL, check if we're likely in a Docker environment
+        if os.path.exists("/.dockerenv"):
+            default_host = "ollama"
+        else:
+            default_host = "localhost"
 
     parser.add_argument(
         "--ollama-host",
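Taken together, a client now talks to the proxy instead of Ollama and selects the context window through the first path segment; the proxy injects `options.num_ctx` and recomputes `Content-Length` before forwarding. A hedged usage sketch (host port 11436 assumes the compose mapping above; `llama2` is simply the model name used elsewhere in this PR):

```python
# Hypothetical client call from the docker host; 11436 maps to the proxy's
# in-container port 11435 per docker-compose.yml.
import requests

resp = requests.post(
    "http://localhost:11436/proxy-context/auto/api/generate",  # "auto" lets the proxy size the context
    json={"model": "llama2", "prompt": "Hello, world!", "stream": False},
    timeout=60,
)
print(resp.status_code, resp.json().get("response", ""))

# An explicit size such as /proxy-context/8192/api/generate skips
# auto-detection and injects options.num_ctx = 8192 verbatim.
```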
diff --git a/ollama-context-proxy/requirements.txt b/ollama-context-proxy/requirements.txt
index 49e072d..1f5fdbf 100644
--- a/ollama-context-proxy/requirements.txt
+++ b/ollama-context-proxy/requirements.txt
@@ -2,11 +2,16 @@ aiohappyeyeballs==2.6.1
 aiohttp==3.12.15
 aiosignal==1.4.0
 attrs==25.3.0
+certifi==2025.7.14
+charset-normalizer==3.4.2
 frozenlist==1.7.0
 idna==3.10
 multidict==6.6.3
 propcache==0.3.2
+requests==2.32.4
+ruff==0.12.7
 setuptools==68.1.2
 typing_extensions==4.14.1
+urllib3==2.5.0
 wheel==0.42.0
-yarl==1.20.1
\ No newline at end of file
+yarl==1.20.1
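The `requests` pin (with its transitive dependencies `certifi`, `charset-normalizer`, and `urllib3`) supports the new test script below, and `ruff` is the linter. The same dependency also makes one-off probes of the new `/debug/ollama` endpoint easy, e.g. this sketch with an assumed host-side port:

```python
# One-off probe of the proxy's new debug endpoint (host port assumed 11436
# per the compose mapping).
import requests

r = requests.get("http://localhost:11436/debug/ollama", timeout=10)
print(r.status_code)  # 200 if the proxy can reach Ollama, 502 otherwise
print(r.text)         # target URL, status, and a body preview
```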
diff --git a/ollama-context-proxy/test-proxy.py b/ollama-context-proxy/test-proxy.py
new file mode 100644
index 0000000..9455b4e
--- /dev/null
+++ b/ollama-context-proxy/test-proxy.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+"""
+Simple test script for debugging the Ollama Context Proxy
+"""
+
+import requests
+import sys
+
+
+def test_direct_ollama(base_url="http://localhost:11434"):
+    """Test direct connection to Ollama"""
+    print("=== Testing Direct Ollama Connection ===")
+    print(f"URL: {base_url}")
+
+    try:
+        response = requests.get(f"{base_url}/api/tags", timeout=5)
+        print(f"Status: {response.status_code}")
+        print(f"Headers: {dict(response.headers)}")
+        if response.status_code == 200:
+            print("✅ Direct Ollama connection OK")
+            tags_data = response.json()
+            print(f"Available models: {[model['name'] for model in tags_data.get('models', [])]}")
+        else:
+            print(f"❌ Direct Ollama connection failed: {response.text}")
+        return response.status_code == 200
+    except Exception as e:
+        print(f"❌ Direct Ollama connection error: {e}")
+        return False
+
+
+def test_proxy_health(proxy_url="http://localhost:11435"):
+    """Test proxy health endpoint"""
+    print("\n=== Testing Proxy Health ===")
+    print(f"URL: {proxy_url}")
+
+    try:
+        response = requests.get(f"{proxy_url}/health", timeout=5)
+        print(f"Status: {response.status_code}")
+        print(f"Response: {response.text}")
+        if response.status_code == 200:
+            print("✅ Proxy health check OK")
+        else:
+            print("❌ Proxy health check failed")
+        return response.status_code == 200
+    except Exception as e:
+        print(f"❌ Proxy health check error: {e}")
+        return False
+
+
+def test_proxy_debug(proxy_url="http://localhost:11435"):
+    """Test proxy debug endpoint"""
+    print("\n=== Testing Proxy Debug ===")
+    print(f"URL: {proxy_url}/debug/ollama")
+
+    try:
+        response = requests.get(f"{proxy_url}/debug/ollama", timeout=10)
+        print(f"Status: {response.status_code}")
+        print(f"Response: {response.text}")
+        if response.status_code == 200:
+            print("✅ Proxy debug check OK")
+        else:
+            print("❌ Proxy debug check failed")
+        return response.status_code == 200
+    except Exception as e:
+        print(f"❌ Proxy debug check error: {e}")
+        return False
+
+
+def test_proxy_request(proxy_url="http://localhost:11435", model="llama2"):
+    """Test actual proxy request"""
+    print("\n=== Testing Proxy Request ===")
+    print(f"URL: {proxy_url}/proxy-context/auto/api/generate")
+
+    payload = {
+        "model": model,
+        "prompt": "Hello, world!",
+        "stream": False
+    }
+
+    try:
+        response = requests.post(
+            f"{proxy_url}/proxy-context/auto/api/generate",
+            json=payload,
+            timeout=30
+        )
+        print(f"Status: {response.status_code}")
+        print(f"Headers: {dict(response.headers)}")
+        print(f"Response preview: {response.text[:500]}...")
+
+        if response.status_code == 200:
+            print("✅ Proxy request OK")
+        else:
+            print("❌ Proxy request failed")
+        return response.status_code == 200
+    except Exception as e:
+        print(f"❌ Proxy request error: {e}")
+        return False
+
+
+def main():
+    if len(sys.argv) > 1:
+        if sys.argv[1] == "--help":
+            print("Usage: python3 test-proxy.py [ollama_url] [proxy_url] [model]")
+            print("  ollama_url: Default http://localhost:11434")
+            print("  proxy_url:  Default http://localhost:11435")
+            print("  model:      Default llama2")
+            return
+
+    ollama_url = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:11434"
+    proxy_url = sys.argv[2] if len(sys.argv) > 2 else "http://localhost:11435"
+    model = sys.argv[3] if len(sys.argv) > 3 else "llama2"
+
+    print("Ollama Context Proxy Debug Test")
+    print("===============================")
+    print(f"Ollama URL: {ollama_url}")
+    print(f"Proxy URL: {proxy_url}")
+    print(f"Test Model: {model}")
+
+    # Run tests
+    tests = [
+        ("Direct Ollama", lambda: test_direct_ollama(ollama_url)),
+        ("Proxy Health", lambda: test_proxy_health(proxy_url)),
+        ("Proxy Debug", lambda: test_proxy_debug(proxy_url)),
+        ("Proxy Request", lambda: test_proxy_request(proxy_url, model)),
+    ]
+
+    results = []
+    for test_name, test_func in tests:
+        success = test_func()
+        results.append((test_name, success))
+
+    print("\n=== Summary ===")
+    for test_name, success in results:
+        status = "✅ PASS" if success else "❌ FAIL"
+        print(f"{test_name}: {status}")
+
+    all_passed = all(success for _, success in results)
+    if all_passed:
+        print("\n🎉 All tests passed!")
+    else:
+        print("\n⚠️ Some tests failed. Check the output above for details.")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
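As its `--help` text shows, the script takes up to three positional arguments: `python3 test-proxy.py [ollama_url] [proxy_url] [model]`. Note that the default proxy URL of `http://localhost:11435` matches the port the proxy listens on inside the compose network; from the docker host, the mapping in docker-compose.yml exposes it as 11436, so pass `http://localhost:11436` when testing from outside the containers.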