Fixed context proxy

parent 8119cd8492
commit 2d2745a788
.gitignore (vendored, +1 line)
@@ -2,3 +2,4 @@ db/**
 cache/**
 ollama/**
 .env
+**/venv/**
docker-compose.yml
@@ -80,9 +80,14 @@ services:
     env_file:
       - .env
     environment:
-      - OLLAMA_HOST=http://ollama:11434
+      - OLLAMA_BASE_URL=http://ollama:11434
+    volumes:
+      - ./ollama-context-proxy/venv:/opt/ollama-context-proxy/venv:rw # Live mount for python venv
+      - ./ollama-context-proxy/ollama-context-proxy.py:/opt/ollama-context-proxy/ollama-context-proxy.py:rw
+      - ./ollama-context-proxy/requirements.txt:/opt/ollama-context-proxy/requirements.txt:rw
+      - ./ollama-context-proxy/test-proxy.py:/opt/ollama-context-proxy/test-proxy.py:rw
     ports:
-      - 11436:11434 # ollama-context-proxy port
+      - 11436:11435 # ollama-context-proxy port
     networks:
       - internal
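The port fix matters because the proxy listens on 11435 inside the container while Ollama itself uses 11434; the compose file publishes the proxy on host port 11436. A minimal reachability check from the host, assuming the stack is up (endpoint and ports are the ones in this commit):

    import requests

    # /health is registered by the proxy (see the route table below);
    # 11436 is the host port published in the compose file.
    r = requests.get("http://localhost:11436/health", timeout=5)
    print(r.status_code, r.text)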
ollama-context-proxy/Dockerfile
@@ -38,7 +38,14 @@ RUN { \
     echo 'echo "Container: ollama-context-proxy"'; \
     echo 'set -e'; \
     echo 'echo "Setting pip environment to /opt/ollama-context-proxy"'; \
+    echo 'if [[ ! -d /opt/job_hunter/venv/bin ]]; then'; \
+    echo ' echo "Creating virtual environment at /opt/ollama-context-proxy/venv"'; \
+    echo ' python3 -m venv --clear --system-site-packages /opt/ollama-context-proxy/venv'; \
+    echo 'fi'; \
+    echo 'echo "Activating virtual environment at /opt/ollama-context-proxy/venv"'; \
     echo 'source /opt/ollama-context-proxy/venv/bin/activate'; \
+    echo 'echo "Installing requirements from /opt/ollama-context-proxy/requirements.txt"'; \
+    echo 'pip install -r /opt/ollama-context-proxy/requirements.txt >/dev/null || echo "Failed"'; \
     echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama-context-proxy/)?shell$ ]]; then'; \
     echo ' echo "Dropping to shell"'; \
     echo ' shift'; \
ollama-context-proxy/ollama-context-proxy.py
@@ -33,7 +33,7 @@ class OllamaContextProxy:
         else:
             # Fall back to host/port construction
             if ollama_host is None:
-                ollama_host = "localhost"
+                ollama_host = "ollama"
             self.ollama_base_url = f"http://{ollama_host}:{ollama_port}"

         self.proxy_port = proxy_port
@@ -70,9 +70,52 @@ class OllamaContextProxy:
         # Optional: Add a health check endpoint
         app.router.add_get("/", self.health_check)
         app.router.add_get("/health", self.health_check)
+        app.router.add_get("/debug/ollama", self.debug_ollama)

         return app

+    async def debug_ollama(self, request: web.Request) -> web.Response:
+        """Debug endpoint to test connectivity to Ollama"""
+        if not self.session:
+            return web.Response(
+                text="Error: HTTP session not initialized",
+                status=500,
+                content_type="text/plain",
+            )
+
+        test_url = f"{self.ollama_base_url}/api/tags"
+        try:
+            # Test basic connectivity to Ollama
+            self.logger.info(f"Testing Ollama connectivity to: {test_url}")
+
+            async with self.session.get(test_url) as response:
+                status = response.status
+                content_type = response.headers.get("content-type", "N/A")
+                body = await response.text()
+
+                return web.Response(
+                    text=f"Ollama Debug Test\n"
+                    f"=================\n"
+                    f"Target URL: {test_url}\n"
+                    f"Status: {status}\n"
+                    f"Content-Type: {content_type}\n"
+                    f"Body Length: {len(body)}\n"
+                    f"Body Preview: {body[:500]}...\n"
+                    f"\nProxy Base URL: {self.ollama_base_url}\n"
+                    f"Available Contexts: {self.available_contexts}",
+                    content_type="text/plain",
+                )
+        except Exception as e:
+            return web.Response(
+                text=f"Ollama Debug Test FAILED\n"
+                f"========================\n"
+                f"Error: {str(e)}\n"
+                f"Target URL: {test_url}\n"
+                f"Proxy Base URL: {self.ollama_base_url}",
+                status=502,
+                content_type="text/plain",
+            )
+
     async def health_check(self, request: web.Request) -> web.Response:
         """Health check endpoint"""
         return web.Response(
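The new /debug/ollama route gives a one-call check of proxy-to-Ollama connectivity. A short sketch of hitting it from the host, assuming the compose port mapping above (success returns a plain-text report, failure a 502 with the error):

    import requests

    # The proxy probes {ollama_base_url}/api/tags and reports what it saw.
    r = requests.get("http://localhost:11436/debug/ollama", timeout=10)
    print(r.status_code)
    print(r.text)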
@@ -86,7 +129,9 @@ class OllamaContextProxy:
             content_type="text/plain",
         )

-    async def proxy_handler(self, request: web.Request) -> web.Response:
+    async def proxy_handler(
+        self, request: web.Request
+    ) -> web.Response | web.StreamResponse:
         """Handle all proxy requests with context size extraction or auto-detection"""

         # Extract context spec and remaining path
@@ -97,18 +142,32 @@ class OllamaContextProxy:
         if remaining_path.startswith("/"):
             remaining_path = remaining_path[1:]

-        # Get request data first (needed for auto-sizing)
+        # Get request data first (needed for auto-sizing) - read only once!
+        original_data = None
+        request_body = None
+
         if request.content_type == "application/json":
             try:
-                data = await request.json()
-            except json.JSONDecodeError:
-                data = await request.text()
+                original_data = await request.json()
+                # Convert back to bytes for forwarding
+                request_body = json.dumps(original_data).encode("utf-8")
+            except json.JSONDecodeError as e:
+                self.logger.error(f"Failed to parse JSON: {e}")
+                request_body = await request.read()
+                original_data = request_body.decode("utf-8", errors="ignore")
         else:
-            data = await request.read()
+            request_body = await request.read()
+            original_data = request_body
+
+        # Use original_data for analysis, request_body for forwarding
+        data_for_analysis = original_data if original_data is not None else {}
+        data_for_forwarding = request_body if request_body is not None else b""

         # Determine context size
         if context_spec == "auto":
-            context_size = self._auto_determine_context_size(data, remaining_path)
+            context_size = self._auto_determine_context_size(
+                data_for_analysis, remaining_path
+            )
         else:
             context_size = int(context_spec)
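The context_spec URL segment is either the literal auto or, given the int(context_spec) branch, a number pinning the context size. A hedged usage sketch (the auto form appears verbatim in test-proxy.py below; the numeric form is inferred from the parsing code, and the model name is a placeholder):

    import requests

    payload = {"model": "llama2", "prompt": "Hello, world!", "stream": False}

    # Let the proxy auto-size the context from the request:
    requests.post(
        "http://localhost:11436/proxy-context/auto/api/generate",
        json=payload, timeout=30,
    )

    # Pin the context size explicitly (numeric spec, inferred):
    requests.post(
        "http://localhost:11436/proxy-context/8192/api/generate",
        json=payload, timeout=30,
    )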
@@ -130,22 +189,57 @@ class OllamaContextProxy:
         else:
             target_url = f"{self.ollama_base_url}/{remaining_path}"

-        self.logger.info(f"Routing to context {context_size} -> {target_url}")
+        # Enhanced debugging
+        self.logger.info("=== REQUEST DEBUG ===")
+        self.logger.info(f"Original request path: {request.path}")
+        self.logger.info(f"Context spec: {context_spec}")
+        self.logger.info(f"Remaining path: '{remaining_path}'")
+        self.logger.info(f"Target URL: {target_url}")
+        self.logger.info(f"Request method: {request.method}")
+        self.logger.info(f"Request headers: {dict(request.headers)}")
+        self.logger.info(f"Request query params: {dict(request.query)}")
+        self.logger.info(f"Content type: {request.content_type}")
+        if isinstance(data_for_analysis, dict):
+            self.logger.info(f"Request data keys: {list(data_for_analysis.keys())}")
+        else:
+            data_len = (
+                len(data_for_analysis)
+                if hasattr(data_for_analysis, "__len__")
+                else "N/A"
+            )
+            self.logger.info(
+                f"Request data type: {type(data_for_analysis)}, length: {data_len}"
+            )
+        self.logger.info(f"Selected context size: {context_size}")

-        # Inject context if needed
-        if self._should_inject_context(remaining_path) and isinstance(data, dict):
-            if "options" not in data:
-                data["options"] = {}
-            data["options"]["num_ctx"] = context_size
+        # Inject context if needed (modify the JSON data, not the raw bytes)
+        modified_data = False
+        if self._should_inject_context(remaining_path) and isinstance(
+            data_for_analysis, dict
+        ):
+            if "options" not in data_for_analysis:
+                data_for_analysis["options"] = {}
+            data_for_analysis["options"]["num_ctx"] = context_size
             self.logger.info(f"Injected num_ctx={context_size} for {remaining_path}")
+            # Re-encode the modified JSON
+            data_for_forwarding = json.dumps(data_for_analysis).encode("utf-8")
+            modified_data = True

         # Prepare headers (exclude hop-by-hop headers)
         headers = {
             key: value
             for key, value in request.headers.items()
-            if key.lower() not in ["host", "connection", "upgrade"]
+            if key.lower() not in ["host", "connection", "upgrade", "content-length"]
         }

+        # Update Content-Length if we modified the data
+        if modified_data and isinstance(data_for_forwarding, bytes):
+            headers["Content-Length"] = str(len(data_for_forwarding))
+
+        # Debug the final data being sent
+        self.logger.debug(f"Final data being sent: {data_for_forwarding}")
+        self.logger.debug(f"Final headers: {headers}")
+
         if not self.session:
            raise RuntimeError("HTTP session not initialized")
         try:
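Condensed, the injection path now mutates the parsed JSON, re-encodes it, and recomputes Content-Length (the client's stale value is dropped by the header filter). A minimal illustration of the transformation, using placeholder values:

    import json

    body = {"model": "llama2", "prompt": "Hello"}           # client payload
    body.setdefault("options", {})["num_ctx"] = 8192        # what the proxy injects
    data_for_forwarding = json.dumps(body).encode("utf-8")  # re-encoded bytes
    headers = {"Content-Length": str(len(data_for_forwarding))}
    # -> b'{"model": "llama2", "prompt": "Hello", "options": {"num_ctx": 8192}}'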
@@ -153,10 +247,28 @@ class OllamaContextProxy:
             async with self.session.request(
                 method=request.method,
                 url=target_url,
-                data=json.dumps(data) if isinstance(data, dict) else data,
+                data=data_for_forwarding,
                 headers=headers,
                 params=request.query,
             ) as response:
+                # Enhanced response debugging
+                self.logger.info("=== RESPONSE DEBUG ===")
+                self.logger.info(f"Response status: {response.status}")
+                self.logger.info(f"Response headers: {dict(response.headers)}")
+                self.logger.info(
+                    f"Response content-type: {response.headers.get('content-type', 'N/A')}"
+                )
+
+                # Log response body for non-streaming 404s
+                if response.status == 404:
+                    error_body = await response.text()
+                    self.logger.error(f"404 Error body: {error_body}")
+                    return web.Response(
+                        text=f"Ollama 404 Error - URL: {target_url}\nError: {error_body}",
+                        status=404,
+                        content_type="text/plain",
+                    )
+
                 # Handle streaming responses (for generate/chat endpoints)
                 if response.headers.get("content-type", "").startswith(
                     "application/x-ndjson"
@@ -323,13 +435,19 @@ async def main():
     )

     # Get default host from OLLAMA_BASE_URL if available
-    default_host = "localhost"
+    default_host = "ollama"  # Default to "ollama" for Docker environments
     base_url = os.getenv("OLLAMA_BASE_URL")
     if base_url:
         # Extract host from base URL for backward compatibility with CLI args
         parsed = urllib.parse.urlparse(base_url)
         if parsed.hostname:
             default_host = parsed.hostname
+    else:
+        # If no OLLAMA_BASE_URL, check if we're likely in a Docker environment
+        if os.path.exists("/.dockerenv"):
+            default_host = "ollama"
+        else:
+            default_host = "localhost"

     parser.add_argument(
         "--ollama-host",
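With this change the default upstream host resolves in three steps, matching the OLLAMA_BASE_URL variable the compose file now sets. A condensed restatement of the same logic:

    import os
    import urllib.parse

    default_host = "ollama"                    # Docker-friendly default
    base_url = os.getenv("OLLAMA_BASE_URL")    # compose sets http://ollama:11434
    if base_url:
        host = urllib.parse.urlparse(base_url).hostname
        if host:
            default_host = host
    elif not os.path.exists("/.dockerenv"):    # outside Docker, prefer localhost
        default_host = "localhost"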
ollama-context-proxy/requirements.txt
@@ -2,11 +2,16 @@ aiohappyeyeballs==2.6.1
 aiohttp==3.12.15
 aiosignal==1.4.0
 attrs==25.3.0
+certifi==2025.7.14
+charset-normalizer==3.4.2
 frozenlist==1.7.0
 idna==3.10
 multidict==6.6.3
 propcache==0.3.2
+requests==2.32.4
+ruff==0.12.7
 setuptools==68.1.2
 typing_extensions==4.14.1
+urllib3==2.5.0
 wheel==0.42.0
 yarl==1.20.1
ollama-context-proxy/test-proxy.py (new file, 140 lines)
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+"""
+Simple test script for debugging the Ollama Context Proxy
+"""
+
+import requests
+import sys
+
+
+def test_direct_ollama(base_url="http://localhost:11434"):
+    """Test direct connection to Ollama"""
+    print("=== Testing Direct Ollama Connection ===")
+    print(f"URL: {base_url}")
+
+    try:
+        response = requests.get(f"{base_url}/api/tags", timeout=5)
+        print(f"Status: {response.status_code}")
+        print(f"Headers: {dict(response.headers)}")
+        if response.status_code == 200:
+            print("✅ Direct Ollama connection OK")
+            tags_data = response.json()
+            print(f"Available models: {[model['name'] for model in tags_data.get('models', [])]}")
+        else:
+            print(f"❌ Direct Ollama connection failed: {response.text}")
+        return response.status_code == 200
+    except Exception as e:
+        print(f"❌ Direct Ollama connection error: {e}")
+        return False
+
+
+def test_proxy_health(proxy_url="http://localhost:11435"):
+    """Test proxy health endpoint"""
+    print("\n=== Testing Proxy Health ===")
+    print(f"URL: {proxy_url}")
+
+    try:
+        response = requests.get(f"{proxy_url}/health", timeout=5)
+        print(f"Status: {response.status_code}")
+        print(f"Response: {response.text}")
+        if response.status_code == 200:
+            print("✅ Proxy health check OK")
+        else:
+            print("❌ Proxy health check failed")
+        return response.status_code == 200
+    except Exception as e:
+        print(f"❌ Proxy health check error: {e}")
+        return False
+
+
+def test_proxy_debug(proxy_url="http://localhost:11435"):
+    """Test proxy debug endpoint"""
+    print("\n=== Testing Proxy Debug ===")
+    print(f"URL: {proxy_url}/debug/ollama")
+
+    try:
+        response = requests.get(f"{proxy_url}/debug/ollama", timeout=10)
+        print(f"Status: {response.status_code}")
+        print(f"Response: {response.text}")
+        if response.status_code == 200:
+            print("✅ Proxy debug check OK")
+        else:
+            print("❌ Proxy debug check failed")
+        return response.status_code == 200
+    except Exception as e:
+        print(f"❌ Proxy debug check error: {e}")
+        return False
+
+
+def test_proxy_request(proxy_url="http://localhost:11435", model="llama2"):
+    """Test actual proxy request"""
+    print("\n=== Testing Proxy Request ===")
+    print(f"URL: {proxy_url}/proxy-context/auto/api/generate")
+
+    payload = {
+        "model": model,
+        "prompt": "Hello, world!",
+        "stream": False
+    }
+
+    try:
+        response = requests.post(
+            f"{proxy_url}/proxy-context/auto/api/generate",
+            json=payload,
+            timeout=30
+        )
+        print(f"Status: {response.status_code}")
+        print(f"Headers: {dict(response.headers)}")
+        print(f"Response preview: {response.text[:500]}...")
+
+        if response.status_code == 200:
+            print("✅ Proxy request OK")
+        else:
+            print("❌ Proxy request failed")
+        return response.status_code == 200
+    except Exception as e:
+        print(f"❌ Proxy request error: {e}")
+        return False
+
+
+def main():
+    if len(sys.argv) > 1:
+        if sys.argv[1] == "--help":
+            print("Usage: python3 test-proxy.py [ollama_url] [proxy_url] [model]")
+            print("  ollama_url: Default http://localhost:11434")
+            print("  proxy_url: Default http://localhost:11435")
+            print("  model: Default llama2")
+            return
+
+    ollama_url = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:11434"
+    proxy_url = sys.argv[2] if len(sys.argv) > 2 else "http://localhost:11435"
+    model = sys.argv[3] if len(sys.argv) > 3 else "llama2"
+
+    print("Ollama Context Proxy Debug Test")
+    print("===============================")
+    print(f"Ollama URL: {ollama_url}")
+    print(f"Proxy URL: {proxy_url}")
+    print(f"Test Model: {model}")
+
+    # Run tests
+    tests = [
+        ("Direct Ollama", lambda: test_direct_ollama(ollama_url)),
+        ("Proxy Health", lambda: test_proxy_health(proxy_url)),
+        ("Proxy Debug", lambda: test_proxy_debug(proxy_url)),
+        ("Proxy Request", lambda: test_proxy_request(proxy_url, model)),
+    ]
+
+    results = []
+    for test_name, test_func in tests:
+        success = test_func()
+        results.append((test_name, success))
+
+    print("\n=== Summary ===")
+    for test_name, success in results:
+        status = "✅ PASS" if success else "❌ FAIL"
+        print(f"{test_name}: {status}")
+
+    all_passed = all(success for _, success in results)
+    if all_passed:
+        print("\n🎉 All tests passed!")
+    else:
+        print("\n⚠️ Some tests failed. Check the output above for details.")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
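Per its own --help text, the script takes positional URLs and a model name. Note that its default proxy URL uses port 11435, the container-internal listen port; run from the host with the compose mapping in this commit, the published port would be passed explicitly, e.g.:

    python3 test-proxy.py http://localhost:11434 http://localhost:11436 llama2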