Fixed context proxy

James Ketr 2025-08-01 10:22:33 -07:00
parent 8119cd8492
commit 2d2745a788
6 changed files with 296 additions and 20 deletions

.gitignore
View File

@@ -2,3 +2,4 @@ db/**
cache/**
ollama/**
.env
**/venv/**

View File

@@ -80,9 +80,14 @@ services:
env_file:
- .env
environment:
- OLLAMA_HOST=http://ollama:11434
- OLLAMA_BASE_URL=http://ollama:11434
volumes:
- ./ollama-context-proxy/venv:/opt/ollama-context-proxy/venv:rw # Live mount for python venv
- ./ollama-context-proxy/ollama-context-proxy.py:/opt/ollama-context-proxy/ollama-context-proxy.py:rw
- ./ollama-context-proxy/requirements.txt:/opt/ollama-context-proxy/requirements.txt:rw
- ./ollama-context-proxy/test-proxy.py:/opt/ollama-context-proxy/test-proxy.py:rw
ports:
- 11436:11434 # ollama-context-proxy port
- 11436:11435 # ollama-context-proxy port
networks:
- internal
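
With this mapping the proxy's in-container port 11435 is published on host port 11436, so host-side clients go through the proxy rather than straight to Ollama. A minimal sketch of such a request with requests, mirroring the test-proxy.py added below (reaching the proxy as localhost and the llama2 model name are assumptions):

import requests

# "auto" lets the proxy pick num_ctx; host port 11436 maps to the proxy's 11435 per the compose file above.
resp = requests.post(
    "http://localhost:11436/proxy-context/auto/api/generate",
    json={"model": "llama2", "prompt": "Hello, world!", "stream": False},
    timeout=30,
)
print(resp.status_code, resp.text[:200])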

View File

@@ -38,7 +38,14 @@ RUN { \
echo 'echo "Container: ollama-context-proxy"'; \
echo 'set -e'; \
echo 'echo "Setting pip environment to /opt/ollama-context-proxy"'; \
echo 'if [[ ! -d /opt/ollama-context-proxy/venv/bin ]]; then'; \
echo ' echo "Creating virtual environment at /opt/ollama-context-proxy/venv"'; \
echo ' python3 -m venv --clear --system-site-packages /opt/ollama-context-proxy/venv'; \
echo 'fi'; \
echo 'echo "Activating virtual environment at /opt/ollama-context-proxy/venv"'; \
echo 'source /opt/ollama-context-proxy/venv/bin/activate'; \
echo 'echo "Installing requirements from /opt/ollama-context-proxy/requirements.txt"'; \
echo 'pip install -r /opt/ollama-context-proxy/requirements.txt >/dev/null || echo "Failed"'; \
echo 'if [[ "${1}" == "/bin/bash" ]] || [[ "${1}" =~ ^(/opt/ollama-context-proxy/)?shell$ ]]; then'; \
echo ' echo "Dropping to shell"'; \
echo ' shift'; \

ollama-context-proxy/ollama-context-proxy.py
View File

@@ -33,7 +33,7 @@ class OllamaContextProxy:
else:
# Fall back to host/port construction
if ollama_host is None:
ollama_host = "localhost"
ollama_host = "ollama"
self.ollama_base_url = f"http://{ollama_host}:{ollama_port}"
self.proxy_port = proxy_port
@@ -70,9 +70,52 @@ class OllamaContextProxy:
# Optional: Add a health check endpoint
app.router.add_get("/", self.health_check)
app.router.add_get("/health", self.health_check)
app.router.add_get("/debug/ollama", self.debug_ollama)
return app
async def debug_ollama(self, request: web.Request) -> web.Response:
"""Debug endpoint to test connectivity to Ollama"""
if not self.session:
return web.Response(
text="Error: HTTP session not initialized",
status=500,
content_type="text/plain",
)
test_url = f"{self.ollama_base_url}/api/tags"
try:
# Test basic connectivity to Ollama
self.logger.info(f"Testing Ollama connectivity to: {test_url}")
async with self.session.get(test_url) as response:
status = response.status
content_type = response.headers.get("content-type", "N/A")
body = await response.text()
return web.Response(
text=f"Ollama Debug Test\n"
f"=================\n"
f"Target URL: {test_url}\n"
f"Status: {status}\n"
f"Content-Type: {content_type}\n"
f"Body Length: {len(body)}\n"
f"Body Preview: {body[:500]}...\n"
f"\nProxy Base URL: {self.ollama_base_url}\n"
f"Available Contexts: {self.available_contexts}",
content_type="text/plain",
)
except Exception as e:
return web.Response(
text=f"Ollama Debug Test FAILED\n"
f"========================\n"
f"Error: {str(e)}\n"
f"Target URL: {test_url}\n"
f"Proxy Base URL: {self.ollama_base_url}",
status=502,
content_type="text/plain",
)
async def health_check(self, request: web.Request) -> web.Response:
"""Health check endpoint"""
return web.Response(
@@ -86,7 +129,9 @@ class OllamaContextProxy:
content_type="text/plain",
)
async def proxy_handler(self, request: web.Request) -> web.Response:
async def proxy_handler(
self, request: web.Request
) -> web.Response | web.StreamResponse:
"""Handle all proxy requests with context size extraction or auto-detection"""
# Extract context spec and remaining path
@@ -97,18 +142,32 @@ class OllamaContextProxy:
if remaining_path.startswith("/"):
remaining_path = remaining_path[1:]
# Get request data first (needed for auto-sizing)
# Get request data first (needed for auto-sizing) - read only once!
original_data = None
request_body = None
if request.content_type == "application/json":
try:
data = await request.json()
except json.JSONDecodeError:
data = await request.text()
original_data = await request.json()
# Convert back to bytes for forwarding
request_body = json.dumps(original_data).encode("utf-8")
except json.JSONDecodeError as e:
self.logger.error(f"Failed to parse JSON: {e}")
request_body = await request.read()
original_data = request_body.decode("utf-8", errors="ignore")
else:
data = await request.read()
request_body = await request.read()
original_data = request_body
# Use original_data for analysis, request_body for forwarding
data_for_analysis = original_data if original_data is not None else {}
data_for_forwarding = request_body if request_body is not None else b""
# Determine context size
if context_spec == "auto":
context_size = self._auto_determine_context_size(data, remaining_path)
context_size = self._auto_determine_context_size(
data_for_analysis, remaining_path
)
else:
context_size = int(context_spec)
@@ -130,22 +189,57 @@ class OllamaContextProxy:
else:
target_url = f"{self.ollama_base_url}/{remaining_path}"
self.logger.info(f"Routing to context {context_size} -> {target_url}")
# Enhanced debugging
self.logger.info("=== REQUEST DEBUG ===")
self.logger.info(f"Original request path: {request.path}")
self.logger.info(f"Context spec: {context_spec}")
self.logger.info(f"Remaining path: '{remaining_path}'")
self.logger.info(f"Target URL: {target_url}")
self.logger.info(f"Request method: {request.method}")
self.logger.info(f"Request headers: {dict(request.headers)}")
self.logger.info(f"Request query params: {dict(request.query)}")
self.logger.info(f"Content type: {request.content_type}")
if isinstance(data_for_analysis, dict):
self.logger.info(f"Request data keys: {list(data_for_analysis.keys())}")
else:
data_len = (
len(data_for_analysis)
if hasattr(data_for_analysis, "__len__")
else "N/A"
)
self.logger.info(
f"Request data type: {type(data_for_analysis)}, length: {data_len}"
)
self.logger.info(f"Selected context size: {context_size}")
# Inject context if needed
if self._should_inject_context(remaining_path) and isinstance(data, dict):
if "options" not in data:
data["options"] = {}
data["options"]["num_ctx"] = context_size
# Inject context if needed (modify the JSON data, not the raw bytes)
modified_data = False
if self._should_inject_context(remaining_path) and isinstance(
data_for_analysis, dict
):
if "options" not in data_for_analysis:
data_for_analysis["options"] = {}
data_for_analysis["options"]["num_ctx"] = context_size
self.logger.info(f"Injected num_ctx={context_size} for {remaining_path}")
# Re-encode the modified JSON
data_for_forwarding = json.dumps(data_for_analysis).encode("utf-8")
modified_data = True
# Prepare headers (exclude hop-by-hop headers)
headers = {
key: value
for key, value in request.headers.items()
if key.lower() not in ["host", "connection", "upgrade"]
if key.lower() not in ["host", "connection", "upgrade", "content-length"]
}
# Update Content-Length if we modified the data
if modified_data and isinstance(data_for_forwarding, bytes):
headers["Content-Length"] = str(len(data_for_forwarding))
# Debug the final data being sent
self.logger.debug(f"Final data being sent: {data_for_forwarding}")
self.logger.debug(f"Final headers: {headers}")
if not self.session:
raise RuntimeError("HTTP session not initialized")
try:
@@ -153,10 +247,28 @@ class OllamaContextProxy:
async with self.session.request(
method=request.method,
url=target_url,
data=json.dumps(data) if isinstance(data, dict) else data,
data=data_for_forwarding,
headers=headers,
params=request.query,
) as response:
# Enhanced response debugging
self.logger.info("=== RESPONSE DEBUG ===")
self.logger.info(f"Response status: {response.status}")
self.logger.info(f"Response headers: {dict(response.headers)}")
self.logger.info(
f"Response content-type: {response.headers.get('content-type', 'N/A')}"
)
# Log response body for non-streaming 404s
if response.status == 404:
error_body = await response.text()
self.logger.error(f"404 Error body: {error_body}")
return web.Response(
text=f"Ollama 404 Error - URL: {target_url}\nError: {error_body}",
status=404,
content_type="text/plain",
)
# Handle streaming responses (for generate/chat endpoints)
if response.headers.get("content-type", "").startswith(
"application/x-ndjson"
@@ -323,13 +435,19 @@ async def main():
)
# Get default host from OLLAMA_BASE_URL if available
default_host = "localhost"
default_host = "ollama" # Default to "ollama" for Docker environments
base_url = os.getenv("OLLAMA_BASE_URL")
if base_url:
# Extract host from base URL for backward compatibility with CLI args
parsed = urllib.parse.urlparse(base_url)
if parsed.hostname:
default_host = parsed.hostname
else:
# If no OLLAMA_BASE_URL, check if we're likely in a Docker environment
if os.path.exists("/.dockerenv"):
default_host = "ollama"
else:
default_host = "localhost"
parser.add_argument(
"--ollama-host",

ollama-context-proxy/requirements.txt
View File

@@ -2,11 +2,16 @@ aiohappyeyeballs==2.6.1
aiohttp==3.12.15
aiosignal==1.4.0
attrs==25.3.0
certifi==2025.7.14
charset-normalizer==3.4.2
frozenlist==1.7.0
idna==3.10
multidict==6.6.3
propcache==0.3.2
requests==2.32.4
ruff==0.12.7
setuptools==68.1.2
typing_extensions==4.14.1
urllib3==2.5.0
wheel==0.42.0
yarl==1.20.1

ollama-context-proxy/test-proxy.py
View File

@@ -0,0 +1,140 @@
#!/usr/bin/env python3
"""
Simple test script for debugging the Ollama Context Proxy
"""
import requests
import sys

def test_direct_ollama(base_url="http://localhost:11434"):
    """Test direct connection to Ollama"""
    print("=== Testing Direct Ollama Connection ===")
    print(f"URL: {base_url}")
    try:
        response = requests.get(f"{base_url}/api/tags", timeout=5)
        print(f"Status: {response.status_code}")
        print(f"Headers: {dict(response.headers)}")
        if response.status_code == 200:
            print("✅ Direct Ollama connection OK")
            tags_data = response.json()
            print(f"Available models: {[model['name'] for model in tags_data.get('models', [])]}")
        else:
            print(f"❌ Direct Ollama connection failed: {response.text}")
        return response.status_code == 200
    except Exception as e:
        print(f"❌ Direct Ollama connection error: {e}")
        return False

def test_proxy_health(proxy_url="http://localhost:11435"):
    """Test proxy health endpoint"""
    print("\n=== Testing Proxy Health ===")
    print(f"URL: {proxy_url}")
    try:
        response = requests.get(f"{proxy_url}/health", timeout=5)
        print(f"Status: {response.status_code}")
        print(f"Response: {response.text}")
        if response.status_code == 200:
            print("✅ Proxy health check OK")
        else:
            print("❌ Proxy health check failed")
        return response.status_code == 200
    except Exception as e:
        print(f"❌ Proxy health check error: {e}")
        return False

def test_proxy_debug(proxy_url="http://localhost:11435"):
    """Test proxy debug endpoint"""
    print("\n=== Testing Proxy Debug ===")
    print(f"URL: {proxy_url}/debug/ollama")
    try:
        response = requests.get(f"{proxy_url}/debug/ollama", timeout=10)
        print(f"Status: {response.status_code}")
        print(f"Response: {response.text}")
        if response.status_code == 200:
            print("✅ Proxy debug check OK")
        else:
            print("❌ Proxy debug check failed")
        return response.status_code == 200
    except Exception as e:
        print(f"❌ Proxy debug check error: {e}")
        return False

def test_proxy_request(proxy_url="http://localhost:11435", model="llama2"):
    """Test actual proxy request"""
    print("\n=== Testing Proxy Request ===")
    print(f"URL: {proxy_url}/proxy-context/auto/api/generate")
    payload = {
        "model": model,
        "prompt": "Hello, world!",
        "stream": False
    }
    try:
        response = requests.post(
            f"{proxy_url}/proxy-context/auto/api/generate",
            json=payload,
            timeout=30
        )
        print(f"Status: {response.status_code}")
        print(f"Headers: {dict(response.headers)}")
        print(f"Response preview: {response.text[:500]}...")
        if response.status_code == 200:
            print("✅ Proxy request OK")
        else:
            print("❌ Proxy request failed")
        return response.status_code == 200
    except Exception as e:
        print(f"❌ Proxy request error: {e}")
        return False

def main():
    if len(sys.argv) > 1:
        if sys.argv[1] == "--help":
            print("Usage: python3 test-proxy.py [ollama_url] [proxy_url] [model]")
            print(" ollama_url: Default http://localhost:11434")
            print(" proxy_url: Default http://localhost:11435")
            print(" model: Default llama2")
            return
    ollama_url = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:11434"
    proxy_url = sys.argv[2] if len(sys.argv) > 2 else "http://localhost:11435"
    model = sys.argv[3] if len(sys.argv) > 3 else "llama2"
    print("Ollama Context Proxy Debug Test")
    print("===============================")
    print(f"Ollama URL: {ollama_url}")
    print(f"Proxy URL: {proxy_url}")
    print(f"Test Model: {model}")
    # Run tests
    tests = [
        ("Direct Ollama", lambda: test_direct_ollama(ollama_url)),
        ("Proxy Health", lambda: test_proxy_health(proxy_url)),
        ("Proxy Debug", lambda: test_proxy_debug(proxy_url)),
        ("Proxy Request", lambda: test_proxy_request(proxy_url, model)),
    ]
    results = []
    for test_name, test_func in tests:
        success = test_func()
        results.append((test_name, success))
    print("\n=== Summary ===")
    for test_name, success in results:
        status = "✅ PASS" if success else "❌ FAIL"
        print(f"{test_name}: {status}")
    all_passed = all(success for _, success in results)
    if all_passed:
        print("\n🎉 All tests passed!")
    else:
        print("\n⚠️ Some tests failed. Check the output above for details.")
        sys.exit(1)

if __name__ == "__main__":
    main()
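
Run from the host against this compose setup, the script would be pointed at the published proxy port rather than its in-container default, e.g. python3 test-proxy.py http://localhost:11434 http://localhost:11436 llama2 (this assumes Ollama's own API is also reachable on localhost:11434 and that a llama2 model has been pulled).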