From 02651412fec73f4fa48585817c16fb7703bdf247 Mon Sep 17 00:00:00 2001 From: James Ketrenos Date: Mon, 5 May 2025 13:30:29 -0700 Subject: [PATCH] Begin adding prometheus metrics --- Dockerfile | 3 +++ cache/prometheus/.keep | 0 docker-compose.yml | 17 +++++++++++++++++ src/server.py | 16 +++++++++++++--- src/utils/setup_logging.py | 1 + 5 files changed, 34 insertions(+), 3 deletions(-) create mode 100644 cache/prometheus/.keep diff --git a/Dockerfile b/Dockerfile index afce113..2ab4b4b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -259,6 +259,9 @@ COPY /src/requirements.txt /opt/backstory/src/requirements.txt RUN pip install -r /opt/backstory/src/requirements.txt RUN pip install 'markitdown[all]' pydantic +# Prometheus +RUN pip install prometheus-client prometheus-fastapi-instrumentator + SHELL [ "/bin/bash", "-c" ] RUN { \ diff --git a/cache/prometheus/.keep b/cache/prometheus/.keep new file mode 100644 index 0000000..e69de29 diff --git a/docker-compose.yml b/docker-compose.yml index 7a651fc..1f40836 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -149,6 +149,23 @@ services: volumes: - ./cache:/root/.cache + prometheus: + image: prom/prometheus + container_name: prometheus + restart: "always" +# env_file: +# - .env +# devices: +# - /dev/dri:/dev/dri + ports: + - 9090:9090 # Prometheus + networks: + - internal + volumes: +# - ./prometheus.yml:/etc/prometheus/prometheus.yml + - ./cache/prometheus:/prometheus + + networks: internal: driver: bridge diff --git a/src/server.py b/src/server.py index 490374f..caabd84 100644 --- a/src/server.py +++ b/src/server.py @@ -37,6 +37,8 @@ try_import("uvicorn") try_import("numpy") try_import("umap") try_import("sklearn") +try_import("prometheus_client") +try_import("prometheus_fastapi_instrumentator") import ollama import requests @@ -48,6 +50,8 @@ import uvicorn # type: ignore import numpy as np # type: ignore import umap # type: ignore from sklearn.preprocessing import MinMaxScaler # type: ignore +from prometheus_client import Summary # type: ignore +from prometheus_fastapi_instrumentator import Instrumentator # type: ignore from utils import ( rag as Rag, @@ -66,6 +70,8 @@ rags = [ # { "name": "LKML", "enabled": False, "description": "Full associative data for entire LKML mailing list archive." }, ] +REQUEST_TIME = Summary('request_processing_seconds', 'Time spent processing request') + system_message_old = f""" Launched on {datetime.now().isoformat()}. @@ -204,7 +210,7 @@ def parse_args(): # %% # %% -def is_valid_uuid(value): +def is_valid_uuid(value: str) -> bool: try: uuid_obj = uuid.UUID(value, version=4) return str(uuid_obj) == value @@ -236,6 +242,8 @@ class WebServer: def __init__(self, llm, model=MODEL_NAME): self.app = FastAPI(lifespan=self.lifespan) + Instrumentator().instrument(self.app) + Instrumentator().expose(self.app) self.contexts = {} self.llm = llm self.model = model @@ -658,14 +666,15 @@ class WebServer: return JSONResponse({"status": "healthy"}) @self.app.get("/{path:path}") - async def serve_static(path: str): + async def serve_static(path: str, request: Request): full_path = os.path.join(defines.static_content, path) if os.path.exists(full_path) and os.path.isfile(full_path): logger.info(f"Serve static request for {full_path}") return FileResponse(full_path) + logger.info(f"Serve index.html for {path}") return FileResponse(os.path.join(defines.static_content, "index.html")) - + def save_context(self, context_id): """ Serialize a Python dictionary to a file in the agents directory. @@ -785,6 +794,7 @@ class WebServer: logger.info(f"Context {context_id} is not yet loaded.") return self.load_or_create_context(context_id) + @REQUEST_TIME.time() async def generate_response(self, context : Context, agent : Agent, prompt : str, options: Tunables | None) -> AsyncGenerator[Message, None]: if not self.file_watcher: raise Exception("File watcher not initialized") diff --git a/src/utils/setup_logging.py b/src/utils/setup_logging.py index 650aa76..d2c6655 100644 --- a/src/utils/setup_logging.py +++ b/src/utils/setup_logging.py @@ -26,6 +26,7 @@ def setup_logging(level=defines.logging_level) -> logging.Logger: # Now reduce verbosity for FastAPI, Uvicorn, Starlette for noisy_logger in ("uvicorn", "uvicorn.error", "uvicorn.access", "fastapi", "starlette"): + #for noisy_logger in ("starlette"): logging.getLogger(noisy_logger).setLevel(logging.WARNING) logger = logging.getLogger(__name__)