Begin adding prometheus metrics

James Ketr 2025-05-05 13:30:29 -07:00
parent 406d1c1dc1
commit 02651412fe
5 changed files with 34 additions and 3 deletions

@@ -259,6 +259,9 @@ COPY /src/requirements.txt /opt/backstory/src/requirements.txt
RUN pip install -r /opt/backstory/src/requirements.txt
RUN pip install 'markitdown[all]' pydantic
# Prometheus
RUN pip install prometheus-client prometheus-fastapi-instrumentator
SHELL [ "/bin/bash", "-c" ]
RUN { \

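As a side note on the two packages installed above: the sketch below is not part of this commit (the metric name and port 8000 are made-up examples). It shows the smallest possible prometheus-client usage, exposing one counter over the library's built-in HTTP server, and can serve as a quick smoke test that the image's Python environment is wired up.

# Illustrative sketch only: hypothetical metric name and port, not from the commit.
import time
from prometheus_client import Counter, start_http_server

REQUESTS = Counter("demo_requests_total", "Total demo requests handled")

if __name__ == "__main__":
    start_http_server(8000)  # serves text-format metrics at http://localhost:8000/metrics
    while True:
        REQUESTS.inc()       # increment so the counter is visible when scraped
        time.sleep(5)
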
cache/prometheus/.keep (vendored, new empty file)

@@ -149,6 +149,23 @@ services:
    volumes:
      - ./cache:/root/.cache
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    restart: "always"
    # env_file:
    # - .env
    # devices:
    # - /dev/dri:/dev/dri
    ports:
      - 9090:9090 # Prometheus
    networks:
      - internal
    volumes:
      # - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - ./cache/prometheus:/prometheus
networks:
  internal:
    driver: bridge

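Once the prometheus service above is running on port 9090 (and the commented-out prometheus.yml mount is filled in with a scrape job pointing at the backend's /metrics endpoint), the Prometheus HTTP API can be used to confirm data is arriving. A hedged sketch, assuming Prometheus is reachable on localhost:9090 and the request_processing_seconds summary defined later in this commit has been scraped at least once:

# Illustrative sketch only: queries the standard Prometheus HTTP API.
import requests

resp = requests.get(
    "http://localhost:9090/api/v1/query",
    params={"query": "request_processing_seconds_count"},
)
resp.raise_for_status()
for result in resp.json()["data"]["result"]:
    print(result["metric"], result["value"])  # label set and [timestamp, value] pair
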
@@ -37,6 +37,8 @@ try_import("uvicorn")
try_import("numpy")
try_import("umap")
try_import("sklearn")
try_import("prometheus_client")
try_import("prometheus_fastapi_instrumentator")
import ollama
import requests
@@ -48,6 +50,8 @@ import uvicorn # type: ignore
import numpy as np # type: ignore
import umap # type: ignore
from sklearn.preprocessing import MinMaxScaler # type: ignore
from prometheus_client import Summary # type: ignore
from prometheus_fastapi_instrumentator import Instrumentator # type: ignore
from utils import (
    rag as Rag,
@@ -66,6 +70,8 @@ rags = [
    # { "name": "LKML", "enabled": False, "description": "Full associative data for entire LKML mailing list archive." },
]
REQUEST_TIME = Summary('request_processing_seconds', 'Time spent processing request')
system_message_old = f"""
Launched on {datetime.now().isoformat()}.
@@ -204,7 +210,7 @@ def parse_args():
# %%
# %%
def is_valid_uuid(value):
def is_valid_uuid(value: str) -> bool:
    try:
        uuid_obj = uuid.UUID(value, version=4)
        return str(uuid_obj) == value
@@ -236,6 +242,8 @@ class WebServer:
    def __init__(self, llm, model=MODEL_NAME):
        self.app = FastAPI(lifespan=self.lifespan)
        Instrumentator().instrument(self.app)
        Instrumentator().expose(self.app)
        self.contexts = {}
        self.llm = llm
        self.model = model
@@ -658,14 +666,15 @@ class WebServer:
            return JSONResponse({"status": "healthy"})
        @self.app.get("/{path:path}")
        async def serve_static(path: str):
        async def serve_static(path: str, request: Request):
            full_path = os.path.join(defines.static_content, path)
            if os.path.exists(full_path) and os.path.isfile(full_path):
                logger.info(f"Serve static request for {full_path}")
                return FileResponse(full_path)
            logger.info(f"Serve index.html for {path}")
            return FileResponse(os.path.join(defines.static_content, "index.html"))
    def save_context(self, context_id):
        """
        Serialize a Python dictionary to a file in the agents directory.
@@ -785,6 +794,7 @@ class WebServer:
        logger.info(f"Context {context_id} is not yet loaded.")
        return self.load_or_create_context(context_id)
    @REQUEST_TIME.time()
    async def generate_response(self, context : Context, agent : Agent, prompt : str, options: Tunables | None) -> AsyncGenerator[Message, None]:
        if not self.file_watcher:
            raise Exception("File watcher not initialized")

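The server-side changes above boil down to the pattern sketched here: instrument the FastAPI app so it serves GET /metrics, and time a coroutine with a prometheus_client Summary. This is a self-contained illustration with hypothetical app and handler names, not a copy of the WebServer code; the chained single-instance Instrumentator call is the library's documented form and, with default settings, should behave the same as the two separate instrument()/expose() calls in the diff.

# Illustrative sketch only: hypothetical standalone app, not the commit's WebServer.
from fastapi import FastAPI
from prometheus_client import Summary
from prometheus_fastapi_instrumentator import Instrumentator

REQUEST_TIME = Summary("request_processing_seconds", "Time spent processing request")

app = FastAPI()
Instrumentator().instrument(app).expose(app)  # adds middleware and a GET /metrics endpoint

@REQUEST_TIME.time()  # records each call's duration in the Summary
async def do_work() -> dict:
    return {"status": "ok"}

@app.get("/demo")
async def demo():
    return await do_work()
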
@@ -26,6 +26,7 @@ def setup_logging(level=defines.logging_level) -> logging.Logger:
    # Now reduce verbosity for FastAPI, Uvicorn, Starlette
    for noisy_logger in ("uvicorn", "uvicorn.error", "uvicorn.access", "fastapi", "starlette"):
    #for noisy_logger in ("starlette"):
        logging.getLogger(noisy_logger).setLevel(logging.WARNING)
    logger = logging.getLogger(__name__)