Begin adding Prometheus metrics
This commit is contained in:
parent
406d1c1dc1
commit
02651412fe
@ -259,6 +259,9 @@ COPY /src/requirements.txt /opt/backstory/src/requirements.txt
|
|||||||
RUN pip install -r /opt/backstory/src/requirements.txt
|
RUN pip install -r /opt/backstory/src/requirements.txt
|
||||||
RUN pip install 'markitdown[all]' pydantic
|
RUN pip install 'markitdown[all]' pydantic
|
||||||
|
|
||||||
|
# Prometheus
|
||||||
|
RUN pip install prometheus-client prometheus-fastapi-instrumentator
|
||||||
|
|
||||||
SHELL [ "/bin/bash", "-c" ]
|
SHELL [ "/bin/bash", "-c" ]
|
||||||
|
|
||||||
RUN { \
|
RUN { \
|
||||||
|
0
cache/prometheus/.keep
vendored
Normal file
0
cache/prometheus/.keep
vendored
Normal file
@ -149,6 +149,23 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ./cache:/root/.cache
|
- ./cache:/root/.cache
|
||||||
|
|
||||||
|
prometheus:
|
||||||
|
image: prom/prometheus
|
||||||
|
container_name: prometheus
|
||||||
|
restart: "always"
|
||||||
|
# env_file:
|
||||||
|
# - .env
|
||||||
|
# devices:
|
||||||
|
# - /dev/dri:/dev/dri
|
||||||
|
ports:
|
||||||
|
- 9090:9090 # Prometheus
|
||||||
|
networks:
|
||||||
|
- internal
|
||||||
|
volumes:
|
||||||
|
# - ./prometheus.yml:/etc/prometheus/prometheus.yml
|
||||||
|
- ./cache/prometheus:/prometheus
|
||||||
|
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
internal:
|
internal:
|
||||||
driver: bridge
|
driver: bridge
|
||||||
|
@ -37,6 +37,8 @@ try_import("uvicorn")
|
|||||||
try_import("numpy")
|
try_import("numpy")
|
||||||
try_import("umap")
|
try_import("umap")
|
||||||
try_import("sklearn")
|
try_import("sklearn")
|
||||||
|
try_import("prometheus_client")
|
||||||
|
try_import("prometheus_fastapi_instrumentator")
|
||||||
|
|
||||||
import ollama
|
import ollama
|
||||||
import requests
|
import requests
|
||||||
@ -48,6 +50,8 @@ import uvicorn # type: ignore
|
|||||||
import numpy as np # type: ignore
|
import numpy as np # type: ignore
|
||||||
import umap # type: ignore
|
import umap # type: ignore
|
||||||
from sklearn.preprocessing import MinMaxScaler # type: ignore
|
from sklearn.preprocessing import MinMaxScaler # type: ignore
|
||||||
|
from prometheus_client import Summary # type: ignore
|
||||||
|
from prometheus_fastapi_instrumentator import Instrumentator # type: ignore
|
||||||
|
|
||||||
from utils import (
|
from utils import (
|
||||||
rag as Rag,
|
rag as Rag,
|
||||||
@ -66,6 +70,8 @@ rags = [
|
|||||||
# { "name": "LKML", "enabled": False, "description": "Full associative data for entire LKML mailing list archive." },
|
# { "name": "LKML", "enabled": False, "description": "Full associative data for entire LKML mailing list archive." },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
REQUEST_TIME = Summary('request_processing_seconds', 'Time spent processing request')
|
||||||
|
|
||||||
system_message_old = f"""
|
system_message_old = f"""
|
||||||
Launched on {datetime.now().isoformat()}.
|
Launched on {datetime.now().isoformat()}.
|
||||||
|
|
||||||
@ -204,7 +210,7 @@ def parse_args():
|
|||||||
# %%
|
# %%
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
def is_valid_uuid(value):
|
def is_valid_uuid(value: str) -> bool:
|
||||||
try:
|
try:
|
||||||
uuid_obj = uuid.UUID(value, version=4)
|
uuid_obj = uuid.UUID(value, version=4)
|
||||||
return str(uuid_obj) == value
|
return str(uuid_obj) == value
|
||||||
@ -236,6 +242,8 @@ class WebServer:
|
|||||||
|
|
||||||
def __init__(self, llm, model=MODEL_NAME):
|
def __init__(self, llm, model=MODEL_NAME):
|
||||||
self.app = FastAPI(lifespan=self.lifespan)
|
self.app = FastAPI(lifespan=self.lifespan)
|
||||||
|
Instrumentator().instrument(self.app)
|
||||||
|
Instrumentator().expose(self.app)
|
||||||
self.contexts = {}
|
self.contexts = {}
|
||||||
self.llm = llm
|
self.llm = llm
|
||||||
self.model = model
|
self.model = model
|
||||||
@ -658,11 +666,12 @@ class WebServer:
|
|||||||
return JSONResponse({"status": "healthy"})
|
return JSONResponse({"status": "healthy"})
|
||||||
|
|
||||||
@self.app.get("/{path:path}")
async def serve_static(path: str, request: Request):
    """Serve a file from the static content directory, else index.html.

    Args:
        path: Remainder of the request URL captured by the catch-all
            route. It is client-controlled and must be treated as
            untrusted input.
        request: The incoming request; accepted by the handler but not
            used in this body.

    Returns:
        A FileResponse for the requested file when it is a regular file
        located under ``defines.static_content``; otherwise a
        FileResponse for ``index.html`` in that directory.
    """
    static_root = os.path.abspath(defines.static_content)
    # abspath collapses any ".." segments. If `path` is absolute,
    # os.path.join discards the root entirely, so the containment check
    # below is what keeps the lookup inside the static directory.
    full_path = os.path.abspath(os.path.join(static_root, path))
    try:
        inside_root = os.path.commonpath([static_root, full_path]) == static_root
    except ValueError:
        # commonpath raises when the paths cannot be compared (e.g.
        # different drives on Windows); treat that as outside the root.
        inside_root = False

    # isfile() already returns False for missing paths, so no separate
    # exists() check is needed.
    if inside_root and os.path.isfile(full_path):
        logger.info(f"Serve static request for {full_path}")
        return FileResponse(full_path)

    logger.info(f"Serve index.html for {path}")
    return FileResponse(os.path.join(defines.static_content, "index.html"))
|
||||||
|
|
||||||
@ -785,6 +794,7 @@ class WebServer:
|
|||||||
logger.info(f"Context {context_id} is not yet loaded.")
|
logger.info(f"Context {context_id} is not yet loaded.")
|
||||||
return self.load_or_create_context(context_id)
|
return self.load_or_create_context(context_id)
|
||||||
|
|
||||||
|
@REQUEST_TIME.time()
|
||||||
async def generate_response(self, context : Context, agent : Agent, prompt : str, options: Tunables | None) -> AsyncGenerator[Message, None]:
|
async def generate_response(self, context : Context, agent : Agent, prompt : str, options: Tunables | None) -> AsyncGenerator[Message, None]:
|
||||||
if not self.file_watcher:
|
if not self.file_watcher:
|
||||||
raise Exception("File watcher not initialized")
|
raise Exception("File watcher not initialized")
|
||||||
|
@ -26,6 +26,7 @@ def setup_logging(level=defines.logging_level) -> logging.Logger:
|
|||||||
|
|
||||||
# Now reduce verbosity for FastAPI, Uvicorn, Starlette
|
# Now reduce verbosity for FastAPI, Uvicorn, Starlette
|
||||||
for noisy_logger in ("uvicorn", "uvicorn.error", "uvicorn.access", "fastapi", "starlette"):
|
for noisy_logger in ("uvicorn", "uvicorn.error", "uvicorn.access", "fastapi", "starlette"):
|
||||||
|
#for noisy_logger in ("starlette",):  # NOTE: trailing comma required; ("starlette") is a plain string
|
||||||
logging.getLogger(noisy_logger).setLevel(logging.WARNING)
|
logging.getLogger(noisy_logger).setLevel(logging.WARNING)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user