# Metrics with Prometheus

## Install Prometheus

Kubernetes (Helm):
## Add Prometheus Helm repo and refresh the local chart index
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
## Install Prometheus stack (Prometheus, Alertmanager, Grafana, node-exporter)
## into a dedicated "monitoring" namespace.
## The --set flag makes Prometheus discover ServiceMonitors from ALL Helm
## releases, not only those labelled by this chart's own release.
helm install prometheus prometheus-community/kube-prometheus-stack \
--namespace monitoring \
--create-namespace \
--set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false
Docker Compose (`docker-compose.yml`):
# Run Prometheus as a single container.
# NOTE: extraction flattened the original indentation; reconstructed to
# standard Compose nesting (service keys under the service name).
services:
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"          # Prometheus web UI / API
    volumes:
      # Scrape configuration (see prometheus.yml below)
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      # Named volume so TSDB data survives container restarts
      - prometheus-data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'

volumes:
  prometheus-data:
## Prometheus Configuration

`prometheus.yml`:
# Prometheus scrape configuration.
# NOTE: extraction flattened the original indentation; reconstructed to the
# standard prometheus.yml layout (keys nested under each job).
global:
  scrape_interval: 15s       # how often targets are scraped
  evaluation_interval: 15s   # how often recording/alerting rules run

scrape_configs:
  # MCP Server metrics
  - job_name: 'mcp-server-langgraph'
    static_configs:
      - targets: ['mcp-server-langgraph:9090']
    metrics_path: '/metrics'

  # Redis metrics (via redis-exporter sidecar)
  - job_name: 'redis'
    static_configs:
      - targets: ['redis-exporter:9121']

  # PostgreSQL metrics (via postgres-exporter sidecar)
  - job_name: 'postgres'
    static_configs:
      - targets: ['postgres-exporter:9187']

  # Keycloak metrics
  - job_name: 'keycloak'
    static_configs:
      - targets: ['keycloak:8080']
    metrics_path: '/metrics'

  # Node exporter (system metrics)
  - job_name: 'node'
    static_configs:
      - targets: ['node-exporter:9100']
## Application Metrics

Instrument FastAPI with Prometheus:
"""FastAPI instrumentation with prometheus_client.

Defines the application's metric objects, a middleware that records request
count and latency, and a /metrics endpoint for Prometheus to scrape.
"""
# FIX: Response was used in the /metrics endpoint but never imported.
# FIX: serve metrics with CONTENT_TYPE_LATEST (the Prometheus text exposition
# content type, including version and charset) instead of bare "text/plain".
from prometheus_client import (
    CONTENT_TYPE_LATEST,
    Counter,
    Gauge,
    Histogram,
    generate_latest,
)
from fastapi import FastAPI, Request, Response
import time

app = FastAPI()

# --- Metric definitions -----------------------------------------------------

# Request volume, sliceable by route and outcome.
http_requests_total = Counter(
    'http_requests_total',
    'Total HTTP requests',
    ['method', 'endpoint', 'status']
)

# Request latency distribution per route.
http_request_duration_seconds = Histogram(
    'http_request_duration_seconds',
    'HTTP request latency',
    ['method', 'endpoint']
)

# LLM call volume per provider/model/outcome.
llm_requests_total = Counter(
    'llm_requests_total',
    'Total LLM requests',
    ['provider', 'model', 'status']
)

# Token consumption; `type` distinguishes prompt vs completion tokens.
llm_tokens_total = Counter(
    'llm_tokens_total',
    'Total LLM tokens consumed',
    ['provider', 'model', 'type']
)

# Point-in-time count of live user sessions.
active_sessions = Gauge(
    'active_sessions',
    'Number of active user sessions'
)

# Authorization check volume, labelled by allow/deny result.
openfga_checks_total = Counter(
    'openfga_checks_total',
    'Total OpenFGA authorization checks',
    ['result']
)

# --- Middleware -------------------------------------------------------------

@app.middleware("http")
async def track_metrics(request: Request, call_next):
    """Record count and latency for every HTTP request passing through."""
    start_time = time.time()
    response = await call_next(request)
    duration = time.time() - start_time

    http_requests_total.labels(
        method=request.method,
        endpoint=request.url.path,
        status=response.status_code
    ).inc()
    http_request_duration_seconds.labels(
        method=request.method,
        endpoint=request.url.path
    ).observe(duration)
    return response

# --- Scrape endpoint --------------------------------------------------------

@app.get("/metrics")
async def metrics():
    """Expose all registered metrics in the Prometheus text format."""
    return Response(
        content=generate_latest(),
        media_type=CONTENT_TYPE_LATEST
    )
## Custom Business Metrics
"""Business-level metric definitions (users, conversations, tool usage)."""
# FIX: Histogram is used below (tool_execution_duration) but was missing
# from the original import, which only brought in Counter and Gauge.
from prometheus_client import Counter, Gauge, Histogram

# --- User metrics ------------------------------------------------------

user_registrations_total = Counter(
    'user_registrations_total',
    'Total user registrations'
)

# Labelled by identity provider so logins can be broken down per IdP.
user_logins_total = Counter(
    'user_logins_total',
    'Total user logins',
    ['provider']
)

# --- Conversation metrics ----------------------------------------------

conversations_created_total = Counter(
    'conversations_created_total',
    'Total conversations created'
)

# `role` distinguishes user vs assistant messages.
messages_sent_total = Counter(
    'messages_sent_total',
    'Total messages sent',
    ['role']
)

active_conversations = Gauge(
    'active_conversations',
    'Number of active conversations'
)

# --- Tool usage metrics ------------------------------------------------

tool_executions_total = Counter(
    'tool_executions_total',
    'Total tool executions',
    ['tool_name', 'status']
)

tool_execution_duration = Histogram(
    'tool_execution_duration_seconds',
    'Tool execution duration',
    ['tool_name']
)
## Usage in code
## NOTE: these handlers are illustrative snippets — `app`, `UserCreate`,
## `llm`, and the metric objects are defined elsewhere in the project.

@app.post("/auth/register")
async def register(user_data: UserCreate):
    """Register a user and count the registration."""
    # ... registration logic producing `user` (elided in this example) ...
    user_registrations_total.inc()
    return user


@app.post("/chat")
async def chat(query: str):
    """Handle a chat turn, recording conversation, message and LLM metrics."""
    conversations_created_total.inc()
    messages_sent_total.labels(role="user").inc()

    # Execute LLM call, timing it.
    start_time = time.time()
    response = await llm.ainvoke(query)
    duration = time.time() - start_time
    # NOTE(review): `duration` is computed but never recorded — consider
    # observing it into a Histogram (e.g. llm_request_duration_seconds).

    # Track LLM call and token metrics.
    # assumes `response.usage` exposes prompt_tokens / completion_tokens —
    # TODO confirm against the LLM client's response type.
    llm_requests_total.labels(
        provider="anthropic",
        model="claude-sonnet-4-5-20250929",
        status="success"
    ).inc()
    llm_tokens_total.labels(
        provider="anthropic",
        model="claude-sonnet-4-5-20250929",
        type="prompt"
    ).inc(response.usage.prompt_tokens)
    llm_tokens_total.labels(
        provider="anthropic",
        model="claude-sonnet-4-5-20250929",
        type="completion"
    ).inc(response.usage.completion_tokens)

    messages_sent_total.labels(role="assistant").inc()
    return response