Metrics with Prometheus
Install Prometheus
Kubernetes:

## Add Prometheus Helm repo
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
## Install Prometheus stack
helm install prometheus prometheus-community/kube-prometheus-stack \
--namespace monitoring \
--create-namespace \
--set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false
services:
  prometheus:
    # NOTE(review): pin a specific version instead of :latest for reproducible deploys.
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      # Scrape configuration file mounted read-only into the container.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      # Named volume so the TSDB survives container restarts.
      - prometheus-data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'

volumes:
  prometheus-data:
Prometheus Configuration
prometheus.yml:

global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  # MCP Server metrics
  - job_name: 'mcp-server-langgraph'
    static_configs:
      - targets: ['mcp-server-langgraph:9090']
    metrics_path: '/metrics'

  # Redis metrics
  - job_name: 'redis'
    static_configs:
      - targets: ['redis-exporter:9121']

  # PostgreSQL metrics
  - job_name: 'postgres'
    static_configs:
      - targets: ['postgres-exporter:9187']

  # Keycloak metrics
  - job_name: 'keycloak'
    static_configs:
      - targets: ['keycloak:8080']
    metrics_path: '/metrics'

  # Node exporter (system metrics)
  - job_name: 'node'
    static_configs:
      - targets: ['node-exporter:9100']
Application Metrics
Instrument FastAPI with Prometheus:

from prometheus_client import Counter, Histogram, Gauge, generate_latest
from fastapi import FastAPI, Request
import time
app = FastAPI()
## Define metrics
# Count of HTTP requests served, partitioned by method, path, and response status.
http_requests_total = Counter(
'http_requests_total',
'Total HTTP requests',
['method', 'endpoint', 'status']
)
# Request latency distribution (seconds), partitioned by method and path.
http_request_duration_seconds = Histogram(
'http_request_duration_seconds',
'HTTP request latency',
['method', 'endpoint']
)
# LLM calls made, partitioned by provider, model, and call outcome.
llm_requests_total = Counter(
'llm_requests_total',
'Total LLM requests',
['provider', 'model', 'status']
)
# Token consumption; `type` distinguishes prompt vs. completion tokens.
llm_tokens_total = Counter(
'llm_tokens_total',
'Total LLM tokens consumed',
['provider', 'model', 'type']
)
# Current count of live user sessions (gauge: can go up and down).
active_sessions = Gauge(
'active_sessions',
'Number of active user sessions'
)
# Authorization checks, partitioned by result (e.g. allowed/denied -- confirm values at call sites).
openfga_checks_total = Counter(
'openfga_checks_total',
'Total OpenFGA authorization checks',
['result']
)
## Middleware to track metrics
@app.middleware("http")
async def track_metrics(request: Request, call_next):
    """Record count and latency metrics for every HTTP request.

    Forwards the request down the middleware stack, then increments the
    (method, path, status) request counter and observes the elapsed time
    in the (method, path) latency histogram.
    """
    start_time = time.time()
    # Process request
    response = await call_next(request)
    # Record metrics
    duration = time.time() - start_time
    # NOTE(review): request.url.path is the raw path, so path parameters
    # (e.g. /users/123) create one label value per distinct URL; consider
    # using the matched route template to bound label cardinality.
    http_requests_total.labels(
        method=request.method,
        endpoint=request.url.path,
        status=response.status_code,
    ).inc()
    http_request_duration_seconds.labels(
        method=request.method,
        endpoint=request.url.path,
    ).observe(duration)
    return response
## Metrics endpoint
@app.get("/metrics")
async def metrics():
    """Expose all registered metrics in Prometheus text exposition format."""
    # Imported here because the module-level import brings in only FastAPI and
    # Request; referencing Response without this would raise NameError.
    from fastapi import Response
    return Response(
        content=generate_latest(),
        media_type="text/plain"
    )
Custom Business Metrics
# Histogram must be imported too: tool_execution_duration below uses it.
from prometheus_client import Counter, Gauge, Histogram

## User metrics
user_registrations_total = Counter(
    'user_registrations_total',
    'Total user registrations'
)
# Logins partitioned by identity provider.
user_logins_total = Counter(
    'user_logins_total',
    'Total user logins',
    ['provider']
)

## Conversation metrics
conversations_created_total = Counter(
    'conversations_created_total',
    'Total conversations created'
)
# Messages partitioned by role (e.g. "user" / "assistant" -- see the chat handler).
messages_sent_total = Counter(
    'messages_sent_total',
    'Total messages sent',
    ['role']
)
# Current count of live conversations (gauge: can go up and down).
active_conversations = Gauge(
    'active_conversations',
    'Number of active conversations'
)

## Tool usage metrics
tool_executions_total = Counter(
    'tool_executions_total',
    'Total tool executions',
    ['tool_name', 'status']
)
# Per-tool execution latency distribution (seconds).
tool_execution_duration = Histogram(
    'tool_execution_duration_seconds',
    'Tool execution duration',
    ['tool_name']
)
## Usage in code
@app.post("/auth/register")
async def register(user_data: UserCreate):
    """Register a new user, counting each successful registration."""
    # ... registration logic ...
    user_registrations_total.inc()
    # NOTE(review): `user` must be produced by the elided registration logic above.
    return user
@app.post("/chat")
async def chat(query: str):
conversations_created_total.inc()
messages_sent_total.labels(role="user").inc()
# Execute LLM call
start_time = time.time()
response = await llm.ainvoke(query)
duration = time.time() - start_time
# Track LLM metrics
llm_requests_total.labels(
provider="anthropic",
model="claude-sonnet-4-5-20250929",
status="success"
).inc()
llm_tokens_total.labels(
provider="anthropic",
model="claude-sonnet-4-5-20250929",
type="prompt"
).inc(response.usage.prompt_tokens)
llm_tokens_total.labels(
provider="anthropic",
model="claude-sonnet-4-5-20250929",
type="completion"
).inc(response.usage.completion_tokens)
messages_sent_total.labels(role="assistant").inc()
return response
Next Steps
Distributed Tracing
Add distributed tracing with Jaeger
Grafana Dashboards
Create dashboards to visualize metrics
Alerting
Configure alerts based on metrics
Back to Overview
Return to monitoring overview