Skip to main content

Metrics with Prometheus

Install Prometheus

Kubernetes:
## Add Prometheus Helm repo
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update

## Install Prometheus stack
## kube-prometheus-stack bundles Prometheus, Alertmanager, Grafana and the operator.
## Setting serviceMonitorSelectorNilUsesHelmValues=false makes Prometheus pick up
## ServiceMonitors from all namespaces, not only those created by this release.
helm install prometheus prometheus-community/kube-prometheus-stack \
  --namespace monitoring \
  --create-namespace \
  --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false
Docker Compose:
services:
  prometheus:
    # NOTE(review): ":latest" is unpinned — pin a specific Prometheus version
    # (e.g. prom/prometheus:v2.x.y) for reproducible deployments.
    image: prom/prometheus:latest
    ports:
      - "9090:9090"  # Prometheus web UI and HTTP API
    volumes:
      # Scrape configuration (the prometheus.yml shown below)
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      # Named volume so the TSDB survives container restarts
      - prometheus-data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'

volumes:
  prometheus-data:

Prometheus Configuration

prometheus.yml:
global:
  scrape_interval: 15s      # how often Prometheus scrapes each target
  evaluation_interval: 15s  # how often recording/alerting rules are evaluated

scrape_configs:
  # MCP Server metrics (the FastAPI app's /metrics endpoint below)
  - job_name: 'mcp-server-langgraph'
    static_configs:
      - targets: ['mcp-server-langgraph:9090']
    metrics_path: '/metrics'

  # Redis metrics, exposed by the redis-exporter sidecar on its default port
  - job_name: 'redis'
    static_configs:
      - targets: ['redis-exporter:9121']

  # PostgreSQL metrics via postgres-exporter (default port 9187)
  - job_name: 'postgres'
    static_configs:
      - targets: ['postgres-exporter:9187']

  # Keycloak metrics
  # NOTE(review): Keycloak serves /metrics only when its metrics feature is
  # enabled — confirm the server is started with metrics enabled.
  - job_name: 'keycloak'
    static_configs:
      - targets: ['keycloak:8080']
    metrics_path: '/metrics'

  # Node exporter (host/system metrics, default port 9100)
  - job_name: 'node'
    static_configs:
      - targets: ['node-exporter:9100']

Application Metrics

Instrument FastAPI with Prometheus:
import time

from fastapi import FastAPI, Request, Response
from prometheus_client import (
    CONTENT_TYPE_LATEST,
    Counter,
    Gauge,
    Histogram,
    generate_latest,
)

app = FastAPI()

## Define metrics
# Request count labelled by method/endpoint/status; drives rate() and
# error-ratio queries in Grafana/alert rules.
http_requests_total = Counter(
    'http_requests_total',
    'Total HTTP requests',
    ['method', 'endpoint', 'status']
)

# Request latency histogram (seconds), per method/endpoint.
http_request_duration_seconds = Histogram(
    'http_request_duration_seconds',
    'HTTP request latency',
    ['method', 'endpoint']
)

# LLM call count by provider/model and outcome.
llm_requests_total = Counter(
    'llm_requests_total',
    'Total LLM requests',
    ['provider', 'model', 'status']
)

# Token usage; 'type' distinguishes prompt vs completion tokens.
llm_tokens_total = Counter(
    'llm_tokens_total',
    'Total LLM tokens consumed',
    ['provider', 'model', 'type']
)

# Current session count (a Gauge: can go up and down).
active_sessions = Gauge(
    'active_sessions',
    'Number of active user sessions'
)

# Authorization check count, labelled by allow/deny result.
openfga_checks_total = Counter(
    'openfga_checks_total',
    'Total OpenFGA authorization checks',
    ['result']
)

## Middleware to track metrics
@app.middleware("http")
async def track_metrics(request: Request, call_next):
    """Record request count and latency for every HTTP request.

    Times the downstream handler with time.time() and updates the
    Prometheus counter/histogram defined above, then returns the
    handler's response unchanged.
    """
    method, path = request.method, request.url.path

    began = time.time()
    response = await call_next(request)
    elapsed = time.time() - began

    # Latency first, then the request counter — order is immaterial.
    http_request_duration_seconds.labels(
        method=method,
        endpoint=path,
    ).observe(elapsed)

    http_requests_total.labels(
        method=method,
        endpoint=path,
        status=response.status_code,
    ).inc()

    return response

## Metrics endpoint
@app.get("/metrics")
async def metrics():
    """Expose all registered Prometheus metrics.

    Returns the current metric state in the Prometheus text exposition
    format.  Uses CONTENT_TYPE_LATEST ("text/plain; version=0.0.4;
    charset=utf-8") instead of a bare "text/plain" so Prometheus parses
    the payload correctly; Response was previously unimported, which
    raised NameError at request time.
    """
    return Response(
        content=generate_latest(),
        media_type=CONTENT_TYPE_LATEST
    )

Custom Business Metrics

# Histogram was missing from the original import and is used below
# (tool_execution_duration), which raised NameError at import time.
from prometheus_client import Counter, Gauge, Histogram

## User metrics
# Monotonic registration count.
user_registrations_total = Counter(
    'user_registrations_total',
    'Total user registrations'
)

# Login count, labelled by identity provider.
user_logins_total = Counter(
    'user_logins_total',
    'Total user logins',
    ['provider']
)

## Conversation metrics
conversations_created_total = Counter(
    'conversations_created_total',
    'Total conversations created'
)

# Message count; 'role' is "user" or "assistant".
messages_sent_total = Counter(
    'messages_sent_total',
    'Total messages sent',
    ['role']
)

# Current open conversations (Gauge: may decrease).
active_conversations = Gauge(
    'active_conversations',
    'Number of active conversations'
)

## Tool usage metrics
tool_executions_total = Counter(
    'tool_executions_total',
    'Total tool executions',
    ['tool_name', 'status']
)

# Per-tool execution latency in seconds.
tool_execution_duration = Histogram(
    'tool_execution_duration_seconds',
    'Tool execution duration',
    ['tool_name']
)

## Usage in code
@app.post("/auth/register")
async def register(user_data: UserCreate):
    """Register a new user and count the registration.

    NOTE(review): illustrative snippet — the registration logic is elided
    and `user` is never assigned here; fill both in before use.
    """
    # ... registration logic ...
    user_registrations_total.inc()
    return user

@app.post("/chat")
async def chat(query: str):
    conversations_created_total.inc()
    messages_sent_total.labels(role="user").inc()

    # Execute LLM call
    start_time = time.time()
    response = await llm.ainvoke(query)
    duration = time.time() - start_time

    # Track LLM metrics
    llm_requests_total.labels(
        provider="anthropic",
        model="claude-sonnet-4-5-20250929",
        status="success"
    ).inc()

    llm_tokens_total.labels(
        provider="anthropic",
        model="claude-sonnet-4-5-20250929",
        type="prompt"
    ).inc(response.usage.prompt_tokens)

    llm_tokens_total.labels(
        provider="anthropic",
        model="claude-sonnet-4-5-20250929",
        type="completion"
    ).inc(response.usage.completion_tokens)

    messages_sent_total.labels(role="assistant").inc()

    return response

Next Steps

Distributed Tracing

Add distributed tracing with Jaeger

Grafana Dashboards

Create dashboards to visualize metrics

Alerting

Configure alerts based on metrics

Back to Overview

Return to monitoring overview