[Course][Udemy][Gourav Shah] Ultimate DevOps to MLOps Bootcamp - Build ML CI-CD Pipelines [ENG, 2025] : 09. Monitoring a ML Model


Делаю:
2025.12.30


// OpenTelemetry FastAPI Instrumentation https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation/fastapi/fastapi.html
// NOTE: the code below uses prometheus-fastapi-instrumentator (not OpenTelemetry):
// https://github.com/trallnag/prometheus-fastapi-instrumentator


src/api/requirements.txt


prometheus-fastapi-instrumentator==6.1.0


src/api/main.py


from prometheus_fastapi_instrumentator import Instrumentator


После

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


Добавляю:


# Initialize and instrument Prometheus metrics
Instrumentator().instrument(app).expose(app)


$ kubectl rollout restart deployment model


$ cat <<EOF | kubectl apply -f -
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: house-price-api-monitor
  labels:
    release: prom
spec:
  selector:
    matchLabels:
      app: model
  namespaceSelector:
    matchNames:
      - default
  endpoints:
    # ServiceMonitor 'port' expects a Service port *name*; for a numeric
    # container/service port use 'targetPort' instead.
    - targetPort: 8000
      path: /metrics
      interval: 15s
EOF
// OK!
$ curl http://localhost:30100/metrics


// swagger
http://localhost:30100/docs

// prometheus
http://localhost:30300/targets


enhanced_fastapi_ml_dashboard.json

https://gist.github.com/gouravjshah/ca57251c80bc2f4a2adde0a878ebc585


{
  "title": "ML Model API - Full Observability Dashboard",
  "timezone": "browser",
  "refresh": "10s",
  "schemaVersion": 30,
  "version": 1,
  "panels": [
    {
      "type": "timeseries",
      "title": "Request Rate (Total per Endpoint)",
      "targets": [
        {
          "expr": "sum(rate(http_requests_total[1m])) by (handler)",
          "legendFormat": "",
          "refId": "A"
        }
      ],
      "datasource": "Prometheus",
      "gridPos": { "x": 0, "y": 0, "w": 12, "h": 8 }
    },
    {
      "type": "timeseries",
      "title": "Latency (95th Percentile)",
      "targets": [
        {
          "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[1m])) by (le, handler))",
          "legendFormat": "",
          "refId": "B"
        }
      ],
      "datasource": "Prometheus",
      "gridPos": { "x": 12, "y": 0, "w": 12, "h": 8 }
    },
    {
      "type": "timeseries",
      "title": "Error Rate (4xx/5xx Responses)",
      "targets": [
        {
          "expr": "sum(rate(http_requests_total{status=~\"4..|5..\"}[1m])) by (handler)",
          "legendFormat": "",
          "refId": "C"
        }
      ],
      "datasource": "Prometheus",
      "gridPos": { "x": 0, "y": 8, "w": 12, "h": 8 }
    },
    {
      "type": "timeseries",
      "title": "Request Size (Bytes/sec)",
      "targets": [
        {
          "expr": "sum(rate(http_request_size_bytes_sum[1m])) by (handler)",
          "legendFormat": "",
          "refId": "D"
        }
      ],
      "datasource": "Prometheus",
      "gridPos": { "x": 12, "y": 8, "w": 12, "h": 8 }
    },
    {
      "type": "timeseries",
      "title": "Response Size (Bytes/sec)",
      "targets": [
        {
          "expr": "sum(rate(http_response_size_bytes_sum[1m])) by (handler)",
          "legendFormat": "",
          "refId": "E"
        }
      ],
      "datasource": "Prometheus",
      "gridPos": { "x": 0, "y": 16, "w": 12, "h": 8 }
    },
    {
      "type": "timeseries",
      "title": "In-Flight Requests",
      "targets": [
        {
          "expr": "http_request_duration_seconds_count - ignoring(le) group_left sum(http_request_duration_seconds_bucket) by (handler)",
          "legendFormat": "",
          "refId": "F"
        }
      ],
      "datasource": "Prometheus",
      "gridPos": { "x": 12, "y": 16, "w": 12, "h": 8 }
    }
  ],
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "type": "dashboard",
        "name": "Annotations & Alerts",
        "enable": true
      }
    ]
  }
}


fastapi_prom_grafana_dashboard.json

https://gist.github.com/gouravjshah/2dd5482c36bc9c2111e036fb70916249

{
  "title": "FastAPI Prometheus Metrics Dashboard",
  "timezone": "browser",
  "refresh": "10s",
  "schemaVersion": 30,
  "version": 1,
  "panels": [
    {
      "type": "timeseries",
      "title": "Request Rate (Total)",
      "targets": [
        {
          "expr": "sum(rate(http_requests_total[1m])) by (handler)",
          "legendFormat": "",
          "refId": "A"
        }
      ],
      "datasource": "Prometheus",
      "gridPos": {
        "x": 0,
        "y": 0,
        "w": 12,
        "h": 8
      }
    },
    {
      "type": "timeseries",
      "title": "Latency (95th Percentile)",
      "targets": [
        {
          "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[1m])) by (le, handler))",
          "legendFormat": "",
          "refId": "A"
        }
      ],
      "datasource": "Prometheus",
      "gridPos": {
        "x": 12,
        "y": 0,
        "w": 12,
        "h": 8
      }
    },
    {
      "type": "timeseries",
      "title": "Request Size (Bytes/sec)",
      "targets": [
        {
          "expr": "rate(http_request_size_bytes_sum[1m])",
          "legendFormat": "",
          "refId": "A"
        }
      ],
      "datasource": "Prometheus",
      "gridPos": {
        "x": 0,
        "y": 8,
        "w": 12,
        "h": 8
      }
    },
    {
      "type": "timeseries",
      "title": "Response Size (Bytes/sec)",
      "targets": [
        {
          "expr": "rate(http_response_size_bytes_sum[1m])",
          "legendFormat": "",
          "refId": "A"
        }
      ],
      "datasource": "Prometheus",
      "gridPos": {
        "x": 12,
        "y": 8,
        "w": 12,
        "h": 8
      }
    }
  ],
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "type": "dashboard",
        "name": "Annotations & Alerts",
        "enable": true
      }
    ]
  }
}


// import grafana dashboard
http://localhost:30200/dashboard/import