[Course][Udemy][Gourav Shah] Ultimate DevOps to MLOps Bootcamp - Build ML CI-CD Pipelines [ENG, 2025] : 09. Monitoring a ML Model
Делаю:
2025.12.30
// OpenTelemetry FastAPI Instrumentation https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation/fastapi/fastapi.html
src/api/requirements.txt
prometheus-fastapi-instrumentator==6.1.0
src/api/main.py
from prometheus_fastapi_instrumentator import Instrumentator
После
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
Добавляю:
# Initialize and instrument Prometheus metrics
Instrumentator().instrument(app).expose(app)
$ kubectl rollout restart deployment model
$ cat <<EOF | kubectl apply -f -
# ServiceMonitor asking the Prometheus Operator to scrape /metrics of the model API.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: house-price-api-monitor
  labels:
    # NOTE(review): presumably has to match the serviceMonitorSelector of the
    # Prometheus installed by the "prom" release - confirm against the Helm values.
    release: prom
spec:
  # Select Services labelled app=model ...
  selector:
    matchLabels:
      app: model
  # ... but only in the "default" namespace.
  namespaceSelector:
    matchNames:
      - default
  endpoints:
    # "port" refers to the Service port by NAME, not by number; this assumes
    # the model Service names its port "8000" - verify with `kubectl get svc model -o yaml`.
    - port: "8000"
      path: /metrics
      interval: 15s
EOF
// OK!
$ curl http://localhost:30100/metrics
// swagger
http://localhost:30100/docs
// prometheus
http://localhost:30300/targets
enhanced_fastapi_ml_dashboard.json
https://gist.github.com/gouravjshah/ca57251c80bc2f4a2adde0a878ebc585
{
"title": "ML Model API - Full Observability Dashboard",
"timezone": "browser",
"refresh": "10s",
"schemaVersion": 30,
"version": 1,
"panels": [
{
"type": "timeseries",
"title": "Request Rate (Total per Endpoint)",
"targets": [
{
"expr": "sum(rate(http_requests_total[1m])) by (handler)",
"legendFormat": "",
"refId": "A"
}
],
"datasource": "Prometheus",
"gridPos": { "x": 0, "y": 0, "w": 12, "h": 8 }
},
{
"type": "timeseries",
"title": "Latency (95th Percentile)",
"targets": [
{
"expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[1m])) by (le, handler))",
"legendFormat": "",
"refId": "B"
}
],
"datasource": "Prometheus",
"gridPos": { "x": 12, "y": 0, "w": 12, "h": 8 }
},
{
"type": "timeseries",
"title": "Error Rate (4xx/5xx Responses)",
"targets": [
{
"expr": "sum(rate(http_requests_total{status=~\"4..|5..\"}[1m])) by (handler)",
"legendFormat": "",
"refId": "C"
}
],
"datasource": "Prometheus",
"gridPos": { "x": 0, "y": 8, "w": 12, "h": 8 }
},
{
"type": "timeseries",
"title": "Request Size (Bytes/sec)",
"targets": [
{
"expr": "sum(rate(http_request_size_bytes_sum[1m])) by (handler)",
"legendFormat": "",
"refId": "D"
}
],
"datasource": "Prometheus",
"gridPos": { "x": 12, "y": 8, "w": 12, "h": 8 }
},
{
"type": "timeseries",
"title": "Response Size (Bytes/sec)",
"targets": [
{
"expr": "sum(rate(http_response_size_bytes_sum[1m])) by (handler)",
"legendFormat": "",
"refId": "E"
}
],
"datasource": "Prometheus",
"gridPos": { "x": 0, "y": 16, "w": 12, "h": 8 }
},
{
"type": "timeseries",
"title": "In-Flight Requests",
"targets": [
{
"expr": "http_request_duration_seconds_count - ignoring(le) group_left sum(http_request_duration_seconds_bucket) by (handler)",
"legendFormat": "",
"refId": "F"
}
],
"datasource": "Prometheus",
"gridPos": { "x": 12, "y": 16, "w": 12, "h": 8 }
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"type": "dashboard",
"name": "Annotations & Alerts",
"enable": true
}
]
}
}
fastapi_prom_grafana_dashboard.json
https://gist.github.com/gouravjshah/2dd5482c36bc9c2111e036fb70916249
{
"title": "FastAPI Prometheus Metrics Dashboard",
"timezone": "browser",
"refresh": "10s",
"schemaVersion": 30,
"version": 1,
"panels": [
{
"type": "timeseries",
"title": "Request Rate (Total)",
"targets": [
{
"expr": "sum(rate(http_requests_total[1m])) by (handler)",
"legendFormat": "",
"refId": "A"
}
],
"datasource": "Prometheus",
"gridPos": {
"x": 0,
"y": 0,
"w": 12,
"h": 8
}
},
{
"type": "timeseries",
"title": "Latency (95th Percentile)",
"targets": [
{
"expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[1m])) by (le, handler))",
"legendFormat": "",
"refId": "A"
}
],
"datasource": "Prometheus",
"gridPos": {
"x": 12,
"y": 0,
"w": 12,
"h": 8
}
},
{
"type": "timeseries",
"title": "Request Size (Bytes/sec)",
"targets": [
{
"expr": "rate(http_request_size_bytes_sum[1m])",
"legendFormat": "",
"refId": "A"
}
],
"datasource": "Prometheus",
"gridPos": {
"x": 0,
"y": 8,
"w": 12,
"h": 8
}
},
{
"type": "timeseries",
"title": "Response Size (Bytes/sec)",
"targets": [
{
"expr": "rate(http_response_size_bytes_sum[1m])",
"legendFormat": "",
"refId": "A"
}
],
"datasource": "Prometheus",
"gridPos": {
"x": 12,
"y": 8,
"w": 12,
"h": 8
}
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"type": "dashboard",
"name": "Annotations & Alerts",
"enable": true
}
]
}
}
// import grafana dashboard
http://localhost:30200/dashboard/import