Files
TakeoutSaaS.Docs/deploy/prometheus/alert.rules.yml
2026-01-29 01:58:15 +00:00

35 lines
1.1 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Prometheus alerting rules for the takeoutsaas-app rule group.
# Three alerts are defined: aggregate 5xx error ratio, per-service P95 latency,
# and scrape-target availability for the admin/mini/user API jobs.
groups:
  - name: takeoutsaas-app
    # Evaluation interval for every rule in this group.
    interval: 30s
    rules:
      # Ratio of the 5xx request rate to the total request rate over a
      # 5-minute window; fires once the ratio has stayed above 0.05 for 5m.
      # Both sides are summed without a `by` clause, so the result is one
      # aggregate series with no per-service label.
      # NOTE(review): consider `by (service_name)` on both sums if per-service
      # alerting is wanted, matching HighP95Latency below - confirm intent.
      - alert: HighErrorRate
        expr: |
          sum(rate(http_server_request_duration_seconds_count{http_response_status_code=~"5.."}[5m]))
          / sum(rate(http_server_request_duration_seconds_count[5m])) > 0.05
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "API 5xx 错误率过高"
          description: "过去 5 分钟 5xx 占比超过 5%,请检查依赖或发布"

      # 95th-percentile request duration per service_name, computed from the
      # histogram buckets over a 5-minute window; fires once it has stayed
      # above 1 (seconds, per the metric name) for 5m.
      - alert: HighP95Latency
        expr: |
          histogram_quantile(0.95, sum(rate(http_server_request_duration_seconds_bucket[5m])) by (le, service_name))
          > 1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "API P95 延迟过高"
          # Separator added between the two clauses, as in the sibling
          # descriptions.
          description: "过去 5 分钟 P95 超过 1s,请排查热点接口或依赖"

      # Fires when a scrape target of the admin-api, mini-api or user-api job
      # has reported up == 0 for 2 minutes.
      - alert: InstanceDown
        expr: up{job=~"admin-api|mini-api|user-api"} == 0
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "实例不可达"
          description: "Prometheus 抓取失败,实例处于 down 状态"