feat: add tracing enrichment and prometheus exporter

This commit is contained in:
msumshk
2025-12-02 22:29:38 +08:00
parent 0d2ad0aecb
commit 2121432d5d
13 changed files with 163 additions and 41 deletions

View File

@@ -0,0 +1,34 @@
# Prometheus alerting rules for the TakeoutSaaS HTTP APIs.
# Covers three signals: 5xx error ratio, P95 request latency and scrape health.
groups:
  - name: takeoutsaas-app
    # Evaluate every rule in this group once per 30 seconds.
    interval: 30s
    rules:
      # Ratio of 5xx responses to all responses over a 5-minute window,
      # summed across every scraped series. Fires after the ratio has stayed
      # above 5% for 5 minutes.
      - alert: HighErrorRate
        expr: |
          sum(rate(http_server_request_duration_seconds_count{http_response_status_code=~"5.."}[5m]))
          / sum(rate(http_server_request_duration_seconds_count[5m])) > 0.05
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "API 5xx 错误率过高"
          description: "过去 5 分钟 5xx 占比超过 5%,请检查依赖或发布"

      # P95 request duration computed from the histogram buckets, one result
      # per `service`. `service` is the label the scrape configuration attaches
      # to every target, so each firing alert identifies the affected API.
      # Fires after P95 has stayed above 1 second for 5 minutes.
      - alert: HighP95Latency
        expr: |
          histogram_quantile(
            0.95,
            sum by (le, service) (rate(http_server_request_duration_seconds_bucket[5m]))
          ) > 1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "API P95 延迟过高"
          description: "过去 5 分钟 P95 超过 1s请排查热点接口或依赖"

      # `up` is 0 when Prometheus fails to scrape a target. The regex limits
      # the rule to the three API jobs. Fires after 2 minutes of failures.
      - alert: InstanceDown
        expr: up{job=~"admin-api|mini-api|user-api"} == 0
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "实例不可达"
          description: "Prometheus 抓取失败,实例处于 down 状态"

View File

@@ -0,0 +1,28 @@
# Prometheus server configuration: global timings, the alert rule file to
# load, and one static scrape job per API.
global:
  # Default interval between scrapes of each target.
  scrape_interval: 15s
  # Default interval between rule evaluations.
  evaluation_interval: 30s

# Rule files loaded at startup; the path is relative to this file.
rule_files:
  - alert.rules.yml

# Each job scrapes a single static target on port 8080 at /metrics and tags
# its series with a `service` label equal to the job name. The job names are
# matched by the `job=~` selector of the InstanceDown rule in alert.rules.yml.
scrape_configs:
  - job_name: admin-api
    metrics_path: /metrics
    static_configs:
      - targets:
          - "admin-api:8080"
        labels:
          service: admin-api

  - job_name: mini-api
    metrics_path: /metrics
    static_configs:
      - targets:
          - "mini-api:8080"
        labels:
          service: mini-api

  - job_name: user-api
    metrics_path: /metrics
    static_configs:
      - targets:
          - "user-api:8080"
        labels:
          service: user-api