-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrocketmq-alerts.yaml
152 lines (152 loc) · 6.3 KB
/
rocketmq-alerts.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
---
# PrometheusRule defining the "rocketmq-alert" group: RocketMQ alerts for
# commit-log disk usage, message put latency, send-TPS spikes, consumer
# backlog and broker send-queue wait time, each at up to three levels
# (P1 = error, P2 = warn, P3 = info).
#
# NOTE(review): the nesting below was reconstructed from the PrometheusRule
# schema (spec.groups[].rules[], with labels/annotations as mappings) because
# the original indentation was lost - confirm against the deployed object.
#
# Strings that begin with "[" or "{{" are quoted on purpose: left unquoted,
# YAML parses them as flow sequences / flow mappings and the file fails to
# load.
kind: PrometheusRule
apiVersion: monitoring.coreos.com/v1
metadata:
  # NOTE(review): no metadata.name is visible here, and `prometheus: dx` may
  # originally have lived under `labels:` - TODO confirm.
  prometheus: dx
spec:
  groups:
    - name: rocketmq-alert
      # NOTE(review): this interval is longer than every `for:` duration and
      # than the 30s offset used by the TPS rules below - confirm that a
      # 10m evaluation cadence is intended.
      interval: 10m
      rules:
        # --- Commit-log disk usage, P3 (> 70) ---
        # NOTE(review): the expression fires when the ratio * 100 exceeds the
        # threshold, yet the description reports $value as the percentage
        # "remaining"; the wording of the three disk rules looks inverted -
        # confirm before relying on the message text.
        - expr: rocketmq_brokeruntime_commitlog_disk_ratio * 100 > 70
          alert: '[P3]-RocketMQ 磁盘空间不足'
          for: 5m
          labels:
            severity: info
          annotations:
            description: '实例为 {{ $labels.brokerIP }} RocketMQ 磁盘空间少于30%,实际只剩余{{ humanize $value }}%,请检查MQ实例'
            level: P3
            ruleGroupName: rocketmq-alert
            ruleName: '[P3]-RocketMQ 磁盘空间不足'
            type: rocketmq

        # --- Commit-log disk usage, P1 (> 90) ---
        - expr: rocketmq_brokeruntime_commitlog_disk_ratio * 100 > 90
          alert: '[P1]-RocketMQ 磁盘空间不足'
          for: 5m
          labels:
            severity: error
          annotations:
            description: '实例为 {{ $labels.brokerIP }} RocketMQ 磁盘空间少于10%,实际只剩余{{ humanize $value }}%,请检查MQ实例'
            level: P1
            ruleGroupName: rocketmq-alert
            ruleName: '[P1]-RocketMQ 磁盘空间不足'
            type: rocketmq

        # --- Message put latency, P2 (200-500ms bucket > 2) ---
        - expr: rocketmq_brokeruntime_pmdt_200to500ms > 2
          alert: '[P2]-RocketMQ 消息提交耗时高'
          for: 1m
          labels:
            severity: warn
          annotations:
            description: '实例为 {{ $labels.brokerIP }} RocketMQ 消息提交耗时需要200~500ms,请检查MQ实例'
            level: P2
            ruleGroupName: rocketmq-alert
            ruleName: '[P2]-RocketMQ 消息提交耗时高'
            type: rocketmq

        # --- Send-TPS spike, P3 (increase over 30s > 1200) ---
        - expr: sum(rocketmq_broker_tps - rocketmq_broker_tps offset 30s) by (namespace, cluster) > 1200
          alert: '[P3]-RocketMQ 发送tps激增'
          for: 2m
          labels:
            severity: info
          annotations:
            description: '{{$labels.cluster}}集群, 发送tps在过去30s内出现激增已超过1200, 当前增加量:{{$value}}'
            level: P3
            ruleGroupName: rocketmq-alert
            ruleName: '[P3]-RocketMQ 发送tps激增'
            type: rocketmq

        # --- Send-TPS spike, P1 (increase over 30s > 3000) ---
        - expr: sum(rocketmq_broker_tps - rocketmq_broker_tps offset 30s) by (namespace, cluster) > 3000
          alert: '[P1]-RocketMQ 发送tps激增'
          for: 2m
          labels:
            severity: error
          annotations:
            description: '{{$labels.cluster}}集群, 发送tps在过去30s内出现激增已超过3000, 当前增加量:{{$value}}'
            level: P1
            ruleGroupName: rocketmq-alert
            ruleName: '[P1]-RocketMQ 发送tps激增'
            type: rocketmq

        # --- Consumer backlog, P3 (summed group diff > 300) ---
        - expr: sum(rocketmq_group_diff) by (namespace,group,topic) > 300
          alert: '[P3]-RocketMQ消息堆积'
          for: 5m
          labels:
            severity: info
          annotations:
            description: 'topic为{{ $labels.topic }},存在消息堆积情况,堆积量已大于300'
            level: P3
            ruleGroupName: rocketmq-alert
            ruleName: '[P3]-RocketMQ消息堆积'
            type: rocketmq

        # --- Consumer backlog, P1 (summed group diff > 4000) ---
        - expr: sum(rocketmq_group_diff) by (namespace,group,topic) > 4000
          alert: '[P1]-RocketMQ消息堆积'
          for: 5m
          labels:
            severity: error
          annotations:
            description: 'topic为{{ $labels.topic }},存在消息堆积情况,堆积量已大于4000'
            level: P1
            ruleGroupName: rocketmq-alert
            ruleName: '[P1]-RocketMQ消息堆积'
            type: rocketmq

        # --- Commit-log disk usage, P2 (> 80) ---
        - expr: rocketmq_brokeruntime_commitlog_disk_ratio * 100 > 80
          alert: '[P2]-RocketMQ 磁盘空间不足'
          for: 5m
          labels:
            severity: warn
          annotations:
            description: '实例为 {{ $labels.brokerIP }} RocketMQ 磁盘空间少于20%,实际只剩余{{ humanize $value }}%,请检查MQ实例'
            level: P2
            ruleGroupName: rocketmq-alert
            ruleName: '[P2]-RocketMQ 磁盘空间不足'
            type: rocketmq

        # --- Broker busy, P2 (send thread-pool queue head wait > 200) ---
        # The original line read `expr:rocketmq_...` with no space after the
        # colon, which YAML does not treat as a key/value pair.
        # NOTE(review): the description appends "%" to $value although the
        # metric name suggests milliseconds - confirm the intended unit.
        - expr: rocketmq_brokeruntime_send_threadpoolqueue_headwait_timemills > 200
          alert: '[P2]-RocketMQ broker 繁忙'
          for: 5m
          labels:
            severity: warn
          annotations:
            description: '实例为 {{ $labels.brokerIP }} RocketMQ broker 处于繁忙,实际的值为{{ humanize $value }}%,请检查MQ实例'
            level: P2
            ruleGroupName: rocketmq-alert
            ruleName: '[P2]-RocketMQ broker 繁忙'
            type: rocketmq

        # --- Message put latency, P3 (100-200ms bucket > 2) ---
        - expr: rocketmq_brokeruntime_pmdt_100to200ms > 2
          alert: '[P3]-RocketMQ 消息提交耗时高'
          for: 1m
          labels:
            severity: info
          annotations:
            description: '实例为 {{ $labels.brokerIP }} RocketMQ 消息提交耗时需要100~200ms,请检查MQ实例'
            level: P3
            ruleGroupName: rocketmq-alert
            ruleName: '[P3]-RocketMQ 消息提交耗时高'
            type: rocketmq

        # --- Message put latency, P1 (500ms-1s bucket > 2) ---
        - expr: rocketmq_brokeruntime_pmdt_500to1s > 2
          alert: '[P1]-RocketMQ 消息提交耗时高'
          for: 1m
          labels:
            severity: error
          annotations:
            description: '实例为 {{ $labels.brokerIP }} RocketMQ 消息提交耗时存在500~1s,请检查MQ实例'
            level: P1
            ruleGroupName: rocketmq-alert
            ruleName: '[P1]-RocketMQ 消息提交耗时高'
            type: rocketmq

        # --- Send-TPS spike, P2 (increase over 30s > 2000) ---
        - expr: sum(rocketmq_broker_tps - rocketmq_broker_tps offset 30s) by (namespace, cluster) > 2000
          alert: '[P2]-RocketMQ 发送tps激增'
          for: 2m
          labels:
            severity: warn
          annotations:
            description: '{{$labels.cluster}}集群, 发送tps在过去30s内出现激增已超过2000, 当前增加量:{{$value}}'
            level: P2
            ruleGroupName: rocketmq-alert
            ruleName: '[P2]-RocketMQ 发送tps激增'
            type: rocketmq

        # --- Consumer backlog, P2 (summed group diff > 500) ---
        - expr: sum(rocketmq_group_diff) by (namespace,group,topic) > 500
          alert: '[P2]-RocketMQ消息堆积'
          for: 5m
          labels:
            severity: warn
          annotations:
            description: 'topic为{{ $labels.topic }},存在消息堆积情况,堆积量已大于500'
            level: P2
            ruleGroupName: rocketmq-alert
            ruleName: '[P2]-RocketMQ消息堆积'
            type: rocketmq