# mysql-alerts.yaml
---
# PrometheusRule defining the "mysql-alert" rule group: MySQL availability,
# replication, connection, file-descriptor, slow-query and QPS alerts.
#
# Conventions used by every rule in this file:
#   - The alert name is prefixed with its priority: [P0] .. [P3].
#   - annotations.level repeats that priority.
#   - labels.severity maps 1:1 to it: P0=critical, P1=error, P2=warn, P3=info.
#   - annotations.ruleName repeats the alert name; ruleGroupName the group name.
kind: PrometheusRule
apiVersion: monitoring.coreos.com/v1
metadata:
  # NOTE(review): no metadata.name is present and `prometheus: dx` sits
  # directly under metadata rather than under metadata.labels. Kept as found;
  # confirm against whatever consumes this manifest.
  prometheus: dx
spec:
  groups:
    - name: mysql-alert
      # Every rule in this group is evaluated once per 10 minutes, so a `for:`
      # shorter than 10m still needs a second evaluation 10 minutes later
      # before the alert fires.
      # NOTE(review): this delays P0 alerts; confirm 10m is intentional.
      interval: 10m
      rules:
        # Replication SQL thread is not running on an instance that reports a
        # master server id > 0.
        - alert: '[P0]-MySQL Slave SQL 线程未运行'
          expr: (mysql_slave_status_slave_sql_running and ON (instance) mysql_slave_status_master_server_id > 0) == 0
          for: 1m
          labels:
            severity: critical
          annotations:
            description: 'MySQL Slave SQL 线程未在 {{ $labels.instance }} 上运行'
            level: P0
            ruleGroupName: mysql-alert
            ruleName: '[P0]-MySQL Slave SQL 线程未运行'
            type: mysql

        # Server restarted recently. The uptime window equals the group
        # evaluation interval (600s) and `for` is 0m: a "low uptime" condition
        # is transient by nature, so requiring it to persist across two
        # evaluations would prevent the alert from ever firing.
        - alert: '[P1]-MySQL服务发生重启'
          expr: mysql_global_status_uptime < 600
          for: 0m
          labels:
            severity: error
          annotations:
            description: '实例为 {{ $labels.instance }} 的数据库已重启,请检查数据库实例'
            level: P1
            ruleGroupName: mysql-alert
            ruleName: '[P1]-MySQL服务发生重启'
            type: mysql

        # Connected threads exceed 95% of max_connections.
        - alert: '[P2]-MySQL连接数使用率高'
          expr: mysql_global_status_threads_connected / mysql_global_variables_max_connections *100 > 95
          for: 5m
          labels:
            severity: warn
          annotations:
            description: '实例为 {{ $labels.instance }} 的数据库服务器,MySQL连接数使用率> 95%,实际使用率为{{ humanize $value }}%,请保持观察数据库和业务运行的稳定'
            level: P2
            ruleGroupName: mysql-alert
            ruleName: '[P2]-MySQL连接数使用率高'
            type: mysql

        # Slow-query counter grows by more than 10 per second (1m rate).
        - alert: '[P2]-MySQL慢查询增长数高'
          expr: rate(mysql_global_status_slow_queries[1m]) > 10
          for: 5m
          labels:
            severity: warn
          annotations:
            description: '实例为 {{ $labels.instance }} 的数据库,MySQL慢查询增长数高,实际每秒数量为{{ humanize $value }},请保持观察数据库和业务运行的稳定'
            level: P2
            ruleGroupName: mysql-alert
            ruleName: '[P2]-MySQL慢查询增长数高'
            type: mysql

        # Slow-query counter grows by more than 3 per second (1m rate).
        - alert: '[P3]-MySQL慢查询增长数高'
          expr: rate(mysql_global_status_slow_queries[1m]) > 3
          for: 5m
          labels:
            severity: info
          annotations:
            description: '实例为 {{ $labels.instance }} 的数据库,MySQL慢查询增长数高,实际每秒数量为{{ humanize $value }},请保持观察数据库和业务运行的稳定'
            level: P3
            ruleGroupName: mysql-alert
            ruleName: '[P3]-MySQL慢查询增长数高'
            type: mysql

        # InnoDB log-wait counter grows by more than 1 per second (1m rate).
        - alert: '[P2]-MySQL InnoDB日志等待时间高'
          expr: rate(mysql_global_status_innodb_log_waits[1m]) > 1
          for: 5m
          labels:
            severity: warn
          annotations:
            description: '实例为 {{ $labels.instance }} 的数据库,MySQL InnoDB日志等待时间高,实际等待时间为{{ humanize $value }},请保持观察数据库和业务运行的稳定'
            level: P2
            ruleGroupName: mysql-alert
            ruleName: '[P2]-MySQL InnoDB日志等待时间高'
            type: mysql

        # Open files exceed 80% of open_files_limit.
        - alert: '[P3]-MySQL文件描述符使用率高>80'
          expr: mysql_global_status_open_files / mysql_global_variables_open_files_limit *100 > 80
          for: 5m
          labels:
            severity: info
          annotations:
            description: '实例为 {{ $labels.instance }} 的数据库,MySQL文件描述符使用率>80%,实际使用率为{{ humanize $value }}%,请保持观察数据库和业务运行的稳定'
            level: P3
            ruleGroupName: mysql-alert
            ruleName: '[P3]-MySQL文件描述符使用率高>80'
            type: mysql

        # Open files exceed 90% of open_files_limit.
        # Severity is `warn`, matching every other P2 rule in this group.
        - alert: '[P2]-MySQL文件描述符使用率高'
          expr: mysql_global_status_open_files / mysql_global_variables_open_files_limit *100 > 90
          for: 5m
          labels:
            severity: warn
          annotations:
            description: '实例为 {{ $labels.instance }} 的数据库,MySQL文件描述符使用率>90%,实际使用率为{{ humanize $value }}%,请保持观察数据库和业务运行的稳定'
            level: P2
            ruleGroupName: mysql-alert
            ruleName: '[P2]-MySQL文件描述符使用率高'
            type: mysql

        # Peak running threads over the last minute exceed 60% of
        # max_connections.
        - alert: '[P2]-MySQL高线程运行'
          expr: max_over_time(mysql_global_status_threads_running[1m]) / mysql_global_variables_max_connections * 100 > 60
          for: 5m
          labels:
            severity: warn
          annotations:
            description: '实例为 {{ $labels.instance }} 的数据库,超过 60% 的 MySQL 连接处于运行状态,请检查数据库'
            level: P2
            ruleGroupName: mysql-alert
            ruleName: '[P2]-MySQL高线程运行'
            type: mysql

        # Connected threads exceed 85% of max_connections.
        # Level is P3, matching the alert name and its `info` severity.
        - alert: '[P3]-MySQL连接数使用率高'
          expr: mysql_global_status_threads_connected / mysql_global_variables_max_connections *100 > 85
          for: 5m
          labels:
            severity: info
          annotations:
            description: '实例为 {{ $labels.instance }} 的数据库服务器,MySQL连接数使用率> 85%,实际使用率为{{ humanize $value }}%,请保持观察数据库和业务运行的稳定'
            level: P3
            ruleGroupName: mysql-alert
            ruleName: '[P3]-MySQL连接数使用率高'
            type: mysql

        # Questions counter grows by more than 1500 per second (1m rate).
        - alert: '[P3]-MySQL QPS高'
          expr: rate(mysql_global_status_questions[1m]) > 1500
          for: 5m
          labels:
            severity: info
          annotations:
            description: '实例为 {{ $labels.instance }} 的数据库,QPS高,实际QPS为{{ humanize $value }},请保持观察数据库和业务运行的稳定'
            level: P3
            ruleGroupName: mysql-alert
            ruleName: '[P3]-MySQL QPS高'
            type: mysql

        # mysql_up reports 0 for the instance.
        # Severity is `critical`, matching the other P0 rules in this group.
        - alert: '[P0]-MySQL Down'
          expr: mysql_up == 0
          for: 1m
          labels:
            severity: critical
          annotations:
            description: '实例为{{ $labels.instance }}的数据库宕机,请检查数据库实例'
            level: P0
            ruleGroupName: mysql-alert
            ruleName: '[P0]-MySQL Down'
            type: mysql

        # Questions counter grows by more than 2000 per second (1m rate).
        - alert: '[P2]-MySQL QPS高'
          expr: rate(mysql_global_status_questions[1m]) > 2000
          for: 5m
          labels:
            severity: warn
          annotations:
            description: '实例为 {{ $labels.instance }} 的数据库,QPS高,实际QPS为{{ humanize $value }},请保持观察数据库和业务运行的稳定'
            level: P2
            ruleGroupName: mysql-alert
            ruleName: '[P2]-MySQL QPS高'
            type: mysql

        # Replication IO thread is not running on an instance that reports a
        # master server id > 0.
        # Severity is `critical`, matching the sibling Slave SQL thread rule.
        - alert: '[P0]-MySQL Slave IO 线程未运行'
          expr: ( mysql_slave_status_slave_io_running and ON (instance) mysql_slave_status_master_server_id > 0 ) == 0
          for: 1m
          labels:
            severity: critical
          annotations:
            description: 'MySQL 从属 IO 线程未在 {{ $labels.instance }} 上运行,请检查数据库实例'
            level: P0
            ruleGroupName: mysql-alert
            ruleName: '[P0]-MySQL Slave IO 线程未运行'
            type: mysql