File tree Expand file tree Collapse file tree 2 files changed +13
-5
lines changed
Expand file tree Collapse file tree 2 files changed +13
-5
lines changed Original file line number Diff line number Diff line change @@ -137,25 +137,25 @@ groups:
137137 annotations:
138138 description: "Root volume (OSD and MON store) is dangerously full: {{ $value | humanize }}% free."
139139
140- # alert on nic packet errors and drops rates > 1 packet/s
140+ # alert when NIC packet drop or error rates exceed alertmanager_packet_drop_threshold / alertmanager_packet_errors_threshold (packets/s)
141141 - alert: NetworkPacketsDropped
142- expr: irate(node_network_receive_drop_total{device!~"lo|br.*|.*-ovs|tap.*"}[5m]) + irate(node_network_transmit_drop_total{device!~"lo|br.*|.*-ovs|tap.*"}[5m]) > 1
142+ expr: irate(node_network_receive_drop_total{device!~"lo|br.*|.*-ovs|tap.*"}[5m]) + irate(node_network_transmit_drop_total{device!~"lo|br.*|.*-ovs|tap.*"}[5m]) > {% endraw %}{{ alertmanager_packet_drop_threshold }}{% raw %}
143143 labels:
144144 severity: warning
145145 annotations:
146146 description: >
147- Node {{ $labels.instance }} experiences packet drop > 1
147+ Node {{ $labels.instance }} experiences packet drop > {% endraw %}{{ alertmanager_packet_drop_threshold }}{% raw %}
148148 packet/s on interface {{ $labels.device }}.
149149
150150 - alert: NetworkPacketErrors
151151 expr: |
152152 irate(node_network_receive_errs_total{device!="lo"}[5m]) +
153- irate(node_network_transmit_errs_total{device!="lo"}[5m]) > 1
153+ irate(node_network_transmit_errs_total{device!="lo"}[5m]) > {% endraw %}{{ alertmanager_packet_errors_threshold }}{% raw %}
154154 labels:
155155 severity: warning
156156 annotations:
157157 description: >
158- Node {{ $labels.instance }} experiences packet errors > 1
158+ Node {{ $labels.instance }} experiences packet errors > {% endraw %}{{ alertmanager_packet_errors_threshold }}{% raw %}
159159 packet/s on interface {{ $labels.device }}.
160160
161161 - alert: StorageFillingUp
Original file line number Diff line number Diff line change @@ -18,6 +18,14 @@ alertmanager_warn_network_bond_single_link: true
1818alertmanager_node_free_swap_warning_threshold_ratio : 0.25
1919alertmanager_node_free_swap_critical_threshold_ratio : 0.1
2020
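21+ # Threshold to trigger an alert for dropped packets (receive + transmit), in
22+ # packets/s. Evaluated with irate() over a 5-minute lookback window, i.e. the
22+ # instantaneous rate from the two most recent samples, not a 5-minute average.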
23+ alertmanager_packet_drop_threshold : 1
24+
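25+ # Threshold to trigger an alert for packet receive/transmit errors, in
26+ # packets/s. Evaluated with irate() over a 5-minute lookback window, i.e. the
26+ # instantaneous rate from the two most recent samples, not a 5-minute average.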
27+ alertmanager_packet_errors_threshold : 1
28+
2129# ##############################################################################
2230# Exporter configuration
2331
You can’t perform that action at this time.
0 commit comments