Skip to content

Commit 9035e76

Browse files
authored
update prom rules for rabbit v4.1 (#1883)
Rule 'rabbitmq-tcp-sockets-near-limit' was removed because the metrics it relied on no longer exist:
* rabbitmq_process_open_tcp_sockets
* rabbitmq_process_max_tcp_sockets
1 parent a1b7201 commit 9035e76

10 files changed

+34
-54
lines changed

observability/prometheus/rules/rabbitmq-per-object/queue-has-no-consumers.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ spec:
1414
expr: |
1515
(
1616
((rabbitmq_detailed_queue_consumers{vhost="/", queue=~".*"} == 0) + rabbitmq_detailed_queue_messages) > 0
17-
) * on (instance, job) group_left(rabbitmq_cluster) rabbitmq_identity_info
17+
) * on (instance) group_left(rabbitmq_cluster) max by (instance, rabbitmq_cluster) (rabbitmq_identity_info)
1818
for: 10m
1919
annotations:
2020
description: |

observability/prometheus/rules/rabbitmq-per-object/queue-is-growing.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ spec:
1515
expr: |
1616
(
1717
avg_over_time(rabbitmq_detailed_queue_messages[10m]) - avg_over_time(rabbitmq_detailed_queue_messages[10m] offset 1m) > 1
18-
) * on (instance, job) group_left(rabbitmq_cluster) rabbitmq_identity_info
18+
) * on (instance) group_left(rabbitmq_cluster) max by (instance, rabbitmq_cluster) (rabbitmq_identity_info)
1919
for: 10m
2020
annotations:
2121
description: |

observability/prometheus/rules/rabbitmq/cluster-alarms.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ spec:
1414
expr: |
1515
max by(rabbitmq_cluster) (
1616
max_over_time(rabbitmq_alarms_memory_used_watermark[5m])
17-
* on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) max(rabbitmq_identity_info) by (namespace, pod, container, rabbitmq_cluster)
17+
* on(instance) group_left(rabbitmq_cluster) max by (instance, rabbitmq_cluster) (rabbitmq_identity_info)
1818
) > 0
1919
keep_firing_for: 5m
2020
annotations:
@@ -30,7 +30,7 @@ spec:
3030
expr: |
3131
max by(rabbitmq_cluster) (
3232
max_over_time(rabbitmq_alarms_free_disk_space_watermark[5m])
33-
* on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) max(rabbitmq_identity_info) by (namespace, pod, container, rabbitmq_cluster)
33+
* on(instance) group_left(rabbitmq_cluster) max by (instance, rabbitmq_cluster) (rabbitmq_identity_info)
3434
) > 0
3535
keep_firing_for: 5m
3636
annotations:
@@ -46,7 +46,7 @@ spec:
4646
expr: |
4747
max by(rabbitmq_cluster) (
4848
max_over_time(rabbitmq_alarms_file_descriptor_limit[5m])
49-
* on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) max(rabbitmq_identity_info) by (namespace, pod, container, rabbitmq_cluster)
49+
* on(instance) group_left(rabbitmq_cluster) max by (instance, rabbitmq_cluster) (rabbitmq_identity_info)
5050
) > 0
5151
keep_firing_for: 5m
5252
annotations:

observability/prometheus/rules/rabbitmq/container-restarts.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ spec:
1212
rules:
1313
- alert: ContainerRestarts
1414
expr: |
15-
increase(kube_pod_container_status_restarts_total[10m]) * on(namespace, pod, container) group_left(rabbitmq_cluster) max(rabbitmq_identity_info) by (namespace, pod, container, rabbitmq_cluster)
16-
>=
17-
1
15+
increase(kube_pod_container_status_restarts_total[10m])
16+
* on(namespace, pod, container) group_left(rabbitmq_cluster) max by (namespace, pod, container, rabbitmq_cluster) (rabbitmq_identity_info)
17+
>= 1
1818
for: 5m
1919
annotations:
2020
description: |

observability/prometheus/rules/rabbitmq/file-descriptors-near-limit.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ spec:
1212
rules:
1313
- alert: FileDescriptorsNearLimit
1414
expr: |
15-
sum by(namespace, rabbitmq_cluster, pod, rabbitmq_node) (max_over_time(rabbitmq_process_open_fds[5m]) * on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) max(rabbitmq_identity_info) by (namespace, pod, container, rabbitmq_cluster))
15+
sum by(namespace, rabbitmq_cluster, pod, rabbitmq_node) (max_over_time(rabbitmq_process_open_fds[5m]) * on(instance) group_left(rabbitmq_cluster, rabbitmq_node) max by (instance, rabbitmq_node, rabbitmq_cluster) (rabbitmq_identity_info))
1616
/
17-
sum by(namespace, rabbitmq_cluster, pod, rabbitmq_node) (rabbitmq_process_max_fds * on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) max(rabbitmq_identity_info) by (namespace, pod, container, rabbitmq_cluster))
17+
sum by(namespace, rabbitmq_cluster, pod, rabbitmq_node) (rabbitmq_process_max_fds * on(instance) group_left(rabbitmq_cluster, rabbitmq_node) max by (instance, rabbitmq_node, rabbitmq_cluster) (rabbitmq_identity_info))
1818
> 0.8
1919
for: 10m
2020
annotations:

observability/prometheus/rules/rabbitmq/high-connection-churn.yml

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,27 @@ spec:
1313
- alert: HighConnectionChurn
1414
expr: |
1515
(
16-
sum(rate(rabbitmq_connections_closed_total[5m]) * on(instance) group_left(rabbitmq_cluster, rabbitmq_node) max(rabbitmq_identity_info) by (namespace, pod, container, rabbitmq_cluster)) by(namespace, rabbitmq_cluster)
16+
sum by (namespace, rabbitmq_cluster) (
17+
rate(rabbitmq_connections_closed_total[5m])
18+
* on (instance) group_left (rabbitmq_cluster) max by (instance, rabbitmq_cluster) (rabbitmq_identity_info)
19+
)
1720
+
18-
sum(rate(rabbitmq_connections_opened_total[5m]) * on(instance) group_left(rabbitmq_cluster, rabbitmq_node) max(rabbitmq_identity_info) by (namespace, pod, container, rabbitmq_cluster)) by(namespace, rabbitmq_cluster)
21+
sum by (namespace, rabbitmq_cluster) (
22+
rate(rabbitmq_connections_opened_total[5m])
23+
* on (instance) group_left (rabbitmq_cluster) max by (instance, rabbitmq_cluster) (rabbitmq_identity_info)
24+
)
1925
)
2026
/
21-
sum (rabbitmq_connections * on(instance) group_left(rabbitmq_cluster) max(rabbitmq_identity_info) by (namespace, pod, container, rabbitmq_cluster)) by (namespace, rabbitmq_cluster)
27+
sum by (namespace, rabbitmq_cluster) (
28+
rabbitmq_connections
29+
* on (instance) group_left (rabbitmq_cluster) max by (instance, rabbitmq_cluster) (rabbitmq_identity_info)
30+
)
2231
> 0.1
2332
unless
24-
sum (rabbitmq_connections * on(instance) group_left(rabbitmq_cluster) max(rabbitmq_identity_info) by (namespace, pod, container, rabbitmq_cluster)) by (namespace, rabbitmq_cluster)
33+
sum by (namespace, rabbitmq_cluster) (
34+
rabbitmq_connections
35+
* on (instance) group_left (rabbitmq_cluster) max by (instance, rabbitmq_cluster) (rabbitmq_identity_info)
36+
)
2537
< 100
2638
for: 10m
2739
annotations:

observability/prometheus/rules/rabbitmq/insufficient-established-erlang-distribution-links.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ spec:
1313
- alert: InsufficientEstablishedErlangDistributionLinks
1414
# erlang_vm_dist_node_state: 1=pending, 2=up_pending, 3=up
1515
expr: |
16-
count by (namespace, rabbitmq_cluster) (erlang_vm_dist_node_state * on(instance) group_left(rabbitmq_cluster) max(rabbitmq_identity_info) by (namespace, pod, container, rabbitmq_cluster) == 3)
16+
count by (namespace, rabbitmq_cluster) (erlang_vm_dist_node_state * on(instance) group_left(rabbitmq_cluster) max by (instance, rabbitmq_cluster) (rabbitmq_identity_info) == 3)
1717
<
18-
count by (namespace, rabbitmq_cluster) (rabbitmq_build_info * on(instance) group_left(rabbitmq_cluster) max(rabbitmq_identity_info) by (namespace, pod, container, rabbitmq_cluster))
18+
count by (namespace, rabbitmq_cluster) (rabbitmq_build_info * on(instance) group_left(rabbitmq_cluster) max by (instance, rabbitmq_cluster) (rabbitmq_identity_info))
1919
*
20-
(count by (namespace, rabbitmq_cluster) (rabbitmq_build_info * on(instance) group_left(rabbitmq_cluster) max(rabbitmq_identity_info) by (namespace, pod, container, rabbitmq_cluster)) -1 )
20+
(count by (namespace, rabbitmq_cluster) (rabbitmq_build_info * on(instance) group_left(rabbitmq_cluster) max by (instance, rabbitmq_cluster) (rabbitmq_identity_info)) -1 )
2121
for: 10m
2222
annotations:
2323
description: |

observability/prometheus/rules/rabbitmq/low-disk-watermark-predicted.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@ spec:
1414
# The 2nd condition ensures that data points exist from 22 to 24 hours ago, so that no false-positive alerts are triggered for newly created RabbitMQ clusters.
1515
expr: |
1616
(
17-
predict_linear(rabbitmq_disk_space_available_bytes[24h], 60*60*24) * on (instance, pod) group_left(rabbitmq_cluster, rabbitmq_node) max(rabbitmq_identity_info) by (namespace, pod, container, rabbitmq_cluster)
17+
predict_linear(rabbitmq_disk_space_available_bytes[24h], 60*60*24) * on (instance) group_left(rabbitmq_cluster, rabbitmq_node) max by (instance, rabbitmq_node, rabbitmq_cluster) (rabbitmq_identity_info)
1818
<
19-
rabbitmq_disk_space_available_limit_bytes * on (instance, pod) group_left(rabbitmq_cluster, rabbitmq_node) max(rabbitmq_identity_info) by (namespace, pod, container, rabbitmq_cluster)
19+
rabbitmq_disk_space_available_limit_bytes * on (instance) group_left(rabbitmq_cluster, rabbitmq_node) max by (instance, rabbitmq_node, rabbitmq_cluster) (rabbitmq_identity_info)
2020
)
2121
and
2222
(
23-
count_over_time(rabbitmq_disk_space_available_limit_bytes[2h] offset 22h) * on (instance, pod) group_left(rabbitmq_cluster, rabbitmq_node) max(rabbitmq_identity_info) by (namespace, pod, container, rabbitmq_cluster)
23+
count_over_time(rabbitmq_disk_space_available_limit_bytes[2h] offset 22h) * on (instance) group_left(rabbitmq_cluster, rabbitmq_node) max by (instance, rabbitmq_node, rabbitmq_cluster) (rabbitmq_identity_info)
2424
>
2525
0
2626
)

observability/prometheus/rules/rabbitmq/tcp-sockets-near-limit.yml

Lines changed: 0 additions & 32 deletions
This file was deleted.

observability/prometheus/rules/rabbitmq/unroutable-messages.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ spec:
1212
rules:
1313
- alert: UnroutableMessages
1414
expr: |
15-
sum by(namespace, rabbitmq_cluster) (increase(rabbitmq_channel_messages_unroutable_dropped_total[5m]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info)
15+
sum by(namespace, rabbitmq_cluster) (increase(rabbitmq_channel_messages_unroutable_dropped_total[5m]) * on (instance) group_left(rabbitmq_cluster) max by (instance, rabbitmq_cluster) (rabbitmq_identity_info))
1616
>= 1
1717
or
18-
sum by(namespace, rabbitmq_cluster) (increase(rabbitmq_channel_messages_unroutable_returned_total[5m]) * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info)
18+
sum by(namespace, rabbitmq_cluster) (increase(rabbitmq_channel_messages_unroutable_returned_total[5m]) * on (instance) group_left(rabbitmq_cluster) max by (instance, rabbitmq_cluster) (rabbitmq_identity_info))
1919
>= 1
2020
annotations:
2121
description: |

0 commit comments

Comments
 (0)