From 5e428b871deb0b9d853986e3a73980b02f9ee7c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Sch=C3=B6ner?= Date: Fri, 19 Jun 2026 13:22:17 +0200 Subject: [PATCH 1/2] [sophora-cluster-common]: fix invalid prometheus rule --- charts/sophora-cluster-common/Chart.yaml | 10 +++------- .../templates/alerts/prometheusrule.yaml | 4 ++-- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/charts/sophora-cluster-common/Chart.yaml b/charts/sophora-cluster-common/Chart.yaml index 3594a65f..066b39cb 100644 --- a/charts/sophora-cluster-common/Chart.yaml +++ b/charts/sophora-cluster-common/Chart.yaml @@ -2,15 +2,11 @@ apiVersion: v2 name: sophora-cluster-common description: A Helm chart containing some common resources useful for Sophora cloud setups type: application -version: 1.5.0 +version: 1.5.1 annotations: artifacthub.io/changes: | - - kind: added - description: "added prometheus alert rule configuration options" - - kind: added - description: "added prometheus alert rule SophoraStagingServerNotInSync" - - kind: changed - description: "The prometheus alert rule SophoraReplicaServerNotInSync does no longer include staging servers" + - kind: fixed + description: "fixed invalid prometheus rule SophoraReplicaServerNotInSync" appVersion: "4" sources: diff --git a/charts/sophora-cluster-common/templates/alerts/prometheusrule.yaml b/charts/sophora-cluster-common/templates/alerts/prometheusrule.yaml index c332e9e8..57695eb4 100644 --- a/charts/sophora-cluster-common/templates/alerts/prometheusrule.yaml +++ b/charts/sophora-cluster-common/templates/alerts/prometheusrule.yaml @@ -22,7 +22,7 @@ spec: runbook_url: 'https://github.com/subshell/helm-charts/blob/main/charts/sophora-cluster-common/alerting-runbook.md' - alert: SophoraReplicaServerNotInSync for: {{ .Values.prometheusRules.config.SophoraReplicaServerNotInSync.for }} - expr: 'max(sophora_server_source_time{namespace="{{ .Release.Namespace }}"} and sophora_server_is_primary_server{namespace="{{ .Release.Namespace }}"} == 1) - ignoring(pod) group_right max by (pod) (sophora_server_source_time{namespace="{{ .Release.Namespace }}"} and sophora_server_state{namespace="{{ .Release.Namespace }}"} == 2 and and sophora_server_replication_mode{namespace="{{ .Release.Namespace }}"} == 2) > 60000' + expr: 'max(sophora_server_source_time{namespace="{{ .Release.Namespace }}"} and sophora_server_is_primary_server{namespace="{{ .Release.Namespace }}"} == 1) - ignoring(pod) group_right max by (pod) (sophora_server_source_time{namespace="{{ .Release.Namespace }}"} and sophora_server_state{namespace="{{ .Release.Namespace }}"} == 2 and sophora_server_replication_mode{namespace="{{ .Release.Namespace }}"} == 2) > 60000' labels: severity: high namespace: "{{ .Release.Namespace }}" @@ -32,7 +32,7 @@ spec: runbook_url: 'https://github.com/subshell/helm-charts/blob/main/charts/sophora-cluster-common/alerting-runbook.md' - alert: SophoraStagingServerNotInSync for: {{ .Values.prometheusRules.config.SophoraStagingServerNotInSync.for }} - expr: 'sophora_server_source_time{namespace="{{ .Release.Namespace }}"} and on(pod) (sophora_server_state{namespace="{{ .Release.Namespace }}"} == 3 or sophora_server_state{namespace="{{ .Release.Namespace }}"} == 4) and on(pod) sophora_server_replication_mode{namespace="{{ .Release.Namespace }}"} == 3 > 0' + expr: '(sophora_server_source_time{namespace="{{ .Release.Namespace }}"} and on(pod) (sophora_server_state{namespace="{{ .Release.Namespace }}"} == 3 or sophora_server_state{namespace="{{ .Release.Namespace }}"} == 4) and on(pod) sophora_server_replication_mode{namespace="{{ .Release.Namespace }}"} == 3) > 0' labels: severity: high namespace: "{{ .Release.Namespace }}" From c56a843fda68e288cd8bfa8d92e78cff35350b51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Sch=C3=B6ner?= Date: Fri, 19 Jun 2026 13:24:38 +0200 Subject: [PATCH 2/2] [sophora-cluster-common]: cleanup --- .../sophora-cluster-common/templates/alerts/prometheusrule.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/sophora-cluster-common/templates/alerts/prometheusrule.yaml b/charts/sophora-cluster-common/templates/alerts/prometheusrule.yaml index 57695eb4..318c63fc 100644 --- a/charts/sophora-cluster-common/templates/alerts/prometheusrule.yaml +++ b/charts/sophora-cluster-common/templates/alerts/prometheusrule.yaml @@ -22,7 +22,7 @@ spec: runbook_url: 'https://github.com/subshell/helm-charts/blob/main/charts/sophora-cluster-common/alerting-runbook.md' - alert: SophoraReplicaServerNotInSync for: {{ .Values.prometheusRules.config.SophoraReplicaServerNotInSync.for }} - expr: 'max(sophora_server_source_time{namespace="{{ .Release.Namespace }}"} and sophora_server_is_primary_server{namespace="{{ .Release.Namespace }}"} == 1) - ignoring(pod) group_right max by (pod) (sophora_server_source_time{namespace="{{ .Release.Namespace }}"} and sophora_server_state{namespace="{{ .Release.Namespace }}"} == 2 and sophora_server_replication_mode{namespace="{{ .Release.Namespace }}"} == 2) > 60000' + expr: 'max(sophora_server_source_time{namespace="{{ .Release.Namespace }}"} and sophora_server_is_primary_server{namespace="{{ .Release.Namespace }}"} == 1) - ignoring(pod) group_right max by (pod) (sophora_server_source_time{namespace="{{ .Release.Namespace }}"} and sophora_server_state{namespace="{{ .Release.Namespace }}"} == 2 and sophora_server_replication_mode{namespace="{{ .Release.Namespace }}"} == 2) > 60000' labels: severity: high namespace: "{{ .Release.Namespace }}"