Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 3 additions & 7 deletions charts/sophora-cluster-common/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,11 @@ apiVersion: v2
name: sophora-cluster-common
description: A Helm chart containing some common resources useful for Sophora cloud setups
type: application
version: 1.5.0
version: 1.5.1
annotations:
artifacthub.io/changes: |
- kind: added
description: "added prometheus alert rule configuration options"
- kind: added
description: "added prometheus alert rule SophoraStagingServerNotInSync"
- kind: changed
description: "The prometheus alert rule SophoraReplicaServerNotInSync no longer includes staging servers"
- kind: fixed
description: "fixed invalid prometheus rule SophoraReplicaServerNotInSync"

appVersion: "4"
sources:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ spec:
runbook_url: 'https://github.com/subshell/helm-charts/blob/main/charts/sophora-cluster-common/alerting-runbook.md'
- alert: SophoraReplicaServerNotInSync
for: {{ .Values.prometheusRules.config.SophoraReplicaServerNotInSync.for }}
expr: 'max(sophora_server_source_time{namespace="{{ .Release.Namespace }}"} and sophora_server_is_primary_server{namespace="{{ .Release.Namespace }}"} == 1) - ignoring(pod) group_right max by (pod) (sophora_server_source_time{namespace="{{ .Release.Namespace }}"} and sophora_server_state{namespace="{{ .Release.Namespace }}"} == 2 and and sophora_server_replication_mode{namespace="{{ .Release.Namespace }}"} == 2) > 60000'
expr: 'max(sophora_server_source_time{namespace="{{ .Release.Namespace }}"} and sophora_server_is_primary_server{namespace="{{ .Release.Namespace }}"} == 1) - ignoring(pod) group_right max by (pod) (sophora_server_source_time{namespace="{{ .Release.Namespace }}"} and sophora_server_state{namespace="{{ .Release.Namespace }}"} == 2 and sophora_server_replication_mode{namespace="{{ .Release.Namespace }}"} == 2) > 60000'
labels:
severity: high
namespace: "{{ .Release.Namespace }}"
Expand All @@ -32,7 +32,7 @@ spec:
runbook_url: 'https://github.com/subshell/helm-charts/blob/main/charts/sophora-cluster-common/alerting-runbook.md'
- alert: SophoraStagingServerNotInSync
for: {{ .Values.prometheusRules.config.SophoraStagingServerNotInSync.for }}
expr: 'sophora_server_source_time{namespace="{{ .Release.Namespace }}"} and on(pod) (sophora_server_state{namespace="{{ .Release.Namespace }}"} == 3 or sophora_server_state{namespace="{{ .Release.Namespace }}"} == 4) and on(pod) sophora_server_replication_mode{namespace="{{ .Release.Namespace }}"} == 3 > 0'
expr: '(sophora_server_source_time{namespace="{{ .Release.Namespace }}"} and on(pod) (sophora_server_state{namespace="{{ .Release.Namespace }}"} == 3 or sophora_server_state{namespace="{{ .Release.Namespace }}"} == 4) and on(pod) sophora_server_replication_mode{namespace="{{ .Release.Namespace }}"} == 3) > 0'
labels:
severity: high
namespace: "{{ .Release.Namespace }}"
Expand Down
Loading