diff --git a/docs/gitbook/usage/monitoring.md b/docs/gitbook/usage/monitoring.md index d1379991a..deb09e177 100644 --- a/docs/gitbook/usage/monitoring.md +++ b/docs/gitbook/usage/monitoring.md @@ -108,6 +108,14 @@ flagger_canary_total{namespace="test"} 1 # 0 - running, 1 - successful, 2 - failed flagger_canary_status{name="podinfo" namespace="test"} 1 +# Current canary phase gauge +# 0 - Initializing, 1 - Initialized, 2 - Waiting, 3 - Progressing, +# 4 - WaitingPromotion, 5 - Promoting, 6 - Finalising, 7 - Succeeded, +# 8 - Failed, 9 - Terminating, 10 - Terminated +# A deleted canary keeps emitting the metric with the Terminated value (10), +# so queries can filter out removed canaries with e.g. flagger_canary_phase < 9 +flagger_canary_phase{name="podinfo" namespace="test"} 7 + # Canary traffic weight gauge flagger_canary_weight{workload="podinfo-primary" namespace="test"} 95 flagger_canary_weight{workload="podinfo" namespace="test"} 5 diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 39c72c373..8939da871 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -180,6 +180,9 @@ func NewController( if ok { ctrl.logger.Infof("Deleting %s.%s from cache", r.Name, r.Namespace) ctrl.canaries.Delete(fmt.Sprintf("%s.%s", r.Name, r.Namespace)) + // record the terminated phase so deleted canaries can be filtered + // out of the flagger_canary_phase metric (e.g. flagger_canary_phase < 9) + ctrl.recorder.SetPhase(&r, flaggerv1.CanaryPhaseTerminated) } }, }) diff --git a/pkg/controller/finalizer.go b/pkg/controller/finalizer.go index 57ab04863..e3f82123f 100644 --- a/pkg/controller/finalizer.go +++ b/pkg/controller/finalizer.go @@ -51,6 +51,10 @@ func (c *Controller) finalize(old interface{}) error { return fmt.Errorf("failed to update status: %w", err) } + // record the terminating phase on the flagger_canary_phase metric; + // the final Terminated phase is recorded by the informer delete handler + c.recorder.SetPhase(canary, flaggerv1.CanaryPhaseTerminating) + // record event c.recordEventInfof(canary, "Terminating canary %s.%s", canary.Name, canary.Namespace) } diff --git a/pkg/controller/scheduler_metrics_test.go b/pkg/controller/scheduler_metrics_test.go index 1f7cd67de..df876bd30 100644 --- a/pkg/controller/scheduler_metrics_test.go +++ b/pkg/controller/scheduler_metrics_test.go @@ -197,6 +197,9 @@ func TestController_MetricsStateTransition(t *testing.T) { actualStatus := testutil.ToFloat64(mocks.ctrl.recorder.GetStatusMetric().WithLabelValues("podinfo", "default")) assert.Equal(t, float64(1), actualStatus) + actualPhase := testutil.ToFloat64(mocks.ctrl.recorder.GetPhaseMetric().WithLabelValues("podinfo", "default")) + assert.Equal(t, float64(1), actualPhase) // Initialized + actualTotal := testutil.ToFloat64(mocks.ctrl.recorder.GetTotalMetric().WithLabelValues("default")) assert.GreaterOrEqual(t, actualTotal, float64(0)) dep2 := newDeploymentTestDeploymentV2() @@ -210,6 +213,9 @@ func TestController_MetricsStateTransition(t *testing.T) { actualStatus = testutil.ToFloat64(mocks.ctrl.recorder.GetStatusMetric().WithLabelValues("podinfo", "default")) assert.Equal(t, float64(0), actualStatus) + actualPhase = testutil.ToFloat64(mocks.ctrl.recorder.GetPhaseMetric().WithLabelValues("podinfo", "default")) + assert.Equal(t, float64(3), actualPhase) // Progressing + actualPrimaryWeight := testutil.ToFloat64(mocks.ctrl.recorder.GetWeightMetric().WithLabelValues("podinfo-primary", "default")) actualCanaryWeight := testutil.ToFloat64(mocks.ctrl.recorder.GetWeightMetric().WithLabelValues("podinfo", "default")) @@ -234,6 +240,9 @@ func TestController_MetricsStateTransition(t *testing.T) { successCount := testutil.ToFloat64(mocks.ctrl.recorder.GetSuccessesMetric().WithLabelValues("podinfo", "default", "canary", "completed")) assert.Equal(t, float64(1), successCount) + + actualPhase = testutil.ToFloat64(mocks.ctrl.recorder.GetPhaseMetric().WithLabelValues("podinfo", "default")) + assert.Equal(t, float64(7), actualPhase) // Succeeded }) t.Run("failed canary rollback with count metrics", func(t *testing.T) { @@ -270,6 +279,9 @@ func TestController_MetricsStateTransition(t *testing.T) { actualStatus := testutil.ToFloat64(mocks.ctrl.recorder.GetStatusMetric().WithLabelValues("podinfo", "default")) assert.Equal(t, float64(2), actualStatus) + actualPhase := testutil.ToFloat64(mocks.ctrl.recorder.GetPhaseMetric().WithLabelValues("podinfo", "default")) + assert.Equal(t, float64(8), actualPhase) // Failed + actualPrimaryWeight := testutil.ToFloat64(mocks.ctrl.recorder.GetWeightMetric().WithLabelValues("podinfo-primary", "default")) actualCanaryWeight := testutil.ToFloat64(mocks.ctrl.recorder.GetWeightMetric().WithLabelValues("podinfo", "default")) assert.Equal(t, float64(100), actualPrimaryWeight) diff --git a/pkg/metrics/recorder.go b/pkg/metrics/recorder.go index 85e4d6e0a..81f21b710 100644 --- a/pkg/metrics/recorder.go +++ b/pkg/metrics/recorder.go @@ -37,6 +37,24 @@ const ( AnalysisStatusSkipped = "skipped" ) +// canaryPhaseValues maps each canary phase to a unique integer value +// exposed by the flagger_canary_phase metric. Unlike flagger_canary_status +// (which collapses all phases into running/successful/failed), this mapping +// keeps every phase distinct so they can be rendered on a Grafana state-timeline. +var canaryPhaseValues = map[flaggerv1.CanaryPhase]float64{ + flaggerv1.CanaryPhaseInitializing: 0, + flaggerv1.CanaryPhaseInitialized: 1, + flaggerv1.CanaryPhaseWaiting: 2, + flaggerv1.CanaryPhaseProgressing: 3, + flaggerv1.CanaryPhaseWaitingPromotion: 4, + flaggerv1.CanaryPhasePromoting: 5, + flaggerv1.CanaryPhaseFinalising: 6, + flaggerv1.CanaryPhaseSucceeded: 7, + flaggerv1.CanaryPhaseFailed: 8, + flaggerv1.CanaryPhaseTerminating: 9, + flaggerv1.CanaryPhaseTerminated: 10, +} + // CanaryMetricLabels holds labels for canary metrics type CanaryMetricLabels struct { Name string @@ -56,6 +74,7 @@ type Recorder struct { duration *prometheus.HistogramVec total *prometheus.GaugeVec status *prometheus.GaugeVec + phase *prometheus.GaugeVec weight *prometheus.GaugeVec analysis *prometheus.GaugeVec successes *prometheus.CounterVec @@ -90,6 +109,14 @@ func NewRecorder(controller string, register bool) Recorder { Help: "Last canary analysis result", }, []string{"name", "namespace"}) + phase := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Subsystem: controller, + Name: "canary_phase", + Help: "Current canary phase " + + "(0=Initializing, 1=Initialized, 2=Waiting, 3=Progressing, 4=WaitingPromotion, " + + "5=Promoting, 6=Finalising, 7=Succeeded, 8=Failed, 9=Terminating, 10=Terminated)", + }, []string{"name", "namespace"}) + weight := prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: controller, Name: "canary_weight", @@ -119,6 +146,7 @@ func NewRecorder(controller string, register bool) Recorder { prometheus.MustRegister(duration) prometheus.MustRegister(total) prometheus.MustRegister(status) + prometheus.MustRegister(phase) prometheus.MustRegister(weight) prometheus.MustRegister(analysis) prometheus.MustRegister(successes) @@ -130,6 +158,7 @@ func NewRecorder(controller string, register bool) Recorder { duration: duration, total: total, status: status, + phase: phase, weight: weight, analysis: analysis, successes: successes, @@ -168,6 +197,17 @@ func (cr *Recorder) SetStatus(cd *flaggerv1.Canary, phase flaggerv1.CanaryPhase) status = 1 } cr.status.WithLabelValues(cd.Spec.TargetRef.Name, cd.Namespace).Set(float64(status)) + cr.SetPhase(cd, phase) +} + +// SetPhase sets the canary phase as a unique value per phase, see canaryPhaseValues. +// Unknown phases are ignored to avoid recording a misleading value. +func (cr *Recorder) SetPhase(cd *flaggerv1.Canary, phase flaggerv1.CanaryPhase) { + value, ok := canaryPhaseValues[phase] + if !ok { + return + } + cr.phase.WithLabelValues(cd.Spec.TargetRef.Name, cd.Namespace).Set(value) } // SetWeight sets the weight values for primary and canary destinations @@ -191,6 +231,11 @@ func (cr *Recorder) GetStatusMetric() *prometheus.GaugeVec { return cr.status } +// GetPhaseMetric returns the phase metric +func (cr *Recorder) GetPhaseMetric() *prometheus.GaugeVec { + return cr.phase +} + // GetWeightMetric returns the weight metric func (cr *Recorder) GetWeightMetric() *prometheus.GaugeVec { return cr.weight diff --git a/pkg/metrics/recorder_test.go b/pkg/metrics/recorder_test.go index c98ff1231..478a8c37f 100644 --- a/pkg/metrics/recorder_test.go +++ b/pkg/metrics/recorder_test.go @@ -66,6 +66,30 @@ func TestRecorder_GetterMethodsWithData(t *testing.T) { expected: 1.0, checkValue: true, }, + { + name: "SetAndGetPhase", + setupFunc: func(r Recorder) { r.SetPhase(canary, flaggerv1.CanaryPhaseWaitingPromotion) }, + getterFunc: func(r Recorder) interface{} { return r.GetPhaseMetric() }, + labels: []string{"podinfo", "default"}, + expected: 4.0, + checkValue: true, + }, + { + name: "SetAndGetPhaseTerminating", + setupFunc: func(r Recorder) { r.SetPhase(canary, flaggerv1.CanaryPhaseTerminating) }, + getterFunc: func(r Recorder) interface{} { return r.GetPhaseMetric() }, + labels: []string{"podinfo", "default"}, + expected: 9.0, + checkValue: true, + }, + { + name: "SetAndGetPhaseTerminated", + setupFunc: func(r Recorder) { r.SetPhase(canary, flaggerv1.CanaryPhaseTerminated) }, + getterFunc: func(r Recorder) interface{} { return r.GetPhaseMetric() }, + labels: []string{"podinfo", "default"}, + expected: 10.0, + checkValue: true, + }, { name: "SetAndGetTotal", setupFunc: func(r Recorder) { r.SetTotal("default", 3) },