Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/gitbook/usage/monitoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,14 @@ flagger_canary_total{namespace="test"} 1
# 0 - running, 1 - successful, 2 - failed
flagger_canary_status{name="podinfo" namespace="test"} 1

# Current canary phase gauge
# 0 - Initializing, 1 - Initialized, 2 - Waiting, 3 - Progressing,
# 4 - WaitingPromotion, 5 - Promoting, 6 - Finalising, 7 - Succeeded,
# 8 - Failed, 9 - Terminating, 10 - Terminated
# A deleted canary keeps emitting the metric with the Terminated value (10),
# so queries can exclude canaries that are being deleted (Terminating, 9) or
# are already deleted (Terminated, 10) with e.g. flagger_canary_phase < 9
flagger_canary_phase{name="podinfo" namespace="test"} 7

# Canary traffic weight gauge
flagger_canary_weight{workload="podinfo-primary" namespace="test"} 95
flagger_canary_weight{workload="podinfo" namespace="test"} 5
Expand Down
3 changes: 3 additions & 0 deletions pkg/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ func NewController(
if ok {
ctrl.logger.Infof("Deleting %s.%s from cache", r.Name, r.Namespace)
ctrl.canaries.Delete(fmt.Sprintf("%s.%s", r.Name, r.Namespace))
// record the terminated phase so deleted canaries can be filtered
// out of the flagger_canary_phase metric (e.g. flagger_canary_phase < 9)
ctrl.recorder.SetPhase(&r, flaggerv1.CanaryPhaseTerminated)
}
},
})
Expand Down
4 changes: 4 additions & 0 deletions pkg/controller/finalizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ func (c *Controller) finalize(old interface{}) error {
return fmt.Errorf("failed to update status: %w", err)
}

// record the terminating phase on the flagger_canary_phase metric;
// the final Terminated phase is recorded by the informer delete handler
c.recorder.SetPhase(canary, flaggerv1.CanaryPhaseTerminating)

// record event
c.recordEventInfof(canary, "Terminating canary %s.%s", canary.Name, canary.Namespace)
}
Expand Down
12 changes: 12 additions & 0 deletions pkg/controller/scheduler_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,9 @@ func TestController_MetricsStateTransition(t *testing.T) {
actualStatus := testutil.ToFloat64(mocks.ctrl.recorder.GetStatusMetric().WithLabelValues("podinfo", "default"))
assert.Equal(t, float64(1), actualStatus)

actualPhase := testutil.ToFloat64(mocks.ctrl.recorder.GetPhaseMetric().WithLabelValues("podinfo", "default"))
assert.Equal(t, float64(1), actualPhase) // Initialized

actualTotal := testutil.ToFloat64(mocks.ctrl.recorder.GetTotalMetric().WithLabelValues("default"))
assert.GreaterOrEqual(t, actualTotal, float64(0))
dep2 := newDeploymentTestDeploymentV2()
Expand All @@ -210,6 +213,9 @@ func TestController_MetricsStateTransition(t *testing.T) {
actualStatus = testutil.ToFloat64(mocks.ctrl.recorder.GetStatusMetric().WithLabelValues("podinfo", "default"))
assert.Equal(t, float64(0), actualStatus)

actualPhase = testutil.ToFloat64(mocks.ctrl.recorder.GetPhaseMetric().WithLabelValues("podinfo", "default"))
assert.Equal(t, float64(3), actualPhase) // Progressing

actualPrimaryWeight := testutil.ToFloat64(mocks.ctrl.recorder.GetWeightMetric().WithLabelValues("podinfo-primary", "default"))
actualCanaryWeight := testutil.ToFloat64(mocks.ctrl.recorder.GetWeightMetric().WithLabelValues("podinfo", "default"))

Expand All @@ -234,6 +240,9 @@ func TestController_MetricsStateTransition(t *testing.T) {

successCount := testutil.ToFloat64(mocks.ctrl.recorder.GetSuccessesMetric().WithLabelValues("podinfo", "default", "canary", "completed"))
assert.Equal(t, float64(1), successCount)

actualPhase = testutil.ToFloat64(mocks.ctrl.recorder.GetPhaseMetric().WithLabelValues("podinfo", "default"))
assert.Equal(t, float64(7), actualPhase) // Succeeded
})

t.Run("failed canary rollback with count metrics", func(t *testing.T) {
Expand Down Expand Up @@ -270,6 +279,9 @@ func TestController_MetricsStateTransition(t *testing.T) {
actualStatus := testutil.ToFloat64(mocks.ctrl.recorder.GetStatusMetric().WithLabelValues("podinfo", "default"))
assert.Equal(t, float64(2), actualStatus)

actualPhase := testutil.ToFloat64(mocks.ctrl.recorder.GetPhaseMetric().WithLabelValues("podinfo", "default"))
assert.Equal(t, float64(8), actualPhase) // Failed

actualPrimaryWeight := testutil.ToFloat64(mocks.ctrl.recorder.GetWeightMetric().WithLabelValues("podinfo-primary", "default"))
actualCanaryWeight := testutil.ToFloat64(mocks.ctrl.recorder.GetWeightMetric().WithLabelValues("podinfo", "default"))
assert.Equal(t, float64(100), actualPrimaryWeight)
Expand Down
45 changes: 45 additions & 0 deletions pkg/metrics/recorder.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,24 @@ const (
AnalysisStatusSkipped = "skipped"
)

// canaryPhaseValues is the lookup table behind the flagger_canary_phase
// metric: every canary phase is assigned its own numeric gauge value, equal to
// the phase's position in the ordered list below (starting at 0).
// flagger_canary_status folds phases into running/successful/failed; this
// table keeps each phase distinguishable so it can be plotted, for example, on
// a Grafana state-timeline.
//
// Append new phases at the end of the list so already published values stay
// stable.
var canaryPhaseValues = func() map[flaggerv1.CanaryPhase]float64 {
	ordered := []flaggerv1.CanaryPhase{
		flaggerv1.CanaryPhaseInitializing,     // 0
		flaggerv1.CanaryPhaseInitialized,      // 1
		flaggerv1.CanaryPhaseWaiting,          // 2
		flaggerv1.CanaryPhaseProgressing,      // 3
		flaggerv1.CanaryPhaseWaitingPromotion, // 4
		flaggerv1.CanaryPhasePromoting,        // 5
		flaggerv1.CanaryPhaseFinalising,       // 6
		flaggerv1.CanaryPhaseSucceeded,        // 7
		flaggerv1.CanaryPhaseFailed,           // 8
		flaggerv1.CanaryPhaseTerminating,      // 9
		flaggerv1.CanaryPhaseTerminated,       // 10
	}

	table := make(map[flaggerv1.CanaryPhase]float64, len(ordered))
	for position, phase := range ordered {
		table[phase] = float64(position)
	}
	return table
}()

// CanaryMetricLabels holds labels for canary metrics
type CanaryMetricLabels struct {
Name string
Expand All @@ -56,6 +74,7 @@ type Recorder struct {
duration *prometheus.HistogramVec
total *prometheus.GaugeVec
status *prometheus.GaugeVec
phase *prometheus.GaugeVec
weight *prometheus.GaugeVec
analysis *prometheus.GaugeVec
successes *prometheus.CounterVec
Expand Down Expand Up @@ -90,6 +109,14 @@ func NewRecorder(controller string, register bool) Recorder {
Help: "Last canary analysis result",
}, []string{"name", "namespace"})

phase := prometheus.NewGaugeVec(prometheus.GaugeOpts{
Subsystem: controller,
Name: "canary_phase",
Help: "Current canary phase " +
"(0=Initializing, 1=Initialized, 2=Waiting, 3=Progressing, 4=WaitingPromotion, " +
"5=Promoting, 6=Finalising, 7=Succeeded, 8=Failed, 9=Terminating, 10=Terminated)",
}, []string{"name", "namespace"})

weight := prometheus.NewGaugeVec(prometheus.GaugeOpts{
Subsystem: controller,
Name: "canary_weight",
Expand Down Expand Up @@ -119,6 +146,7 @@ func NewRecorder(controller string, register bool) Recorder {
prometheus.MustRegister(duration)
prometheus.MustRegister(total)
prometheus.MustRegister(status)
prometheus.MustRegister(phase)
prometheus.MustRegister(weight)
prometheus.MustRegister(analysis)
prometheus.MustRegister(successes)
Expand All @@ -130,6 +158,7 @@ func NewRecorder(controller string, register bool) Recorder {
duration: duration,
total: total,
status: status,
phase: phase,
weight: weight,
analysis: analysis,
successes: successes,
Expand Down Expand Up @@ -168,6 +197,17 @@ func (cr *Recorder) SetStatus(cd *flaggerv1.Canary, phase flaggerv1.CanaryPhase)
status = 1
}
cr.status.WithLabelValues(cd.Spec.TargetRef.Name, cd.Namespace).Set(float64(status))
cr.SetPhase(cd, phase)
}

// SetPhase publishes the given phase on the canary phase gauge using the
// numeric encoding from canaryPhaseValues. The series is labelled with the
// canary's target reference name and its namespace.
// A phase with no entry in canaryPhaseValues leaves the gauge untouched, so no
// misleading value is ever recorded.
func (cr *Recorder) SetPhase(cd *flaggerv1.Canary, phase flaggerv1.CanaryPhase) {
	if encoded, known := canaryPhaseValues[phase]; known {
		gauge := cr.phase.WithLabelValues(cd.Spec.TargetRef.Name, cd.Namespace)
		gauge.Set(encoded)
	}
}

// SetWeight sets the weight values for primary and canary destinations
Expand All @@ -191,6 +231,11 @@ func (cr *Recorder) GetStatusMetric() *prometheus.GaugeVec {
return cr.status
}

// GetPhaseMetric exposes the gauge vector that stores the per-canary phase
// value held by this recorder.
func (cr *Recorder) GetPhaseMetric() *prometheus.GaugeVec {
	phaseGauge := cr.phase
	return phaseGauge
}

// GetWeightMetric returns the weight metric
func (cr *Recorder) GetWeightMetric() *prometheus.GaugeVec {
return cr.weight
Expand Down
24 changes: 24 additions & 0 deletions pkg/metrics/recorder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,30 @@ func TestRecorder_GetterMethodsWithData(t *testing.T) {
expected: 1.0,
checkValue: true,
},
{
name: "SetAndGetPhase",
setupFunc: func(r Recorder) { r.SetPhase(canary, flaggerv1.CanaryPhaseWaitingPromotion) },
getterFunc: func(r Recorder) interface{} { return r.GetPhaseMetric() },
labels: []string{"podinfo", "default"},
expected: 4.0,
checkValue: true,
},
{
name: "SetAndGetPhaseTerminating",
setupFunc: func(r Recorder) { r.SetPhase(canary, flaggerv1.CanaryPhaseTerminating) },
getterFunc: func(r Recorder) interface{} { return r.GetPhaseMetric() },
labels: []string{"podinfo", "default"},
expected: 9.0,
checkValue: true,
},
{
name: "SetAndGetPhaseTerminated",
setupFunc: func(r Recorder) { r.SetPhase(canary, flaggerv1.CanaryPhaseTerminated) },
getterFunc: func(r Recorder) interface{} { return r.GetPhaseMetric() },
labels: []string{"podinfo", "default"},
expected: 10.0,
checkValue: true,
},
{
name: "SetAndGetTotal",
setupFunc: func(r Recorder) { r.SetTotal("default", 3) },
Expand Down