Skip to content

Commit 6a2b950

Browse files
committed
Add metric for failed in-place update attempt
Signed-off-by: Omer Aplatony <omerap12@gmail.com>
1 parent f302346 commit 6a2b950

File tree

2 files changed

+15
-2
lines changed

2 files changed

+15
-2
lines changed

vertical-pod-autoscaler/pkg/updater/logic/updater.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ func (u *updater) RunOnce(ctx context.Context) {
284284
err := inPlaceLimiter.InPlaceUpdate(pod, vpa, u.eventRecorder)
285285
if err != nil {
286286
klog.V(0).InfoS("In-place update failed", "error", err, "pod", klog.KObj(pod))
287+
metrics_updater.RecordFailedInPlaceUpdate(vpaSize, "InPlaceUpdateError")
287288
continue
288289
}
289290
withInPlaceUpdated = true

vertical-pod-autoscaler/pkg/utils/metrics/updater/updater.go

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,15 +108,21 @@ var (
108108
}, []string{"vpa_size_log2"},
109109
)
110110

111-
// TODO: Add metrics for failed in-place update attempts
111+
failedInPlaceUpdateAttempts = prometheus.NewCounterVec(
112+
prometheus.CounterOpts{
113+
Namespace: metricsNamespace,
114+
Name: "failed_in_place_update_attempts_total",
115+
Help: "Number of failed attempts to update Pods in-place.",
116+
}, []string{"vpa_size_log2", "reason"},
117+
)
112118

113119
functionLatency = metrics.CreateExecutionTimeMetric(metricsNamespace,
114120
"Time spent in various parts of VPA Updater main loop.")
115121
)
116122

117123
// Register initializes all metrics for VPA Updater
118124
func Register() {
119-
prometheus.MustRegister(controlledCount, evictableCount, evictedCount, vpasWithEvictablePodsCount, vpasWithEvictedPodsCount, inPlaceUpdatableCount, inPlaceUpdatedCount, vpasWithInPlaceUpdatablePodsCount, vpasWithInPlaceUpdatedPodsCount, functionLatency)
125+
prometheus.MustRegister(controlledCount, evictableCount, evictedCount, vpasWithEvictablePodsCount, vpasWithEvictedPodsCount, inPlaceUpdatableCount, inPlaceUpdatedCount, vpasWithInPlaceUpdatablePodsCount, vpasWithInPlaceUpdatedPodsCount, failedInPlaceUpdateAttempts, functionLatency)
120126
}
121127

122128
// NewExecutionTimer provides a timer for Updater's RunOnce execution
@@ -179,6 +185,12 @@ func AddInPlaceUpdatedPod(vpaSize int) {
179185
inPlaceUpdatedCount.WithLabelValues(strconv.Itoa(log2)).Inc()
180186
}
181187

188+
// RecordFailedInPlaceUpdate increments the counter of failed in-place update attempts for the given VPA size and reason.
189+
func RecordFailedInPlaceUpdate(vpaSize int, reason string) {
190+
log2 := metrics.GetVpaSizeLog2(vpaSize)
191+
failedInPlaceUpdateAttempts.WithLabelValues(strconv.Itoa(log2), reason).Inc()
192+
}
193+
182194
// Add increases the counter for the given VPA size
183195
func (g *SizeBasedGauge) Add(vpaSize int, value int) {
184196
log2 := metrics.GetVpaSizeLog2(vpaSize)

0 commit comments

Comments
 (0)