@@ -108,15 +108,21 @@ var (
108108 }, []string {"vpa_size_log2" },
109109 )
110110
111- // TODO: Add metrics for failed in-place update attempts
111+ failedInPlaceUpdateAttempts = prometheus .NewCounterVec (
112+ prometheus.CounterOpts {
113+ Namespace : metricsNamespace ,
114+ Name : "failed_in_place_update_attempts_total" ,
115+ Help : "Number of failed attempts to update Pods in-place." ,
116+ }, []string {"vpa_size_log2" , "reason" },
117+ )
112118
113119 functionLatency = metrics .CreateExecutionTimeMetric (metricsNamespace ,
114120 "Time spent in various parts of VPA Updater main loop." )
115121)
116122
117123// Register initializes all metrics for VPA Updater
118124func Register () {
119- prometheus .MustRegister (controlledCount , evictableCount , evictedCount , vpasWithEvictablePodsCount , vpasWithEvictedPodsCount , inPlaceUpdatableCount , inPlaceUpdatedCount , vpasWithInPlaceUpdatablePodsCount , vpasWithInPlaceUpdatedPodsCount , functionLatency )
125+ prometheus .MustRegister (controlledCount , evictableCount , evictedCount , vpasWithEvictablePodsCount , vpasWithEvictedPodsCount , inPlaceUpdatableCount , inPlaceUpdatedCount , vpasWithInPlaceUpdatablePodsCount , vpasWithInPlaceUpdatedPodsCount , failedInPlaceUpdateAttempts , functionLatency )
120126}
121127
122128// NewExecutionTimer provides a timer for Updater's RunOnce execution
@@ -179,6 +185,12 @@ func AddInPlaceUpdatedPod(vpaSize int) {
179185 inPlaceUpdatedCount .WithLabelValues (strconv .Itoa (log2 )).Inc ()
180186}
181187
188+ // RecordFailedInPlaceUpdate increases the counter of failed in-place update attempts by given VPA size and reason
189+ func RecordFailedInPlaceUpdate (vpaSize int , reason string ) {
190+ log2 := metrics .GetVpaSizeLog2 (vpaSize )
191+ failedInPlaceUpdateAttempts .WithLabelValues (strconv .Itoa (log2 ), reason ).Inc ()
192+ }
193+
182194// Add increases the counter for the given VPA size
183195func (g * SizeBasedGauge ) Add (vpaSize int , value int ) {
184196 log2 := metrics .GetVpaSizeLog2 (vpaSize )
0 commit comments