Skip to content

Commit 817afda

Browse files
gagan16k and aaronfern authored
Fix mcm_machine_set_failed_machines metric to more accurately reflect current state (#1025)
Co-authored-by: Aaron Francis Fernandes <79958509+aaronfern@users.noreply.github.com>
1 parent 0e1acc6 commit 817afda

File tree

2 files changed

+8
-5
lines changed

2 files changed

+8
-5
lines changed

pkg/controller/deployment_machineset_util.go

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -126,13 +126,11 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al
126126
}
127127
}
128128

129-
// Update the FailedMachines field only if we see new failures
130-
// Clear FailedMachines if ready replicas equals total replicas,
131-
// which means the machineset doesn't have any machine objects which are in any failed state
132-
// #nosec G115 -- number of machines will not exceed MaxInt32
129+
// Update the FailedMachines field when we see new failures
130+
// Clear FailedMachines if there are no failed machines.
133131
if len(failedMachines) > 0 {
134132
newStatus.FailedMachines = &failedMachines
135-
} else if int32(readyReplicasCount) == is.Status.Replicas {
133+
} else {
136134
newStatus.FailedMachines = nil
137135
}
138136

pkg/controller/metrics.go

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,11 @@ func updateMachineSetStatusRelatedMetric(machineSet *v1alpha1.MachineSet, msMeta
119119
}
120120

121121
func updateMachineSetStatusFailedMachinesMetric(machineSet *v1alpha1.MachineSet, msMeta metav1.ObjectMeta) {
122+
metrics.MachineSetStatusFailedMachines.DeletePartialMatch(prometheus.Labels{
123+
"name": msMeta.Name,
124+
"namespace": msMeta.Namespace,
125+
})
126+
122127
if machineSet.Status.FailedMachines != nil {
123128
for _, failedMachine := range *machineSet.Status.FailedMachines {
124129
metrics.MachineSetStatusFailedMachines.With(prometheus.Labels{

0 commit comments

Comments
 (0)