diff --git a/pkg/apihelpers/apihelpers.go b/pkg/apihelpers/apihelpers.go index 15277963fa..d62318e241 100644 --- a/pkg/apihelpers/apihelpers.go +++ b/pkg/apihelpers/apihelpers.go @@ -7,6 +7,7 @@ package apihelpers import ( "fmt" + configv1 "github.com/openshift/api/config/v1" mcfgv1 "github.com/openshift/api/machineconfiguration/v1" opv1 "github.com/openshift/api/operator/v1" "github.com/openshift/machine-config-operator/pkg/daemon/constants" @@ -16,6 +17,7 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/sets" + k8sversion "k8s.io/apimachinery/pkg/util/version" "k8s.io/klog/v2" ) @@ -657,3 +659,32 @@ func GetSkewEnforcementStatusNone() opv1.BootImageSkewEnforcementStatus { Mode: opv1.BootImageSkewEnforcementModeStatusNone, } } + +// GetOCPInstallVersion extracts the install-time OCP version from +// ClusterVersion history — the oldest completed update entry, normalised to Major.Minor.Patch. +// Returns "0.0.0" as a conservative fallback if the history is empty or the version cannot be parsed. +func GetOCPInstallVersion(clusterVersion *configv1.ClusterVersion) string { + // This is not possible, but return a sane value to avoid API validation failures. + if len(clusterVersion.Status.History) == 0 { + klog.Warning("ClusterVersion has no history, cannot determine install version") + return "0.0.0" + } + var installVersion string + for i := len(clusterVersion.Status.History) - 1; i >= 0; i-- { + if clusterVersion.Status.History[i].State == configv1.CompletedUpdate { + installVersion = clusterVersion.Status.History[i].Version + break + } + } + if installVersion == "" { + installVersion = clusterVersion.Status.History[len(clusterVersion.Status.History)-1].Version + } + parsedVersion, err := k8sversion.ParseGeneric(installVersion) + if err != nil { + // Version strings from ClusterVersion history are always valid semver in practice. + // Log a warning and return 0.0.0 as a sanity check. + klog.Warningf("Failed to parse install version %q: %v", installVersion, err) + return "0.0.0" + } + return fmt.Sprintf("%d.%d.%d", parsedVersion.Major(), parsedVersion.Minor(), parsedVersion.Patch()) +} diff --git a/pkg/controller/bootimage/boot_image_controller.go b/pkg/controller/bootimage/boot_image_controller.go index c69788f974..8e54b0e111 100644 --- a/pkg/controller/bootimage/boot_image_controller.go +++ b/pkg/controller/bootimage/boot_image_controller.go @@ -39,6 +39,7 @@ import ( mcopinformersv1 "github.com/openshift/client-go/operator/informers/externalversions/operator/v1" mcoplistersv1 "github.com/openshift/client-go/operator/listers/operator/v1" + apihelpers "github.com/openshift/machine-config-operator/pkg/apihelpers" ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" "github.com/openshift/machine-config-operator/pkg/osimagestream" ) @@ -628,7 +629,42 @@ func (ctrl *Controller) updateClusterBootImage() { } // Only make an API call if there is an update to the skew enforcement status - if !reflect.DeepEqual(mcop.Status.BootImageSkewEnforcementStatus, newBootImageSkewEnforcementStatus) { + if !reflect.DeepEqual(mcop.Status.BootImageSkewEnforcementStatus, *newBootImageSkewEnforcementStatus) { + mcop.Status.BootImageSkewEnforcementStatus = *newBootImageSkewEnforcementStatus + ctrl.updateMachineConfigurationStatus(mcop.Status) + } +} + +// resetClusterBootImage writes the cluster's install OCP version into ClusterBootImageAutomatic when +// one or more MachineSets were reconcileSkipped. The recorded value can no longer be trusted as an +// accurate description of the full cluster state (a newly-added or changed MachineSet may be on an +// older image), so we fall back to the install version — the oldest version we can reliably claim — +// so that the skew check correctly surfaces a violation if the cluster is genuinely out of date. +func (ctrl *Controller) resetClusterBootImage() { + mcop, err := ctrl.mcopClient.OperatorV1().MachineConfigurations().Get(context.TODO(), ctrlcommon.MCOOperatorKnobsObjectName, metav1.GetOptions{}) + if err != nil { + klog.Errorf("error resetting cluster boot image record: %s", err) + return + } + if mcop.Status.BootImageSkewEnforcementStatus.Mode != opv1.BootImageSkewEnforcementModeStatusAutomatic { + return + } + + // Default to a conservative value to be on the safe side. + ocpVersion := "0.0.0" + clusterVersion, err := ctrl.clusterVersionLister.Get("version") + if err != nil { + klog.Warningf("Failed to get ClusterVersion for boot image reset: %v", err) + } else { + ocpVersion = apihelpers.GetOCPInstallVersion(clusterVersion) + } + + newBootImageSkewEnforcementStatus := mcop.Status.BootImageSkewEnforcementStatus.DeepCopy() + newBootImageSkewEnforcementStatus.Automatic = opv1.ClusterBootImageAutomatic{ + OCPVersion: ocpVersion, + } + if !reflect.DeepEqual(mcop.Status.BootImageSkewEnforcementStatus, *newBootImageSkewEnforcementStatus) { + klog.Infof("Resetting cluster boot image record to install version %s due to reconcileSkipped MachineSets", ocpVersion) mcop.Status.BootImageSkewEnforcementStatus = *newBootImageSkewEnforcementStatus ctrl.updateMachineConfigurationStatus(mcop.Status) } diff --git a/pkg/controller/bootimage/boot_image_controller_test.go b/pkg/controller/bootimage/boot_image_controller_test.go index 6ae3b4f980..4bdc47171a 100644 --- a/pkg/controller/bootimage/boot_image_controller_test.go +++ b/pkg/controller/bootimage/boot_image_controller_test.go @@ -1,13 +1,16 @@ package bootimage import ( + "context" "testing" "github.com/coreos/stream-metadata-go/stream" "github.com/coreos/stream-metadata-go/stream/rhcos" osconfigv1 "github.com/openshift/api/config/v1" machinev1beta1 "github.com/openshift/api/machine/v1beta1" + opv1 "github.com/openshift/api/operator/v1" configlistersv1 "github.com/openshift/client-go/config/listers/config/v1" + fakemcopclient "github.com/openshift/client-go/operator/clientset/versioned/fake" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" @@ -16,6 +19,7 @@ import ( "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" + ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" ) func TestIsClusterStable(t *testing.T) { @@ -533,7 +537,7 @@ func TestReconcileAzureProviderSpec(t *testing.T) { }, } - fakeClient := fake.NewSimpleClientset(testSecret) + fakeClient := fake.NewClientset(testSecret) tests := []struct { name string @@ -969,3 +973,106 @@ func TestReconcileAzureProviderSpec(t *testing.T) { }) } } + +func TestResetClusterBootImage(t *testing.T) { + cases := []struct { + name string + history []osconfigv1.UpdateHistory + mode opv1.BootImageSkewEnforcementModeStatus + initialOCPVersion string + expectedVersion string + }{ + { + name: "resets to oldest completed entry (install version)", + history: []osconfigv1.UpdateHistory{ + {State: osconfigv1.CompletedUpdate, Version: "4.19.0"}, + {State: osconfigv1.CompletedUpdate, Version: "4.18.0"}, + }, + mode: opv1.BootImageSkewEnforcementModeStatusAutomatic, + initialOCPVersion: "4.19.0", + expectedVersion: "4.18.0", + }, + { + name: "no-op when mode is not automatic", + history: []osconfigv1.UpdateHistory{ + {State: osconfigv1.CompletedUpdate, Version: "4.18.0"}, + }, + mode: opv1.BootImageSkewEnforcementModeStatusNone, + initialOCPVersion: "4.19.0", + expectedVersion: "4.19.0", + }, + { + name: "no-op when already at install version", + history: []osconfigv1.UpdateHistory{ + {State: osconfigv1.CompletedUpdate, Version: "4.18.0"}, + }, + mode: opv1.BootImageSkewEnforcementModeStatusAutomatic, + initialOCPVersion: "4.18.0", + expectedVersion: "4.18.0", + }, + { + name: "falls back to 0.0.0 when history is empty", + history: []osconfigv1.UpdateHistory{}, + mode: opv1.BootImageSkewEnforcementModeStatusAutomatic, + initialOCPVersion: "4.19.0", + expectedVersion: "0.0.0", + }, + { + name: "uses last entry when no completed update exists", + history: []osconfigv1.UpdateHistory{ + {State: osconfigv1.PartialUpdate, Version: "4.18.0"}, + }, + mode: opv1.BootImageSkewEnforcementModeStatusAutomatic, + initialOCPVersion: "4.19.0", + expectedVersion: "4.18.0", + }, + { + // history==nil is the sentinel for "do not populate the lister", + // exercising the clusterVersionLister.Get() error path. + name: "falls back to 0.0.0 when ClusterVersion unavailable", + history: nil, + mode: opv1.BootImageSkewEnforcementModeStatusAutomatic, + initialOCPVersion: "4.19.0", + expectedVersion: "0.0.0", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + mcop := &opv1.MachineConfiguration{ + ObjectMeta: v1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: tc.mode, + Automatic: opv1.ClusterBootImageAutomatic{ + OCPVersion: tc.initialOCPVersion, + }, + }, + }, + } + + cvIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{}) + if tc.history != nil { + cv := &osconfigv1.ClusterVersion{ + ObjectMeta: v1.ObjectMeta{Name: "version"}, + Status: osconfigv1.ClusterVersionStatus{History: tc.history}, + } + require.NoError(t, cvIndexer.Add(cv)) + } + + fakeMcopClient := fakemcopclient.NewClientset(mcop) + ctrl := &Controller{ + mcopClient: fakeMcopClient, + clusterVersionLister: configlistersv1.NewClusterVersionLister(cvIndexer), + } + + ctrl.resetClusterBootImage() + + updated, err := fakeMcopClient.OperatorV1().MachineConfigurations().Get( + context.TODO(), ctrlcommon.MCOOperatorKnobsObjectName, v1.GetOptions{}) + require.NoError(t, err) + assert.Equal(t, tc.expectedVersion, + updated.Status.BootImageSkewEnforcementStatus.Automatic.OCPVersion) + }) + } +} diff --git a/pkg/controller/bootimage/cpms_helpers.go b/pkg/controller/bootimage/cpms_helpers.go index b362275395..13270630d2 100644 --- a/pkg/controller/bootimage/cpms_helpers.go +++ b/pkg/controller/bootimage/cpms_helpers.go @@ -14,7 +14,6 @@ import ( machinev1 "github.com/openshift/api/machine/v1" opv1 "github.com/openshift/api/operator/v1" ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" - operatorversion "github.com/openshift/machine-config-operator/pkg/version" "sigs.k8s.io/yaml" corev1 "k8s.io/api/core/v1" @@ -145,31 +144,9 @@ func (ctrl *Controller) syncControlPlaneMachineSet(controlPlaneMachineSet *machi return fmt.Errorf("failed to fetch infra object during ControlPlaneMachineSet sync: %w", err) } - // Fetch the bootimage configmap & ensure it has been stamped by the operator. This is done by - // the operator when a master node successfully updates to a new image. This is - // to prevent machinesets from being updated before the operator itself has updated. - // If it hasn't been updated, exit and wait for a resync. configMap, err := ctrl.mcoCmLister.ConfigMaps(ctrlcommon.MCONamespace).Get(ctrlcommon.BootImagesConfigMapName) if err != nil { - return fmt.Errorf("failed to fetch coreos-bootimages config map duringControlPlaneMachineSet sync: %w", err) - } - versionHashFromCM, versionHashFound := configMap.Data[ctrlcommon.MCOVersionHashKey] - if !versionHashFound { - klog.Infof("failed to find mco version hash in %s configmap, sync will exit to wait for the MCO upgrade to complete", ctrlcommon.BootImagesConfigMapName) - return nil - } - if versionHashFromCM != operatorversion.Hash { - klog.Infof("mismatch between MCO hash version stored in configmap and current MCO version; sync will exit to wait for the MCO upgrade to complete") - return nil - } - releaseVersionFromCM, releaseVersionFound := configMap.Data[ctrlcommon.OCPReleaseVersionKey] - if !releaseVersionFound { - klog.Infof("failed to find OCP release version in %s configmap, sync will exit to wait for the MCO upgrade to complete", ctrlcommon.BootImagesConfigMapName) - return nil - } - if releaseVersionFromCM != operatorversion.ReleaseVersion { - klog.Infof("mismatch between OCP release version stored in configmap and current MCO release version; sync will exit to wait for the MCO upgrade to complete") - return nil + return fmt.Errorf("failed to fetch coreos-bootimages config map during ControlPlaneMachineSet sync: %w", err) } // Check if the this ControlPlaneMachineSet requires an update diff --git a/pkg/controller/bootimage/ms_helpers.go b/pkg/controller/bootimage/ms_helpers.go index 3380d07338..a4bd05fb84 100644 --- a/pkg/controller/bootimage/ms_helpers.go +++ b/pkg/controller/bootimage/ms_helpers.go @@ -13,7 +13,6 @@ import ( machinev1beta1 "github.com/openshift/api/machine/v1beta1" opv1 "github.com/openshift/api/operator/v1" ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" - operatorversion "github.com/openshift/machine-config-operator/pkg/version" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" kubeErrs "k8s.io/apimachinery/pkg/util/errors" @@ -69,6 +68,17 @@ func (ctrl *Controller) syncMAPIMachineSets(reason string) { } } + var configMap *corev1.ConfigMap + // The configMap is not needed if no resources are being reconciled; so check that first before making the API call. + if len(mapiMachineSets) > 0 { + configMap, err = ctrl.mcoCmLister.ConfigMaps(ctrlcommon.MCONamespace).Get(ctrlcommon.BootImagesConfigMapName) + if err != nil { + klog.Errorf("failed to fetch coreos-bootimages config map: %v", err) + ctrl.updateConditions(reason, fmt.Errorf("failed to fetch coreos-bootimages config map: %w", err), opv1.MachineConfigurationBootImageUpdateDegraded) + return + } + } + // Reset stats before initiating reconciliation loop ctrl.mapiStats.inProgress = 0 ctrl.mapiStats.totalCount = len(mapiMachineSets) @@ -80,7 +90,7 @@ func (ctrl *Controller) syncMAPIMachineSets(reason string) { ctrl.updateConditions(reason, nil, opv1.MachineConfigurationBootImageUpdateProgressing) for _, machineSet := range mapiMachineSets { - patchSkipped, err := ctrl.syncMAPIMachineSet(machineSet) + reconcileSkipped, err := ctrl.syncMAPIMachineSet(machineSet, configMap) if err == nil { ctrl.mapiStats.inProgress++ } else { @@ -88,7 +98,7 @@ func (ctrl *Controller) syncMAPIMachineSets(reason string) { syncErrors = append(syncErrors, fmt.Errorf("error syncing MAPI MachineSet %s: %w", machineSet.Name, err)) ctrl.mapiStats.erroredCount++ } - if patchSkipped { + if reconcileSkipped { ctrl.mapiStats.skippedCount++ } // Update progressing conditions every step of the loop @@ -96,16 +106,30 @@ func (ctrl *Controller) syncMAPIMachineSets(reason string) { } // Update/Clear degrade conditions based on errors from this loop ctrl.updateConditions(reason, kubeErrs.NewAggregate(syncErrors), opv1.MachineConfigurationBootImageUpdateDegraded) - // If no machinesets were skipped, update the cluster boot image record if ctrl.fgHandler.Enabled(features.FeatureGateBootImageSkewEnforcement) { - if ctrl.mapiStats.skippedCount == 0 { + switch { + case ctrl.mapiStats.skippedCount == 0 && len(syncErrors) == 0: + // All MachineSets reconciled cleanly — record the current OCP version. ctrl.updateClusterBootImage() + case ctrl.mapiStats.skippedCount > 0 && len(syncErrors) == 0: + // One or more MachineSets were reconcileSkipped without an error. The existing + // ClusterBootImageAutomatic value is no longer trustworthy (a new or changed + // MachineSet may be running an older image), so reset it to the cluster install + // version to ensure the skew check surfaces a violation if warranted. + ctrl.resetClusterBootImage() } + // Errors (syncErrors > 0) are already surfaced via the Degraded condition, which + // checkBootImageControllerReady checks first — no boot image record update needed. } } -// syncMAPIMachineSet will attempt to reconcile the provided machineset -func (ctrl *Controller) syncMAPIMachineSet(machineSet *machinev1beta1.MachineSet) (bool, error) { +// syncMAPIMachineSet will attempt to reconcile the provided machineset. +// Returns (reconcileSkipped, error): reconcileSkipped=true means something blocked the +// boot image update that requires manual intervention; rather than returning an +// error immediately, the condition is surfaced via skew enforcement. +// reconcileSkipped=false means a patch was applied, the MachineSet was already up to +// date, or it is out of scope for the MAPI path (e.g. migrated to CAPI authority). +func (ctrl *Controller) syncMAPIMachineSet(machineSet *machinev1beta1.MachineSet, configMap *corev1.ConfigMap) (bool, error) { startTime := time.Now() klog.V(4).Infof("Started syncing MAPI machineset %q (%v)", machineSet.Name, startTime) @@ -162,40 +186,15 @@ func (ctrl *Controller) syncMAPIMachineSet(machineSet *machinev1beta1.MachineSet return false, fmt.Errorf("failed to fetch infra object during machineset sync: %w", err) } - // Fetch the bootimage configmap & ensure it has been stamped by the operator. This is done by - // the operator when a master node successfully updates to a new image. This is - // to prevent machinesets from being updated before the operator itself has updated. - // If it hasn't been updated, exit and wait for a resync. - configMap, err := ctrl.mcoCmLister.ConfigMaps(ctrlcommon.MCONamespace).Get(ctrlcommon.BootImagesConfigMapName) - if err != nil { - return false, fmt.Errorf("failed to fetch coreos-bootimages config map during machineset sync: %w", err) - } - versionHashFromCM, versionHashFound := configMap.Data[ctrlcommon.MCOVersionHashKey] - if !versionHashFound { - klog.Infof("failed to find mco version hash in %s configmap, sync will exit to wait for the MCO upgrade to complete", ctrlcommon.BootImagesConfigMapName) - return true, nil - } - if versionHashFromCM != operatorversion.Hash { - klog.Infof("mismatch between MCO hash version stored in configmap and current MCO version; sync will exit to wait for the MCO upgrade to complete") - return true, nil - } - releaseVersionFromCM, releaseVersionFound := configMap.Data[ctrlcommon.OCPReleaseVersionKey] - if !releaseVersionFound { - klog.Infof("failed to find OCP release version in %s configmap, sync will exit to wait for the MCO upgrade to complete", ctrlcommon.BootImagesConfigMapName) - return true, nil - } - if releaseVersionFromCM != operatorversion.ReleaseVersion { - klog.Infof("mismatch between OCP release version stored in configmap and current MCO release version; sync will exit to wait for the MCO upgrade to complete") - return true, nil - } - // Check if the this MachineSet requires an update - patchRequired, patchSkipped, newMachineSet, err := checkMachineSet(infra, machineSet, configMap, arch, ctrl.kubeClient) + patchRequired, reconcileSkipped, newMachineSet, err := checkMachineSet(infra, machineSet, configMap, arch, ctrl.kubeClient) if err != nil { return false, fmt.Errorf("failed to reconcile machineset %s, err: %w", machineSet.Name, err) } - // Patch the machineset if required + if reconcileSkipped { + return true, nil + } if patchRequired { if ctrl.checkMAPIMachineSetHotLoop(newMachineSet, configMap, infra, arch) { return false, fmt.Errorf("refusing to reconcile machineset %s, hot loop detected. Please opt-out of boot image updates, adjust your machine provisioning workflow to prevent hot loops and opt back in to resume boot image updates", machineSet.Name) @@ -208,7 +207,7 @@ func (ctrl *Controller) syncMAPIMachineSet(machineSet *machinev1beta1.MachineSet return false, nil } klog.Infof("No patching required for MAPI machineset %s", machineSet.Name) - return patchSkipped, nil + return false, nil } // getMAPIBootImageValue returns the value used for hot loop detection. diff --git a/pkg/controller/bootimage/platform_helpers.go b/pkg/controller/bootimage/platform_helpers.go index ef354c4dc5..6581ab262d 100644 --- a/pkg/controller/bootimage/platform_helpers.go +++ b/pkg/controller/bootimage/platform_helpers.go @@ -42,9 +42,11 @@ type publisherOffer struct { publisher, offer string } -// This function calls the appropriate reconcile function based on the infra type -// On success, it will return a bool indicating if a patch is required, and an updated -// machineset object if any. It will return an error if any of the above steps fail. +// checkMachineSet calls the appropriate reconcile function based on the infra type. +// Returns (patchRequired, reconcileSkipped, newMachineSet, error). +// reconcileSkipped=true means the boot image could not be updated automatically (e.g. +// custom or unknown image) and requires manual intervention; the condition is surfaced +// via skew enforcement rather than returned as an error. func checkMachineSet(infra *osconfigv1.Infrastructure, machineSet *machinev1beta1.MachineSet, configMap *corev1.ConfigMap, arch string, secretClient clientset.Interface) (bool, bool, *machinev1beta1.MachineSet, error) { switch infra.Status.PlatformStatus.Type { case osconfigv1.AWSPlatformType: @@ -61,7 +63,8 @@ func checkMachineSet(infra *osconfigv1.Infrastructure, machineSet *machinev1beta } } -// Generic reconcile function that handles the common pattern across all platforms +// reconcilePlatform is a generic reconcile function that handles the common pattern across all platforms. +// Returns (patchRequired, reconcileSkipped, newMachineSet, error). See checkMachineSet for reconcileSkipped semantics. // nolint:dupl // I separated this from reconcilePlatformCPMS for readability func reconcilePlatform[T any]( machineSet *machinev1beta1.MachineSet, @@ -70,7 +73,7 @@ func reconcilePlatform[T any]( arch string, secretClient clientset.Interface, reconcileProviderSpec func(*stream.Stream, string, *osconfigv1.Infrastructure, *T, string, clientset.Interface) (bool, bool, *T, error), -) (patchRequired, patchSkipped bool, newMachineSet *machinev1beta1.MachineSet, err error) { +) (patchRequired, reconcileSkipped bool, newMachineSet *machinev1beta1.MachineSet, err error) { klog.Infof("Reconciling MAPI machineset %s on %s, with arch %s", machineSet.Name, string(infra.Status.PlatformStatus.Type), arch) // Unmarshal the provider spec @@ -86,14 +89,14 @@ func reconcilePlatform[T any]( } // Reconcile the provider spec - patchRequired, patchSkipped, newProviderSpec, err := reconcileProviderSpec(streamData, arch, infra, providerSpec, machineSet.Name, secretClient) + patchRequired, reconcileSkipped, newProviderSpec, err := reconcileProviderSpec(streamData, arch, infra, providerSpec, machineSet.Name, secretClient) if err != nil { return false, false, nil, err } // If no patch is required, exit early if !patchRequired { - return false, patchSkipped, nil, nil + return false, reconcileSkipped, nil, nil } // If patch is required, marshal the new providerspec into the machineset diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index cff069be95..c4c1a9d1ef 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -2560,7 +2560,7 @@ func (optr *Operator) syncBootImageSkewEnforcementStatus(mcop *opv1.MachineConfi } // Grab install time OCP version - ocpVersionAtInstall := optr.getOCPInstallVersionFromClusterVersion() + ocpVersionAtInstall := optr.getOCPInstallVersion() // Spec override takes priority over all platform defaults. if mcop.Spec.BootImageSkewEnforcement != (opv1.BootImageSkewEnforcementConfig{}) { @@ -2601,40 +2601,15 @@ func (optr *Operator) syncBootImageSkewEnforcementStatus(mcop *opv1.MachineConfi newMachineConfigurationStatus.BootImageSkewEnforcementStatus = apihelpers.GetSkewEnforcementStatusManualWithOCPVersion(ocpVersionAtInstall) } -// getOCPInstallVersionFromClusterVersion extracts the original install version from ClusterVersion history. -// It finds the oldest completed update in history and parses it to a clean version string. -// Returns an empty string if ClusterVersion cannot be retrieved or parsed. -func (optr *Operator) getOCPInstallVersionFromClusterVersion() string { +// getOCPInstallVersion extracts the original install version from ClusterVersion history. +// Returns "0.0.0" if the version cannot be determined, so that downstream skew checks always have a value to compare. +func (optr *Operator) getOCPInstallVersion() string { clusterVersion, err := optr.clusterVersionLister.Get("version") if err != nil { klog.Warningf("Failed to get ClusterVersion: %v, skipping boot image skew enforcement configuration", err) - return "" - } - if len(clusterVersion.Status.History) == 0 { - klog.Warningf("ClusterVersion has no history, skipping boot image skew enforcement configuration") - return "" - } - - // History is ordered by recency (newest first), so find the last completed entry (install version) - var installVersion string - for i := len(clusterVersion.Status.History) - 1; i >= 0; i-- { - if clusterVersion.Status.History[i].State == configv1.CompletedUpdate { - installVersion = clusterVersion.Status.History[i].Version - break - } - } - // ClusterVersion has no completed updates in history(install likely on-going), default to last value - if installVersion == "" { - klog.Warningf("ClusterVersion has no completed updates in history(install likely on-going), default to last value") - installVersion = clusterVersion.Status.History[len(clusterVersion.Status.History)-1].Version - } - // Scrape away CI/nightly tags if needed - parsedVersion, err := k8sversion.ParseGeneric(installVersion) - if err != nil { - klog.Warningf("Failed to parse install version %q: %v, use a placeholder for now", installVersion, err) return "0.0.0" } - return fmt.Sprintf("%d.%d.%d", parsedVersion.Major(), parsedVersion.Minor(), parsedVersion.Patch()) + return apihelpers.GetOCPInstallVersion(clusterVersion) } // getCurrentOCPVersionFromClusterVersion extracts the latest OCP version from ClusterVersion history.