Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions pkg/apihelpers/apihelpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package apihelpers
import (
"fmt"

configv1 "github.com/openshift/api/config/v1"
mcfgv1 "github.com/openshift/api/machineconfiguration/v1"
opv1 "github.com/openshift/api/operator/v1"
"github.com/openshift/machine-config-operator/pkg/daemon/constants"
Expand All @@ -16,6 +17,7 @@ import (

"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
k8sversion "k8s.io/apimachinery/pkg/util/version"
"k8s.io/klog/v2"
)

Expand Down Expand Up @@ -657,3 +659,32 @@ func GetSkewEnforcementStatusNone() opv1.BootImageSkewEnforcementStatus {
Mode: opv1.BootImageSkewEnforcementModeStatusNone,
}
}

// GetOCPInstallVersion extracts the install-time OCP version from
// ClusterVersion history — the oldest completed update entry, normalised to Major.Minor.Patch.
// Returns "0.0.0" as a conservative fallback if the history is empty or the version cannot be parsed.
func GetOCPInstallVersion(clusterVersion *configv1.ClusterVersion) string {
// This is not possible, but return a sane value to avoid API validation failures.
if len(clusterVersion.Status.History) == 0 {
klog.Warning("ClusterVersion has no history, cannot determine install version")
return "0.0.0"
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
var installVersion string
for i := len(clusterVersion.Status.History) - 1; i >= 0; i-- {
if clusterVersion.Status.History[i].State == configv1.CompletedUpdate {
installVersion = clusterVersion.Status.History[i].Version
break
}
}
if installVersion == "" {
installVersion = clusterVersion.Status.History[len(clusterVersion.Status.History)-1].Version
}
parsedVersion, err := k8sversion.ParseGeneric(installVersion)
if err != nil {
// Version strings from ClusterVersion history are always valid semver in practice.
// Log a warning and return 0.0.0 as a sanity check.
klog.Warningf("Failed to parse install version %q: %v", installVersion, err)
return "0.0.0"
}
return fmt.Sprintf("%d.%d.%d", parsedVersion.Major(), parsedVersion.Minor(), parsedVersion.Patch())
}
38 changes: 37 additions & 1 deletion pkg/controller/bootimage/boot_image_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import (

mcopinformersv1 "github.com/openshift/client-go/operator/informers/externalversions/operator/v1"
mcoplistersv1 "github.com/openshift/client-go/operator/listers/operator/v1"
apihelpers "github.com/openshift/machine-config-operator/pkg/apihelpers"
ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
"github.com/openshift/machine-config-operator/pkg/osimagestream"
)
Expand Down Expand Up @@ -628,7 +629,42 @@ func (ctrl *Controller) updateClusterBootImage() {
}

// Only make an API call if there is an update to the skew enforcement status
if !reflect.DeepEqual(mcop.Status.BootImageSkewEnforcementStatus, newBootImageSkewEnforcementStatus) {
if !reflect.DeepEqual(mcop.Status.BootImageSkewEnforcementStatus, *newBootImageSkewEnforcementStatus) {

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was something caught by the new units; the old code wasn't dereferencing the newBootImageSkewEnforcementStatus pointer, meaning the API call was always being made - no bueno! 🫨

mcop.Status.BootImageSkewEnforcementStatus = *newBootImageSkewEnforcementStatus
ctrl.updateMachineConfigurationStatus(mcop.Status)
}
}

// resetClusterBootImage writes the cluster's install OCP version into ClusterBootImageAutomatic when
// one or more MachineSets were reconcileSkipped. The recorded value can no longer be trusted as an
// accurate description of the full cluster state (a newly-added or changed MachineSet may be on an
// older image), so we fall back to the install version — the oldest version we can reliably claim —
// so that the skew check correctly surfaces a violation if the cluster is genuinely out of date.
func (ctrl *Controller) resetClusterBootImage() {
mcop, err := ctrl.mcopClient.OperatorV1().MachineConfigurations().Get(context.TODO(), ctrlcommon.MCOOperatorKnobsObjectName, metav1.GetOptions{})
if err != nil {
klog.Errorf("error resetting cluster boot image record: %s", err)
return
}
if mcop.Status.BootImageSkewEnforcementStatus.Mode != opv1.BootImageSkewEnforcementModeStatusAutomatic {
return
}

// Default to a conservative value to be on the safe side.
ocpVersion := "0.0.0"
clusterVersion, err := ctrl.clusterVersionLister.Get("version")
if err != nil {
klog.Warningf("Failed to get ClusterVersion for boot image reset: %v", err)
} else {
ocpVersion = apihelpers.GetOCPInstallVersion(clusterVersion)
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

newBootImageSkewEnforcementStatus := mcop.Status.BootImageSkewEnforcementStatus.DeepCopy()
newBootImageSkewEnforcementStatus.Automatic = opv1.ClusterBootImageAutomatic{
OCPVersion: ocpVersion,
}
if !reflect.DeepEqual(mcop.Status.BootImageSkewEnforcementStatus, *newBootImageSkewEnforcementStatus) {
klog.Infof("Resetting cluster boot image record to install version %s due to reconcileSkipped MachineSets", ocpVersion)
mcop.Status.BootImageSkewEnforcementStatus = *newBootImageSkewEnforcementStatus
ctrl.updateMachineConfigurationStatus(mcop.Status)
}
Expand Down
109 changes: 108 additions & 1 deletion pkg/controller/bootimage/boot_image_controller_test.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
package bootimage

import (
"context"
"testing"

"github.com/coreos/stream-metadata-go/stream"
"github.com/coreos/stream-metadata-go/stream/rhcos"
osconfigv1 "github.com/openshift/api/config/v1"
machinev1beta1 "github.com/openshift/api/machine/v1beta1"
opv1 "github.com/openshift/api/operator/v1"
configlistersv1 "github.com/openshift/client-go/config/listers/config/v1"
fakemcopclient "github.com/openshift/client-go/operator/clientset/versioned/fake"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
Expand All @@ -16,6 +19,7 @@ import (
"k8s.io/client-go/kubernetes/fake"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
)

func TestIsClusterStable(t *testing.T) {
Expand Down Expand Up @@ -533,7 +537,7 @@ func TestReconcileAzureProviderSpec(t *testing.T) {
},
}

fakeClient := fake.NewSimpleClientset(testSecret)
fakeClient := fake.NewClientset(testSecret)

tests := []struct {
name string
Expand Down Expand Up @@ -969,3 +973,106 @@ func TestReconcileAzureProviderSpec(t *testing.T) {
})
}
}

func TestResetClusterBootImage(t *testing.T) {
cases := []struct {
name string
history []osconfigv1.UpdateHistory
mode opv1.BootImageSkewEnforcementModeStatus
initialOCPVersion string
expectedVersion string
}{
{
name: "resets to oldest completed entry (install version)",
history: []osconfigv1.UpdateHistory{
{State: osconfigv1.CompletedUpdate, Version: "4.19.0"},
{State: osconfigv1.CompletedUpdate, Version: "4.18.0"},
},
mode: opv1.BootImageSkewEnforcementModeStatusAutomatic,
initialOCPVersion: "4.19.0",
expectedVersion: "4.18.0",
},
{
name: "no-op when mode is not automatic",
history: []osconfigv1.UpdateHistory{
{State: osconfigv1.CompletedUpdate, Version: "4.18.0"},
},
mode: opv1.BootImageSkewEnforcementModeStatusNone,
initialOCPVersion: "4.19.0",
expectedVersion: "4.19.0",
},
{
name: "no-op when already at install version",
history: []osconfigv1.UpdateHistory{
{State: osconfigv1.CompletedUpdate, Version: "4.18.0"},
},
mode: opv1.BootImageSkewEnforcementModeStatusAutomatic,
initialOCPVersion: "4.18.0",
expectedVersion: "4.18.0",
},
{
name: "falls back to 0.0.0 when history is empty",
history: []osconfigv1.UpdateHistory{},
mode: opv1.BootImageSkewEnforcementModeStatusAutomatic,
initialOCPVersion: "4.19.0",
expectedVersion: "0.0.0",
},
{
name: "uses last entry when no completed update exists",
history: []osconfigv1.UpdateHistory{
{State: osconfigv1.PartialUpdate, Version: "4.18.0"},
},
mode: opv1.BootImageSkewEnforcementModeStatusAutomatic,
initialOCPVersion: "4.19.0",
expectedVersion: "4.18.0",
},
{
// history==nil is the sentinel for "do not populate the lister",
// exercising the clusterVersionLister.Get() error path.
name: "falls back to 0.0.0 when ClusterVersion unavailable",
history: nil,
mode: opv1.BootImageSkewEnforcementModeStatusAutomatic,
initialOCPVersion: "4.19.0",
expectedVersion: "0.0.0",
},
}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
mcop := &opv1.MachineConfiguration{
ObjectMeta: v1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName},
Status: opv1.MachineConfigurationStatus{
BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{
Mode: tc.mode,
Automatic: opv1.ClusterBootImageAutomatic{
OCPVersion: tc.initialOCPVersion,
},
},
},
}

cvIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{})
if tc.history != nil {
cv := &osconfigv1.ClusterVersion{
ObjectMeta: v1.ObjectMeta{Name: "version"},
Status: osconfigv1.ClusterVersionStatus{History: tc.history},
}
require.NoError(t, cvIndexer.Add(cv))
}

fakeMcopClient := fakemcopclient.NewClientset(mcop)
ctrl := &Controller{
mcopClient: fakeMcopClient,
clusterVersionLister: configlistersv1.NewClusterVersionLister(cvIndexer),
}

ctrl.resetClusterBootImage()

updated, err := fakeMcopClient.OperatorV1().MachineConfigurations().Get(
context.TODO(), ctrlcommon.MCOOperatorKnobsObjectName, v1.GetOptions{})
require.NoError(t, err)
assert.Equal(t, tc.expectedVersion,
updated.Status.BootImageSkewEnforcementStatus.Automatic.OCPVersion)
})
}
}
25 changes: 1 addition & 24 deletions pkg/controller/bootimage/cpms_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import (
machinev1 "github.com/openshift/api/machine/v1"
opv1 "github.com/openshift/api/operator/v1"
ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
operatorversion "github.com/openshift/machine-config-operator/pkg/version"
"sigs.k8s.io/yaml"

corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -145,31 +144,9 @@ func (ctrl *Controller) syncControlPlaneMachineSet(controlPlaneMachineSet *machi
return fmt.Errorf("failed to fetch infra object during ControlPlaneMachineSet sync: %w", err)
}

// Fetch the bootimage configmap & ensure it has been stamped by the operator. This is done by
// the operator when a master node successfully updates to a new image. This is
// to prevent machinesets from being updated before the operator itself has updated.
// If it hasn't been updated, exit and wait for a resync.
configMap, err := ctrl.mcoCmLister.ConfigMaps(ctrlcommon.MCONamespace).Get(ctrlcommon.BootImagesConfigMapName)
if err != nil {
return fmt.Errorf("failed to fetch coreos-bootimages config map duringControlPlaneMachineSet sync: %w", err)
}
versionHashFromCM, versionHashFound := configMap.Data[ctrlcommon.MCOVersionHashKey]
if !versionHashFound {
klog.Infof("failed to find mco version hash in %s configmap, sync will exit to wait for the MCO upgrade to complete", ctrlcommon.BootImagesConfigMapName)
return nil
}
if versionHashFromCM != operatorversion.Hash {
klog.Infof("mismatch between MCO hash version stored in configmap and current MCO version; sync will exit to wait for the MCO upgrade to complete")
return nil
}
releaseVersionFromCM, releaseVersionFound := configMap.Data[ctrlcommon.OCPReleaseVersionKey]
if !releaseVersionFound {
klog.Infof("failed to find OCP release version in %s configmap, sync will exit to wait for the MCO upgrade to complete", ctrlcommon.BootImagesConfigMapName)
return nil
}
if releaseVersionFromCM != operatorversion.ReleaseVersion {
klog.Infof("mismatch between OCP release version stored in configmap and current MCO release version; sync will exit to wait for the MCO upgrade to complete")
return nil
return fmt.Errorf("failed to fetch coreos-bootimages config map during ControlPlaneMachineSet sync: %w", err)
}

// Check if the this ControlPlaneMachineSet requires an update
Expand Down
Loading