diff --git a/pkg/controller/drain/drain_controller.go b/pkg/controller/drain/drain_controller.go index 42063f44de..05d01ac4e3 100644 --- a/pkg/controller/drain/drain_controller.go +++ b/pkg/controller/drain/drain_controller.go @@ -361,6 +361,37 @@ func (ctrl *Controller) syncNode(key string) error { if err != nil { klog.Errorf("Error making MCN for UnCordon success: %v", err) } + case daemonconsts.DrainerStateCordon: + ctrl.logNode(node, "cordoning without drain") + if err := ctrl.cordonOrUncordonNode(true, node, drainer); err != nil { + nErr := upgrademonitor.GenerateAndApplyMachineConfigNodes( + &upgrademonitor.Condition{State: v1.MachineConfigNodeUpdateExecuted, Reason: string(v1.MachineConfigNodeUpdateCordoned), Message: "Failed to Cordon Node without drain"}, + &upgrademonitor.Condition{State: v1.MachineConfigNodeUpdateCordoned, Reason: fmt.Sprintf("%s%s", string(v1.MachineConfigNodeUpdateExecuted), string(v1.MachineConfigNodeUpdateCordoned)), Message: fmt.Sprintf("Error: Failed to Cordon node without drain. Error is %s, The node is reporting Unschedulable = %t", err.Error(), node.Spec.Unschedulable)}, + metav1.ConditionUnknown, + metav1.ConditionUnknown, + node, + ctrl.client, + ctrl.fgHandler, + pool, + ) + if nErr != nil { + klog.Errorf("Error making MCN for Cordon-only Failure: %v", nErr) + } + return fmt.Errorf("node %s: failed to cordon: %v", node.Name, err) + } + err = upgrademonitor.GenerateAndApplyMachineConfigNodes( + &upgrademonitor.Condition{State: v1.MachineConfigNodeUpdateExecuted, Reason: string(v1.MachineConfigNodeUpdateCordoned), Message: "Cordoned Node without drain"}, + &upgrademonitor.Condition{State: v1.MachineConfigNodeUpdateCordoned, Reason: fmt.Sprintf("%s%s", string(v1.MachineConfigNodeUpdateExecuted), string(v1.MachineConfigNodeUpdateCordoned)), Message: fmt.Sprintf("Cordoned node without drain. The node is reporting Unschedulable = %t", node.Spec.Unschedulable)}, + metav1.ConditionUnknown, + metav1.ConditionTrue, + node, + ctrl.client, + ctrl.fgHandler, + pool, + ) + if err != nil { + klog.Errorf("Error making MCN for Cordon-only Success: %v", err) + } case daemonconsts.DrainerStateDrain: if err := ctrl.drainNode(node, drainer); err != nil { diff --git a/pkg/controller/drain/drain_controller_test.go b/pkg/controller/drain/drain_controller_test.go index e50bd370ef..7905b236d4 100644 --- a/pkg/controller/drain/drain_controller_test.go +++ b/pkg/controller/drain/drain_controller_test.go @@ -36,6 +36,7 @@ const ( testPoolName = "worker" testDrainState = "drain-test-hash" testUncordonState = "uncordon-test-hash" + testCordonState = "cordon-test-hash" ) func createTestNode(name string, unschedulable bool) *corev1.Node { @@ -276,4 +277,27 @@ func TestSyncNode(t *testing.T) { assert.Equal(t, true, spec["unschedulable"], "only patch should be the cordon") } }) + + t.Run("cordon-only requested (SNO)", func(t *testing.T) { + node := createDrainTestNode(testNodeName, false, testCordonState, "") + _, kubeClient, err := setupControllerAndSync(node, nil) + assert.NoError(t, err, "syncNode should not error for cordon-only action") + + // Verify patch operations: cordon (unschedulable=true) + completion annotation + verifyDrainPatches(t, kubeClient, true, testCordonState) + }) + + t.Run("cordon-only already completed", func(t *testing.T) { + node := createDrainTestNode(testNodeName, true, testCordonState, testCordonState) + _, kubeClient, err := setupControllerAndSync(node, nil) + assert.NoError(t, err, "syncNode should not error when cordon already completed") + + patchActions := []core.PatchAction{} + for _, action := range kubeClient.Actions() { + if patchAction, ok := action.(core.PatchAction); ok { + patchActions = append(patchActions, patchAction) + } + } + assert.Len(t, patchActions, 0, "should have made no patch requests when cordon already completed") + }) } diff --git a/pkg/daemon/constants/constants.go b/pkg/daemon/constants/constants.go index abf05de0c9..2c6ae2b8c5 100644 --- a/pkg/daemon/constants/constants.go +++ b/pkg/daemon/constants/constants.go @@ -31,6 +31,8 @@ const ( DrainerStateDrain = "drain" // DrainerStateUncordon is used for drainer annotation as a value to indicate needing an uncordon DrainerStateUncordon = "uncordon" + // DrainerStateCordon is used for drainer annotation as a value to indicate needing a cordon without drain (for SNO) + DrainerStateCordon = "cordon" // ClusterControlPlaneTopologyAnnotationKey is set by the node controller by reading value from // controllerConfig. MCD uses the annotation value to decide drain action on the node. ClusterControlPlaneTopologyAnnotationKey = "machineconfiguration.openshift.io/controlPlaneTopology" diff --git a/pkg/daemon/drain.go b/pkg/daemon/drain.go index 23cd1ec20c..032cfbbf82 100644 --- a/pkg/daemon/drain.go +++ b/pkg/daemon/drain.go @@ -10,14 +10,11 @@ import ( "github.com/BurntSushi/toml" "github.com/containers/image/v5/pkg/sysregistriesv2" ign3types "github.com/coreos/ignition/v2/config/v3_5/types" - mcfgv1 "github.com/openshift/api/machineconfiguration/v1" opv1 "github.com/openshift/api/operator/v1" "github.com/openshift/machine-config-operator/pkg/apihelpers" ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" "github.com/openshift/machine-config-operator/pkg/daemon/constants" - "github.com/openshift/machine-config-operator/pkg/helpers" - "github.com/openshift/machine-config-operator/pkg/upgrademonitor" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" @@ -40,29 +37,8 @@ func (dn *Daemon) performDrain() error { } if !dn.drainRequired() { - logSystem("Drain not required, skipping") - - // Get MCP associated with node - pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, dn.node) - if err != nil { - return err - } - - err = upgrademonitor.GenerateAndApplyMachineConfigNodes( - &upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateExecuted, Reason: string(mcfgv1.MachineConfigNodeUpdateDrained), Message: "Node Drain Not required for this update."}, - &upgrademonitor.Condition{State: mcfgv1.MachineConfigNodeUpdateDrained, Reason: fmt.Sprintf("%s%s", string(mcfgv1.MachineConfigNodeUpdateExecuted), string(mcfgv1.MachineConfigNodeUpdateDrained)), Message: "Node Drain Not required for this update."}, - metav1.ConditionUnknown, - metav1.ConditionFalse, - dn.node, - dn.mcfgClient, - dn.fgHandler, - pool, - ) - if err != nil { - klog.Errorf("Error making MCN for Drain not required: %v", err) - } - dn.nodeWriter.Eventf(corev1.EventTypeNormal, "Drain", "Drain not required, skipping") - return nil + logSystem("Drain not required on single-node topology, requesting cordon only") + return dn.performCordonOnly() } desiredConfigName, err := getNodeAnnotation(dn.node, constants.DesiredMachineConfigAnnotationKey) @@ -127,6 +103,52 @@ func (dn *Daemon) performDrain() error { return nil } +// performCordonOnly requests the DrainController to cordon the node (mark unschedulable) +// without draining pods. Used on single-node clusters where drain would deadlock on PDBs +// but we still want to prevent new workloads from being scheduled during updates. +func (dn *Daemon) performCordonOnly() error { + desiredConfigName, err := getNodeAnnotation(dn.node, constants.DesiredMachineConfigAnnotationKey) + if err != nil { + return err + } + desiredCordonValue := fmt.Sprintf("%s-%s", constants.DrainerStateCordon, desiredConfigName) + if dn.node.Annotations[constants.LastAppliedDrainerAnnotationKey] == desiredCordonValue { + logSystem("node is already cordoned") + return nil + } + + logSystem("Update prepared; requesting cordon only via annotation to controller") + dn.nodeWriter.Eventf(corev1.EventTypeNormal, "Cordon", "Cordoned node to apply update") + + if err := dn.nodeWriter.SetDesiredDrainer(desiredCordonValue); err != nil { + return fmt.Errorf("could not set cordon annotation: %w", err) + } + + ctx := context.TODO() + if err := wait.PollUntilContextTimeout(ctx, 10*time.Second, 2*time.Minute, false, func(ctx context.Context) (bool, error) { + node, err := dn.kubeClient.CoreV1().Nodes().Get(ctx, dn.name, metav1.GetOptions{}) + if err != nil { + klog.Warningf("Failed to get node: %v", err) + return false, nil + } + if node.Annotations[constants.DesiredDrainerAnnotationKey] != node.Annotations[constants.LastAppliedDrainerAnnotationKey] { + return false, nil + } + return true, nil + }); err != nil { + if wait.Interrupted(err) { + failMsg := fmt.Sprintf("failed to cordon node: %s after 2 minutes. Please see machine-config-controller logs for more information", dn.node.Name) + dn.nodeWriter.Eventf(corev1.EventTypeWarning, "FailedToCordon", failMsg) + return errors.New(failMsg) + } + return fmt.Errorf("something went wrong while attempting to cordon node: %v", err) + } + + logSystem("cordon complete") + + return nil +} + // isDrainRequiredForNodeDisruptionActions determines whether node drain is required or not to apply config changes for this set of NodeDisruptionActions func isDrainRequiredForNodeDisruptionActions(actions []opv1.NodeDisruptionPolicyStatusAction, oldIgnConfig, newIgnConfig ign3types.Config) (bool, error) { klog.Infof("Checking drain required for node disruption actions") diff --git a/test/e2e-single-node/sno_mcd_test.go b/test/e2e-single-node/sno_mcd_test.go index 3780079410..ac20d5eb8d 100644 --- a/test/e2e-single-node/sno_mcd_test.go +++ b/test/e2e-single-node/sno_mcd_test.go @@ -383,6 +383,100 @@ func TestNoReboot(t *testing.T) { t.Logf("Node %s didn't reboot as expected during rollback, uptime increased from %f to %f ", node.Name, uptimeOld, uptimeNew) } +func TestSNOCordonDuringUpdate(t *testing.T) { + cs := framework.NewClientSet("") + + mcp, err := cs.MachineConfigPools().Get(context.TODO(), "master", metav1.GetOptions{}) + require.Nil(t, err) + oldMasterRenderedConfig := mcp.Status.Configuration.Name + + // Create a MC with kernel arguments to force a reboot-requiring update + kargsMC := &mcfgv1.MachineConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("sno-cordon-test-%s", uuid.NewUUID()), + Labels: helpers.MCLabelForRole("master"), + }, + Spec: mcfgv1.MachineConfigSpec{ + Config: runtime.RawExtension{ + Raw: helpers.MarshalOrDie(ctrlcommon.NewIgnConfig()), + }, + KernelArguments: []string{"sno-cordon-e2e-test"}, + }, + } + + _, err = cs.MachineConfigs().Create(context.TODO(), kargsMC, metav1.CreateOptions{}) + require.Nil(t, err) + t.Logf("Created %s", kargsMC.Name) + + renderedConfig, err := helpers.WaitForRenderedConfig(t, cs, "master", kargsMC.Name) + require.Nil(t, err) + t.Logf("Rendered config: %s", renderedConfig) + + // Poll for the node to become Unschedulable (cordoned) while the update is in flight. + // On SNO, the MCD requests cordon-only (no drain) before applying changes. + node := helpers.GetSingleNodeByRole(t, cs, "master") + cordonObserved := false + t.Logf("Polling for node %s to become Unschedulable...", node.Name) + // The cordon should happen within a few minutes of the rendered config being available. + // We poll aggressively (1s) because the window between cordon and reboot can be short. + err = wait.PollUntilContextTimeout(context.TODO(), 1*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { + n, err := cs.CoreV1Interface.Nodes().Get(ctx, node.Name, metav1.GetOptions{}) + if err != nil { + return false, nil + } + if n.Spec.Unschedulable { + t.Logf("Node %s is Unschedulable (cordoned) during update", node.Name) + cordonObserved = true + return true, nil + } + return false, nil + }) + // If we timed out, the cordon was never observed. This is a test failure. + require.Nil(t, err, "timed out waiting for node to be cordoned during update") + assert.True(t, cordonObserved, "node should have been cordoned during update") + + // Verify the drain annotation shows cordon (not drain) was requested + n, err := cs.CoreV1Interface.Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) + require.Nil(t, err) + desiredDrain := n.Annotations[constants.DesiredDrainerAnnotationKey] + assert.True(t, strings.HasPrefix(desiredDrain, "cordon-"), + "desiredDrain annotation should use cordon verb, got: %s", desiredDrain) + t.Logf("Drain annotation confirms cordon-only: %s", desiredDrain) + + // Wait for the full update to complete (includes reboot and uncordon) + err = waitForSingleNodePoolComplete(t, cs, "master", renderedConfig) + require.Nil(t, err) + + // Verify post-completion state + node = helpers.GetSingleNodeByRole(t, cs, "master") + assert.Equal(t, renderedConfig, node.Annotations[constants.CurrentMachineConfigAnnotationKey]) + assert.Equal(t, constants.MachineConfigDaemonStateDone, node.Annotations[constants.MachineConfigDaemonStateAnnotationKey]) + assert.False(t, node.Spec.Unschedulable, "node should be schedulable after update completes") + + // The drain annotations should both show uncordon- (matching = completed) + desiredDrain = node.Annotations[constants.DesiredDrainerAnnotationKey] + lastAppliedDrain := node.Annotations[constants.LastAppliedDrainerAnnotationKey] + assert.Equal(t, desiredDrain, lastAppliedDrain, + "desiredDrain and lastAppliedDrain should match after update completes") + assert.True(t, strings.HasPrefix(desiredDrain, "uncordon-"), + "final drain annotation should show uncordon verb, got: %s", desiredDrain) + t.Logf("Post-update drain annotations match: %s", desiredDrain) + + // Cleanup: delete MC and rollback + if err := cs.MachineConfigs().Delete(context.TODO(), kargsMC.Name, metav1.DeleteOptions{}); err != nil { + t.Error(err) + } + t.Logf("Deleted MachineConfig %s", kargsMC.Name) + err = waitForSingleNodePoolComplete(t, cs, "master", oldMasterRenderedConfig) + require.Nil(t, err) + + node = helpers.GetSingleNodeByRole(t, cs, "master") + assert.Equal(t, oldMasterRenderedConfig, node.Annotations[constants.CurrentMachineConfigAnnotationKey]) + assert.Equal(t, constants.MachineConfigDaemonStateDone, node.Annotations[constants.MachineConfigDaemonStateAnnotationKey]) + assert.False(t, node.Spec.Unschedulable, "node should be schedulable after rollback") + t.Logf("Node %s has successfully rolled back", node.Name) +} + func TestRunShared(t *testing.T) { mcpName := "master"