From 091e5ab9e61e4f73d40faaea05e5dff9a1f5721e Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Thu, 2 Apr 2026 14:28:12 +0200 Subject: [PATCH 01/27] feat(vmop): add migration progress status and reasons Signed-off-by: Daniil Antoshin --- api/client/kubeclient/async.go | 12 +- api/client/kubeclient/streamer.go | 4 +- api/client/kubeclient/websocket.go | 4 +- .../v1alpha2/virtual_machine_operation.go | 5 + api/core/v1alpha2/vmopcondition/condition.go | 36 +++ api/core/v1alpha2/zz_generated.deepcopy.go | 5 + crds/doc-ru-virtualmachineoperations.yaml | 3 + crds/virtualmachineoperations.yaml | 12 + .../pkg/controller/vm/internal/migrating.go | 8 +- .../vm/internal/service/migration_volumes.go | 2 +- .../migration/internal/handler/lifecycle.go | 256 +++++++++++++----- .../monitoring/metrics/vmop/data_metric.go | 4 +- 12 files changed, 275 insertions(+), 76 deletions(-) diff --git a/api/client/kubeclient/async.go b/api/client/kubeclient/async.go index 61ed3f56d0..2d22345fbf 100644 --- a/api/client/kubeclient/async.go +++ b/api/client/kubeclient/async.go @@ -49,7 +49,7 @@ func (aws *asyncWSRoundTripper) WebsocketCallback(ws *websocket.Conn, resp *http if resp != nil && resp.StatusCode != http.StatusOK { return enrichError(err, resp) } - return fmt.Errorf("Can't connect to websocket: %s\n", err.Error()) + return fmt.Errorf("can't connect to websocket: %w", err) } aws.Connection <- ws @@ -105,7 +105,9 @@ func asyncSubresourceHelper( } if response != nil { - defer response.Body.Close() + defer func() { + _ = response.Body.Close() + }() switch response.StatusCode { case http.StatusOK: case http.StatusNotFound: @@ -165,7 +167,7 @@ func enrichError(httpErr error, resp *http.Response) error { if resp == nil { return httpErr } - httpErr = fmt.Errorf("Can't connect to websocket (%d): %s\n", resp.StatusCode, httpErr.Error()) + httpErr = fmt.Errorf("can't connect to websocket (%d): %w", resp.StatusCode, httpErr) status := &metav1.Status{} if resp.Header.Get("Content-Type") != 
"application/json" { @@ -201,7 +203,9 @@ type WebsocketRoundTripper struct { func (d *WebsocketRoundTripper) RoundTrip(r *http.Request) (*http.Response, error) { conn, resp, err := d.Dialer.Dial(r.URL.String(), r.Header) if err == nil { - defer conn.Close() + defer func() { + _ = conn.Close() + }() } return resp, d.Do(conn, resp, err) } diff --git a/api/client/kubeclient/streamer.go b/api/client/kubeclient/streamer.go index 216bfd2f0e..120929a398 100644 --- a/api/client/kubeclient/streamer.go +++ b/api/client/kubeclient/streamer.go @@ -69,10 +69,10 @@ type wsConn struct { } func (c *wsConn) SetDeadline(t time.Time) error { - if err := c.Conn.SetWriteDeadline(t); err != nil { + if err := c.SetWriteDeadline(t); err != nil { return err } - return c.Conn.SetReadDeadline(t) + return c.SetReadDeadline(t) } func NewWebsocketStreamer(conn *websocket.Conn, done chan struct{}) *wsStreamer { diff --git a/api/client/kubeclient/websocket.go b/api/client/kubeclient/websocket.go index 94c2018732..2cb4e4ccfb 100644 --- a/api/client/kubeclient/websocket.go +++ b/api/client/kubeclient/websocket.go @@ -76,7 +76,9 @@ func (s *binaryWriter) Write(p []byte) (int, error) { if err != nil { return 0, convert(err) } - defer w.Close() + defer func() { + _ = w.Close() + }() n, err := w.Write(p) return n, err } diff --git a/api/core/v1alpha2/virtual_machine_operation.go b/api/core/v1alpha2/virtual_machine_operation.go index 1b11cdc0a4..c367e3e5f3 100644 --- a/api/core/v1alpha2/virtual_machine_operation.go +++ b/api/core/v1alpha2/virtual_machine_operation.go @@ -31,6 +31,7 @@ const ( // +kubebuilder:subresource:status // +kubebuilder:resource:categories={virtualization},scope=Namespaced,shortName={vmop},singular=virtualmachineoperation // +kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase",description="VirtualMachineOperation phase." 
+// +kubebuilder:printcolumn:name="Progress",type="integer",JSONPath=".status.progress",description="VirtualMachineOperation progress in percent." // +kubebuilder:printcolumn:name="Type",type="string",JSONPath=".spec.type",description="VirtualMachineOperation type." // +kubebuilder:printcolumn:name="VirtualMachine",type="string",JSONPath=".spec.virtualMachineName",description="VirtualMachine name." // +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp",description="Time of resource creation." @@ -109,6 +110,10 @@ type VirtualMachineOperationCloneCustomization struct { type VirtualMachineOperationStatus struct { Phase VMOPPhase `json:"phase"` + // Progress reports operation completion percentage for migration-related VMOPs (Evict/Migrate). + // +kubebuilder:validation:Minimum=0 + // +kubebuilder:validation:Maximum=100 + Progress *int32 `json:"progress,omitempty"` // The latest detailed observations of the VirtualMachineOperation resource. Conditions []metav1.Condition `json:"conditions,omitempty"` // Resource generation last processed by the controller. diff --git a/api/core/v1alpha2/vmopcondition/condition.go b/api/core/v1alpha2/vmopcondition/condition.go index 58d917ab5a..9326c306ec 100644 --- a/api/core/v1alpha2/vmopcondition/condition.go +++ b/api/core/v1alpha2/vmopcondition/condition.go @@ -86,6 +86,42 @@ const ( // ReasonMigrationRunning is a ReasonCompleted indicating that the migration process is currently in progress. ReasonMigrationRunning ReasonCompleted = "MigrationRunning" + // ReasonDisksPreparing indicates that migration-related disk preparation is in progress. + ReasonDisksPreparing ReasonCompleted = "DisksPreparing" + + // ReasonTargetScheduling indicates that the target pod is being scheduled. + ReasonTargetScheduling ReasonCompleted = "TargetScheduling" + + // ReasonTargetUnschedulable indicates that the target pod cannot be scheduled. 
+ ReasonTargetUnschedulable ReasonCompleted = "TargetUnschedulable" + + // ReasonTargetDiskError indicates that target disk attachment failed. + ReasonTargetDiskError ReasonCompleted = "TargetDiskError" + + // ReasonTargetPreparing indicates that target pod is being prepared. + ReasonTargetPreparing ReasonCompleted = "TargetPreparing" + + // ReasonSyncing indicates that source and target are synchronizing migration data. + ReasonSyncing ReasonCompleted = "Syncing" + + // ReasonNotConverging indicates that migration cannot converge even with maximum throttling. + ReasonNotConverging ReasonCompleted = "NotConverging" + + // ReasonSourceSuspended indicates that source VM has been suspended. + ReasonSourceSuspended ReasonCompleted = "SourceSuspended" + + // ReasonTargetResumed indicates that target VM has resumed. + ReasonTargetResumed ReasonCompleted = "TargetResumed" + + // ReasonMigrationCompleted indicates that migration has completed successfully. + ReasonMigrationCompleted ReasonCompleted = "Completed" + + // ReasonAborted indicates that migration has been aborted. + ReasonAborted ReasonCompleted = "Aborted" + + // ReasonFailed indicates that migration failed for an unspecified reason. + ReasonFailed ReasonCompleted = "Failed" + // ReasonOtherMigrationInProgress is a ReasonCompleted indicating that there are other migrations in progress. ReasonOtherMigrationInProgress ReasonCompleted = "OtherMigrationInProgress" diff --git a/api/core/v1alpha2/zz_generated.deepcopy.go b/api/core/v1alpha2/zz_generated.deepcopy.go index e1939fd64a..0f918a95bd 100644 --- a/api/core/v1alpha2/zz_generated.deepcopy.go +++ b/api/core/v1alpha2/zz_generated.deepcopy.go @@ -2996,6 +2996,11 @@ func (in *VirtualMachineOperationSpec) DeepCopy() *VirtualMachineOperationSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *VirtualMachineOperationStatus) DeepCopyInto(out *VirtualMachineOperationStatus) { *out = *in + if in.Progress != nil { + in, out := &in.Progress, &out.Progress + *out = new(int32) + **out = **in + } if in.Conditions != nil { in, out := &in.Conditions, &out.Conditions *out = make([]v1.Condition, len(*in)) diff --git a/crds/doc-ru-virtualmachineoperations.yaml b/crds/doc-ru-virtualmachineoperations.yaml index 0c9713d792..d06acf316c 100644 --- a/crds/doc-ru-virtualmachineoperations.yaml +++ b/crds/doc-ru-virtualmachineoperations.yaml @@ -128,6 +128,9 @@ spec: * `Completed` — операция прошла успешно; * `Failed` — операция завершилась неудачно. За подробностями обратитесь к полю `conditions` и событиям; * `Terminating` — операция удаляется. + progress: + description: | + Прогресс выполнения операции в процентах для миграционных VMOP (`Evict`/`Migrate`). observedGeneration: description: | Поколение ресурса, которое в последний раз обрабатывалось контроллером. diff --git a/crds/virtualmachineoperations.yaml b/crds/virtualmachineoperations.yaml index 86b75a01c4..7c79b47bb2 100644 --- a/crds/virtualmachineoperations.yaml +++ b/crds/virtualmachineoperations.yaml @@ -26,6 +26,10 @@ spec: jsonPath: .status.phase name: Phase type: string + - description: VirtualMachineOperation progress in percent. + jsonPath: .status.progress + name: Progress + type: integer - description: VirtualMachineOperation type. jsonPath: .spec.type name: Type @@ -319,6 +323,14 @@ spec: - Failed - Terminating type: string + progress: + description: + Progress reports operation completion percentage for + migration-related VMOPs (Evict/Migrate). 
+ format: int32 + maximum: 100 + minimum: 0 + type: integer resources: description: Resources contains the list of resources that are affected diff --git a/images/virtualization-artifact/pkg/controller/vm/internal/migrating.go b/images/virtualization-artifact/pkg/controller/vm/internal/migrating.go index b7a4ba143a..99d3f0609d 100644 --- a/images/virtualization-artifact/pkg/controller/vm/internal/migrating.go +++ b/images/virtualization-artifact/pkg/controller/vm/internal/migrating.go @@ -162,16 +162,16 @@ func (h *MigratingHandler) syncMigrating(ctx context.Context, s state.VirtualMac completed, _ := conditions.GetCondition(vmopcondition.TypeCompleted, vmop.Status.Conditions) switch completed.Reason { - case vmopcondition.ReasonMigrationPending.String(): + case vmopcondition.ReasonMigrationPending.String(), vmopcondition.ReasonTargetScheduling.String(): cb.Message("Migration is awaiting start.") case vmopcondition.ReasonQuotaExceeded.String(): cb.Message(fmt.Sprintf("Migration is pending: %s.", completed.Message)) - case vmopcondition.ReasonMigrationPrepareTarget.String(): + case vmopcondition.ReasonMigrationPrepareTarget.String(), vmopcondition.ReasonTargetPreparing.String(), vmopcondition.ReasonDisksPreparing.String(): cb.Message("Migration is awaiting target preparation.") - case vmopcondition.ReasonMigrationTargetReady.String(): + case vmopcondition.ReasonMigrationTargetReady.String(), vmopcondition.ReasonSyncing.String(), vmopcondition.ReasonSourceSuspended.String(), vmopcondition.ReasonTargetResumed.String(): cb.Message("Migration is awaiting execution.") case vmopcondition.ReasonWaitingForVirtualMachineToBeReadyToMigrate.String(): @@ -183,7 +183,7 @@ func (h *MigratingHandler) syncMigrating(ctx context.Context, s state.VirtualMac case vmopcondition.ReasonMigrationRunning.String(): cb.Status(metav1.ConditionTrue).Reason(vmcondition.ReasonMigratingInProgress) - case vmopcondition.ReasonOperationCompleted.String(): + case 
vmopcondition.ReasonOperationCompleted.String(), vmopcondition.ReasonMigrationCompleted.String(): conditions.RemoveCondition(vmcondition.TypeMigrating, &vm.Status.Conditions) return nil diff --git a/images/virtualization-artifact/pkg/controller/vm/internal/service/migration_volumes.go b/images/virtualization-artifact/pkg/controller/vm/internal/service/migration_volumes.go index 5f208c9f1c..2fc332e9ae 100644 --- a/images/virtualization-artifact/pkg/controller/vm/internal/service/migration_volumes.go +++ b/images/virtualization-artifact/pkg/controller/vm/internal/service/migration_volumes.go @@ -94,7 +94,7 @@ func (s MigrationVolumesService) SyncVolumes(ctx context.Context, vmState state. if vmop != nil { completed, _ := conditions.GetCondition(vmopcondition.TypeCompleted, vmop.Status.Conditions) switch completed.Reason { - case vmopcondition.ReasonMigrationPrepareTarget.String(), vmopcondition.ReasonMigrationTargetReady.String(), vmopcondition.ReasonMigrationRunning.String(): + case vmopcondition.ReasonMigrationPrepareTarget.String(), vmopcondition.ReasonMigrationTargetReady.String(), vmopcondition.ReasonMigrationRunning.String(), vmopcondition.ReasonTargetPreparing.String(), vmopcondition.ReasonSyncing.String(), vmopcondition.ReasonSourceSuspended.String(), vmopcondition.ReasonTargetResumed.String(): return reconcile.Result{}, nil } } diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go index dad39694b6..fea3a2aaa1 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go @@ -19,6 +19,8 @@ package handler import ( "context" "fmt" + "strings" + "time" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -42,6 +44,27 @@ import ( const lifecycleHandlerName = 
"LifecycleHandler" +const ( + progressDisksPreparing int32 = 1 + progressTargetScheduling int32 = 2 + progressTargetPreparing int32 = 3 + progressSyncingMin int32 = 10 + progressSyncingMax int32 = 90 + progressSourceSuspended int32 = 91 + progressTargetResumed int32 = 92 + progressMigrationCompleted int32 = 100 + + syncingSecondsPerPercent = 2 +) + +const ( + messageSyncingSourceAndTarget = "Syncing source and target" + messageTargetPodScheduling = "Target pod is being scheduled" + messageTargetPodPreparing = "Target pod is being prepared" + messageTargetVMResumed = "Target VM resumed" + messageSourceVMSuspended = "Source VM suspended" +) + type Base interface { Init(vmop *v1alpha2.VirtualMachineOperation) ShouldExecuteOrSetFailedPhase(ctx context.Context, vmop *v1alpha2.VirtualMachineOperation) (bool, error) @@ -264,44 +287,42 @@ func (h LifecycleHandler) syncOperationComplete(ctx context.Context, vmop *v1alp vmop.Status.Phase = v1alpha2.VMOPPhaseFailed h.recorder.Event(vmop, corev1.EventTypeWarning, v1alpha2.ReasonErrVMOPFailed, "VirtualMachineOperation failed") - msg := "Migration failed" - if mig.Status.MigrationState != nil && mig.Status.MigrationState.FailureReason != "" { - msg += ": " + mig.Status.MigrationState.FailureReason - } - msgByFailedReason := getMessageByMigrationFailedReason(mig) - if msgByFailedReason != "" { - msg += ": " + msgByFailedReason - } + reason := h.getFailedReason(mig) + msg := h.getFailedMessage(reason, mig) + progress := calculateMigrationProgress(vmop, mig, reason) + vmop.Status.Progress = ptrToInt32(progress) completedCond. Status(metav1.ConditionFalse). - Reason(vmopcondition.ReasonOperationFailed). + Reason(reason). 
Message(msg) conditions.SetCondition(completedCond, &vmop.Status.Conditions) return nil case virtv1.MigrationSucceeded: vmop.Status.Phase = v1alpha2.VMOPPhaseCompleted h.recorder.Event(vmop, corev1.EventTypeNormal, v1alpha2.ReasonVMOPSucceeded, "VirtualMachineOperation succeeded") + vmop.Status.Progress = ptrToInt32(100) completedCond. Status(metav1.ConditionTrue). - Reason(vmopcondition.ReasonOperationCompleted) + Reason(vmopcondition.ReasonMigrationCompleted) conditions.SetCondition(completedCond, &vmop.Status.Conditions) return nil } // 3. Migration in progress. Set in progress phase - vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress - reason := mapMigrationPhaseToReason[mig.Status.Phase] - if reason == vmopcondition.ReasonMigrationPending { - vmop.Status.Phase = v1alpha2.VMOPPhasePending - } - - msg, err := h.getConditionCompletedMessageByReason(ctx, reason, mig) + reason, msg, err := h.getInProgressReasonAndMessage(ctx, mig) if err != nil { return err } + vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress + if reason == vmopcondition.ReasonTargetScheduling { + vmop.Status.Phase = v1alpha2.VMOPPhasePending + } + progress := calculateMigrationProgress(vmop, mig, reason) + vmop.Status.Progress = ptrToInt32(progress) + completedCond. Status(metav1.ConditionFalse). Reason(reason). @@ -363,6 +384,7 @@ func (h LifecycleHandler) canExecute(vmop *v1alpha2.VirtualMachineOperation, vm if migratable.Status == metav1.ConditionTrue { vmop.Status.Phase = v1alpha2.VMOPPhasePending + vmop.Status.Progress = ptrToInt32(1) conditions.SetCondition( conditions.NewConditionBuilder(vmopcondition.TypeCompleted). Generation(vmop.GetGeneration()). 
@@ -406,17 +428,18 @@ func (h LifecycleHandler) execute(ctx context.Context, vmop *v1alpha2.VirtualMac return err } - vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress - reason := mapMigrationPhaseToReason[mig.Status.Phase] - if reason == vmopcondition.ReasonMigrationPending { - vmop.Status.Phase = v1alpha2.VMOPPhasePending - } - - msg, err := h.getConditionCompletedMessageByReason(ctx, reason, mig) + reason, msg, err := h.getInProgressReasonAndMessage(ctx, mig) if err != nil { return err } + vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress + if reason == vmopcondition.ReasonTargetScheduling { + vmop.Status.Phase = v1alpha2.VMOPPhasePending + } + progress := calculateMigrationProgress(vmop, mig, reason) + vmop.Status.Progress = ptrToInt32(progress) + conditions.SetCondition( conditions.NewConditionBuilder(vmopcondition.TypeCompleted). Generation(vmop.GetGeneration()). @@ -455,17 +478,6 @@ func (h LifecycleHandler) recordEvent(ctx context.Context, vmop *v1alpha2.Virtua } } -var mapMigrationPhaseToReason = map[virtv1.VirtualMachineInstanceMigrationPhase]vmopcondition.ReasonCompleted{ - virtv1.MigrationPhaseUnset: vmopcondition.ReasonMigrationPending, - virtv1.MigrationPending: vmopcondition.ReasonMigrationPending, - virtv1.MigrationScheduling: vmopcondition.ReasonMigrationPrepareTarget, - virtv1.MigrationScheduled: vmopcondition.ReasonMigrationPrepareTarget, - virtv1.MigrationTargetReady: vmopcondition.ReasonMigrationTargetReady, - virtv1.MigrationRunning: vmopcondition.ReasonMigrationRunning, - virtv1.MigrationSucceeded: vmopcondition.ReasonOperationCompleted, - virtv1.MigrationFailed: vmopcondition.ReasonOperationFailed, -} - func getMessageByMigrationFailedReason(mig *virtv1.VirtualMachineInstanceMigration) string { cond, found := conditions.GetKVVMIMCondition(virtv1.VirtualMachineInstanceMigrationFailed, mig.Status.Conditions) @@ -481,6 +493,153 @@ func getMessageByMigrationFailedReason(mig *virtv1.VirtualMachineInstanceMigrati return "" } +func ptrToInt32(v 
int32) *int32 { + return &v +} + +func (h LifecycleHandler) getFailedReason(mig *virtv1.VirtualMachineInstanceMigration) vmopcondition.ReasonCompleted { + if mig != nil && mig.Status.MigrationState != nil { + state := mig.Status.MigrationState + if state.AbortRequested || state.AbortStatus == virtv1.MigrationAbortSucceeded { + return vmopcondition.ReasonAborted + } + if strings.Contains(strings.ToLower(state.FailureReason), "converg") || strings.Contains(strings.ToLower(state.FailureReason), "progress") { + return vmopcondition.ReasonNotConverging + } + } + + if cond, found := conditions.GetKVVMIMCondition(virtv1.VirtualMachineInstanceMigrationFailed, mig.Status.Conditions); found { + reason := strings.ToLower(cond.Reason + " " + cond.Message) + if strings.Contains(reason, "schedul") || strings.Contains(reason, "unschedul") { + return vmopcondition.ReasonTargetUnschedulable + } + if strings.Contains(reason, "csi") || strings.Contains(reason, "attach") || strings.Contains(reason, "volume") || strings.Contains(reason, "disk") { + return vmopcondition.ReasonTargetDiskError + } + } + + return vmopcondition.ReasonFailed +} + +func (h LifecycleHandler) getFailedMessage(reason vmopcondition.ReasonCompleted, mig *virtv1.VirtualMachineInstanceMigration) string { + base := "Migration failed" + switch reason { + case vmopcondition.ReasonAborted: + base = "Migration aborted" + case vmopcondition.ReasonNotConverging: + base = "Migration did not converge" + case vmopcondition.ReasonTargetUnschedulable: + base = "Migration failed: target pod is unschedulable" + case vmopcondition.ReasonTargetDiskError: + base = "Migration failed: target disk attach error" + } + + if mig != nil && mig.Status.MigrationState != nil && mig.Status.MigrationState.FailureReason != "" { + return fmt.Sprintf("%s: %s", base, mig.Status.MigrationState.FailureReason) + } + if msg := getMessageByMigrationFailedReason(mig); msg != "" { + return fmt.Sprintf("%s: %s", base, msg) + } + return base +} + +func (h 
LifecycleHandler) getInProgressReasonAndMessage( + ctx context.Context, + mig *virtv1.VirtualMachineInstanceMigration, +) (vmopcondition.ReasonCompleted, string, error) { + reason := vmopcondition.ReasonSyncing + message := messageSyncingSourceAndTarget + + switch mig.Status.Phase { + case virtv1.MigrationPhaseUnset, virtv1.MigrationPending, virtv1.MigrationScheduling: + reason = vmopcondition.ReasonTargetScheduling + message = messageTargetPodScheduling + case virtv1.MigrationScheduled, virtv1.MigrationPreparingTarget: + reason = vmopcondition.ReasonTargetPreparing + message = messageTargetPodPreparing + case virtv1.MigrationTargetReady, virtv1.MigrationWaitingForSync, virtv1.MigrationSynchronizing, virtv1.MigrationRunning: + reason = vmopcondition.ReasonSyncing + message = messageSyncingSourceAndTarget + } + + pod, err := h.getTargetPod(ctx, mig) + if err != nil { + return "", "", err + } + if isPodPendingUnschedulable(pod) { + return vmopcondition.ReasonTargetUnschedulable, fmt.Sprintf("Target pod %q is unschedulable", pod.Namespace+"/"+pod.Name), nil + } + + if mig.Status.MigrationState != nil { + state := mig.Status.MigrationState + if state.TargetNodeDomainReadyTimestamp != nil { + reason = vmopcondition.ReasonTargetResumed + message = messageTargetVMResumed + } + if state.Completed { + reason = vmopcondition.ReasonSourceSuspended + message = messageSourceVMSuspended + } + } + + return reason, message, nil +} + +func calculateMigrationProgress( + vmop *v1alpha2.VirtualMachineOperation, + mig *virtv1.VirtualMachineInstanceMigration, + reason vmopcondition.ReasonCompleted, +) int32 { + switch reason { + case vmopcondition.ReasonDisksPreparing: + return progressDisksPreparing + case vmopcondition.ReasonTargetScheduling: + return progressTargetScheduling + case vmopcondition.ReasonTargetUnschedulable: + return progressTargetScheduling + case vmopcondition.ReasonTargetPreparing: + return progressTargetPreparing + case vmopcondition.ReasonTargetDiskError: + return 
progressTargetPreparing + case vmopcondition.ReasonSyncing: + start := vmop.CreationTimestamp.Time + if mig != nil && mig.Status.MigrationState != nil && mig.Status.MigrationState.StartTimestamp != nil { + start = mig.Status.MigrationState.StartTimestamp.Time + } + elapsed := time.Since(start) + if elapsed <= 0 { + return progressSyncingMin + } + seconds := elapsed.Seconds() + progress := progressSyncingMin + int32(minInt(int(seconds/syncingSecondsPerPercent), int(progressSyncingMax-progressSyncingMin))) + if progress < progressSyncingMin { + return progressSyncingMin + } + if progress > progressSyncingMax { + return progressSyncingMax + } + return progress + case vmopcondition.ReasonSourceSuspended: + return progressSourceSuspended + case vmopcondition.ReasonTargetResumed: + return progressTargetResumed + case vmopcondition.ReasonMigrationCompleted: + return progressMigrationCompleted + default: + if vmop != nil && vmop.Status.Progress != nil { + return *vmop.Status.Progress + } + return 0 + } +} + +func minInt(a, b int) int { + if a < b { + return a + } + return b +} + func (h LifecycleHandler) getTargetPod(ctx context.Context, mig *virtv1.VirtualMachineInstanceMigration) (*corev1.Pod, error) { selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ MatchLabels: map[string]string{ @@ -522,30 +681,3 @@ func isPodPendingUnschedulable(pod *corev1.Pod) bool { } return false } - -func (h LifecycleHandler) getConditionCompletedMessageByReason( - ctx context.Context, - reason vmopcondition.ReasonCompleted, - mig *virtv1.VirtualMachineInstanceMigration, -) (string, error) { - switch reason { - case vmopcondition.ReasonMigrationPending: - return "The VirtualMachineOperation for migrating the virtual machine has been queued. 
" + - "Waiting for the queue to be processed and for this operation to be executed.", nil - - case vmopcondition.ReasonMigrationPrepareTarget: - pod, err := h.getTargetPod(ctx, mig) - if err != nil { - return "", err - } - - if isPodPendingUnschedulable(pod) { - return fmt.Sprintf("Waiting for the virtual machine to be scheduled: "+ - "target pod \"%s/%s\" is unschedulable.", pod.Namespace, pod.Name), nil - } - return "The target environment is in the process of being prepared for migration.", nil - - default: - return "Wait until operation is completed.", nil - } -} diff --git a/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric.go b/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric.go index ecb1d71092..7876ea6a51 100644 --- a/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric.go +++ b/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric.go @@ -51,8 +51,8 @@ func newDataMetric(vmop *v1alpha2.VirtualMachineOperation) *dataMetric { var finishedAt int64 if vmop.Status.Phase == v1alpha2.VMOPPhaseCompleted || vmop.Status.Phase == v1alpha2.VMOPPhaseFailed { completedCond, _ := conditions.GetCondition(vmopcondition.TypeCompleted, vmop.Status.Conditions) - if (completedCond.Status == metav1.ConditionTrue && completedCond.Reason == string(vmopcondition.ReasonOperationCompleted)) || - (completedCond.Status == metav1.ConditionFalse && completedCond.Reason == string(vmopcondition.ReasonOperationFailed)) { + if (completedCond.Status == metav1.ConditionTrue && (completedCond.Reason == string(vmopcondition.ReasonOperationCompleted) || completedCond.Reason == string(vmopcondition.ReasonMigrationCompleted))) || + (completedCond.Status == metav1.ConditionFalse && (completedCond.Reason == string(vmopcondition.ReasonOperationFailed) || completedCond.Reason == string(vmopcondition.ReasonFailed) || completedCond.Reason == string(vmopcondition.ReasonAborted) || completedCond.Reason == 
string(vmopcondition.ReasonNotConverging))) { finishedAt = completedCond.LastTransitionTime.Unix() } } From 01469436d5496cb90d1c8bcca0d5c2ba90c34ccf Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Thu, 2 Apr 2026 14:58:52 +0200 Subject: [PATCH 02/27] feat(vmop): adapt migration progress strategy to kubevirt fields Signed-off-by: Daniil Antoshin --- .../internal/handler/deletion_test.go | 4 +- .../migration/internal/handler/lifecycle.go | 55 ++---- .../internal/handler/lifecycle_test.go | 54 ++++++ .../migration/internal/handler/suite_test.go | 4 +- .../migration/internal/progress/mapper.go | 99 ++++++++++ .../migration/internal/progress/progress.go | 176 ++++++++++++++++++ .../internal/progress/progress_test.go | 84 +++++++++ .../monitoring/metrics/vmop/data_metric.go | 2 +- 8 files changed, 435 insertions(+), 43 deletions(-) create mode 100644 images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go create mode 100644 images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go create mode 100644 images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/deletion_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/deletion_test.go index 91ecaa08ea..c1ebe4e2be 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/deletion_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/deletion_test.go @@ -108,11 +108,11 @@ var _ = Describe("DeletionHandler", func() { }, Entry("VMOP Evict 1", newVmop(v1alpha2.VMOPPhaseInProgress, vmopbuilder.WithType(v1alpha2.VMOPTypeEvict), vmopbuilder.WithVirtualMachine("test-vm")), - newSimpleMigration("vmop-"+name, namespace, "test-vm"), true, + newSimpleMigration("vmop-"+name, "test-vm"), true, ), Entry("VMOP Evict 2", 
newVmop(v1alpha2.VMOPPhaseCompleted, vmopbuilder.WithType(v1alpha2.VMOPTypeEvict), vmopbuilder.WithVirtualMachine("test-vm")), - newSimpleMigration("vmop-"+name, namespace, "test-vm"), false, + newSimpleMigration("vmop-"+name, "test-vm"), false, ), ) }) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go index fea3a2aaa1..15050814d1 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go @@ -32,6 +32,7 @@ import ( "github.com/deckhouse/virtualization-controller/pkg/common/object" commonvmop "github.com/deckhouse/virtualization-controller/pkg/common/vmop" "github.com/deckhouse/virtualization-controller/pkg/controller/conditions" + migrationprogress "github.com/deckhouse/virtualization-controller/pkg/controller/vmop/migration/internal/progress" migrationservice "github.com/deckhouse/virtualization-controller/pkg/controller/vmop/migration/internal/service" genericservice "github.com/deckhouse/virtualization-controller/pkg/controller/vmop/service" "github.com/deckhouse/virtualization-controller/pkg/eventrecord" @@ -53,8 +54,6 @@ const ( progressSourceSuspended int32 = 91 progressTargetResumed int32 = 92 progressMigrationCompleted int32 = 100 - - syncingSecondsPerPercent = 2 ) const ( @@ -72,18 +71,20 @@ type Base interface { IsApplicableOrSetFailedPhase(checker genericservice.ApplicableChecker, vmop *v1alpha2.VirtualMachineOperation, vm *v1alpha2.VirtualMachine) bool } type LifecycleHandler struct { - client client.Client - migration *migrationservice.MigrationService - base Base - recorder eventrecord.EventRecorderLogger + client client.Client + migration *migrationservice.MigrationService + base Base + recorder eventrecord.EventRecorderLogger + progressStrategy 
migrationprogress.Strategy } func NewLifecycleHandler(client client.Client, migration *migrationservice.MigrationService, base Base, recorder eventrecord.EventRecorderLogger) *LifecycleHandler { return &LifecycleHandler{ - client: client, - migration: migration, - base: base, - recorder: recorder, + client: client, + migration: migration, + base: base, + recorder: recorder, + progressStrategy: migrationprogress.NewProgress(), } } @@ -289,7 +290,7 @@ func (h LifecycleHandler) syncOperationComplete(ctx context.Context, vmop *v1alp reason := h.getFailedReason(mig) msg := h.getFailedMessage(reason, mig) - progress := calculateMigrationProgress(vmop, mig, reason) + progress := h.calculateMigrationProgress(vmop, mig, reason) vmop.Status.Progress = ptrToInt32(progress) completedCond. @@ -320,7 +321,7 @@ func (h LifecycleHandler) syncOperationComplete(ctx context.Context, vmop *v1alp if reason == vmopcondition.ReasonTargetScheduling { vmop.Status.Phase = v1alpha2.VMOPPhasePending } - progress := calculateMigrationProgress(vmop, mig, reason) + progress := h.calculateMigrationProgress(vmop, mig, reason) vmop.Status.Progress = ptrToInt32(progress) completedCond. 
@@ -437,7 +438,7 @@ func (h LifecycleHandler) execute(ctx context.Context, vmop *v1alpha2.VirtualMac if reason == vmopcondition.ReasonTargetScheduling { vmop.Status.Phase = v1alpha2.VMOPPhasePending } - progress := calculateMigrationProgress(vmop, mig, reason) + progress := h.calculateMigrationProgress(vmop, mig, reason) vmop.Status.Progress = ptrToInt32(progress) conditions.SetCondition( @@ -585,7 +586,7 @@ func (h LifecycleHandler) getInProgressReasonAndMessage( return reason, message, nil } -func calculateMigrationProgress( +func (h LifecycleHandler) calculateMigrationProgress( vmop *v1alpha2.VirtualMachineOperation, mig *virtv1.VirtualMachineInstanceMigration, reason vmopcondition.ReasonCompleted, @@ -602,23 +603,8 @@ func calculateMigrationProgress( case vmopcondition.ReasonTargetDiskError: return progressTargetPreparing case vmopcondition.ReasonSyncing: - start := vmop.CreationTimestamp.Time - if mig != nil && mig.Status.MigrationState != nil && mig.Status.MigrationState.StartTimestamp != nil { - start = mig.Status.MigrationState.StartTimestamp.Time - } - elapsed := time.Since(start) - if elapsed <= 0 { - return progressSyncingMin - } - seconds := elapsed.Seconds() - progress := progressSyncingMin + int32(minInt(int(seconds/syncingSecondsPerPercent), int(progressSyncingMax-progressSyncingMin))) - if progress < progressSyncingMin { - return progressSyncingMin - } - if progress > progressSyncingMax { - return progressSyncingMax - } - return progress + record := migrationprogress.BuildRecord(vmop, mig, time.Now()) + return h.progressStrategy.SyncProgress(record) case vmopcondition.ReasonSourceSuspended: return progressSourceSuspended case vmopcondition.ReasonTargetResumed: @@ -633,13 +619,6 @@ func calculateMigrationProgress( } } -func minInt(a, b int) int { - if a < b { - return a - } - return b -} - func (h LifecycleHandler) getTargetPod(ctx context.Context, mig *virtv1.VirtualMachineInstanceMigration) (*corev1.Pod, error) { selector, err := 
metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ MatchLabels: map[string]string{ diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go index 4b0495882e..b98a08e5e7 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go @@ -19,6 +19,7 @@ package handler import ( "context" "fmt" + "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -31,6 +32,7 @@ import ( vmbuilder "github.com/deckhouse/virtualization-controller/pkg/builder/vm" vmopbuilder "github.com/deckhouse/virtualization-controller/pkg/builder/vmop" "github.com/deckhouse/virtualization-controller/pkg/common/testutil" + "github.com/deckhouse/virtualization-controller/pkg/controller/conditions" "github.com/deckhouse/virtualization-controller/pkg/controller/reconciler" "github.com/deckhouse/virtualization-controller/pkg/controller/vmop/migration/internal/service" genericservice "github.com/deckhouse/virtualization-controller/pkg/controller/vmop/service" @@ -38,6 +40,7 @@ import ( "github.com/deckhouse/virtualization-controller/pkg/featuregates" "github.com/deckhouse/virtualization/api/core/v1alpha2" "github.com/deckhouse/virtualization/api/core/v1alpha2/vmcondition" + "github.com/deckhouse/virtualization/api/core/v1alpha2/vmopcondition" ) var _ = Describe("LifecycleHandler", func() { @@ -246,4 +249,55 @@ var _ = Describe("LifecycleHandler", func() { false, // targetMigrationEnabled ), ) + + Describe("migration progress integration", func() { + It("should set syncing progress inside [10,90] for running migration", func() { + vm := newVM(v1alpha2.PreferSafeMigrationPolicy) + vmop := newVMOPMigrate() + vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress + vmop.Status.Progress = ptr.To[int32](10) + 
+ mig := newSimpleMigration(fmt.Sprintf("vmop-%s", vmop.Name), name) + mig.Status.Phase = virtv1.MigrationRunning + mig.Status.MigrationState = &virtv1.VirtualMachineInstanceMigrationState{ + StartTimestamp: &metav1.Time{Time: time.Now().Add(-2 * time.Minute)}, + } + + fakeClient, srv = setupEnvironment(vmop, vm, mig) + migrationService := service.NewMigrationService(fakeClient, featuregates.Default()) + base := genericservice.NewBaseVMOPService(fakeClient, recorderMock) + h := NewLifecycleHandler(fakeClient, migrationService, base, recorderMock) + + _, err := h.Handle(ctx, srv.Changed()) + Expect(err).NotTo(HaveOccurred()) + Expect(srv.Changed().Status.Phase).To(Equal(v1alpha2.VMOPPhaseInProgress)) + Expect(srv.Changed().Status.Progress).NotTo(BeNil()) + Expect(*srv.Changed().Status.Progress).To(BeNumerically(">=", int32(10))) + Expect(*srv.Changed().Status.Progress).To(BeNumerically("<=", int32(90))) + + completed, found := conditions.GetCondition(vmopcondition.TypeCompleted, srv.Changed().Status.Conditions) + Expect(found).To(BeTrue()) + Expect(completed.Reason).To(Equal(vmopcondition.ReasonSyncing.String())) + }) + + It("should set progress to 100 for succeeded migration", func() { + vm := newVM(v1alpha2.PreferSafeMigrationPolicy) + vmop := newVMOPMigrate() + vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress + + mig := newSimpleMigration(fmt.Sprintf("vmop-%s", vmop.Name), name) + mig.Status.Phase = virtv1.MigrationSucceeded + + fakeClient, srv = setupEnvironment(vmop, vm, mig) + migrationService := service.NewMigrationService(fakeClient, featuregates.Default()) + base := genericservice.NewBaseVMOPService(fakeClient, recorderMock) + h := NewLifecycleHandler(fakeClient, migrationService, base, recorderMock) + + _, err := h.Handle(ctx, srv.Changed()) + Expect(err).NotTo(HaveOccurred()) + Expect(srv.Changed().Status.Phase).To(Equal(v1alpha2.VMOPPhaseCompleted)) + Expect(srv.Changed().Status.Progress).NotTo(BeNil()) + 
Expect(*srv.Changed().Status.Progress).To(Equal(int32(100))) + }) + }) }) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/suite_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/suite_test.go index 12cbc6a48c..a49f67229e 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/suite_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/suite_test.go @@ -62,7 +62,7 @@ func setupEnvironment(vmop *v1alpha2.VirtualMachineOperation, objs ...client.Obj return fakeClient, srv } -func newSimpleMigration(name, namespace, vm string) *virtv1.VirtualMachineInstanceMigration { +func newSimpleMigration(name, vm string) *virtv1.VirtualMachineInstanceMigration { return &virtv1.VirtualMachineInstanceMigration{ TypeMeta: metav1.TypeMeta{ APIVersion: virtv1.SchemeGroupVersion.String(), @@ -70,7 +70,7 @@ func newSimpleMigration(name, namespace, vm string) *virtv1.VirtualMachineInstan }, ObjectMeta: metav1.ObjectMeta{ Name: name, - Namespace: namespace, + Namespace: "default", }, Spec: virtv1.VirtualMachineInstanceMigrationSpec{ VMIName: vm, diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go new file mode 100644 index 0000000000..4b81c47f63 --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go @@ -0,0 +1,99 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package progress + +import ( + "time" + + virtv1 "kubevirt.io/api/core/v1" + + "github.com/deckhouse/virtualization/api/core/v1alpha2" +) + +const unknownMetric = -1.0 + +// BuildRecord maps KubeVirt migration status to progress algorithm inputs. +// +// KubeVirt v1.6 does not expose transferred/remaining byte counters in +// VirtualMachineInstanceMigrationStatus, therefore data metrics are mapped to +// unknown values and Progress runs in deterministic degraded mode +// (time+phase based with stall bump). +func BuildRecord(vmop *v1alpha2.VirtualMachineOperation, mig *virtv1.VirtualMachineInstanceMigration, now time.Time) Record { + record := Record{ + Now: now, + StartedAt: now, + PreviousProgress: previousProgress(vmop), + DataTotalMiB: unknownMetric, + DataProcessedMiB: unknownMetric, + DataRemainingMiB: unknownMetric, + } + + if vmop != nil { + record.StartedAt = vmop.CreationTimestamp.Time + } + + if mig == nil { + return record + } + + record.Phase = mig.Status.Phase + if state := mig.Status.MigrationState; state != nil { + if state.StartTimestamp != nil { + record.StartedAt = state.StartTimestamp.Time + } + record.Mode = state.Mode + record.Iteration = mapIteration(state) + record.Throttle = mapThrottle(state) + } + + return record +} + +func previousProgress(vmop *v1alpha2.VirtualMachineOperation) int32 { + if vmop == nil || vmop.Status.Progress == nil { + return syncRangeMin + } + return *vmop.Status.Progress +} + +// mapIteration approximates iterative phase: post-copy and paused modes are +// treated as iterative (>0), otherwise pre-copy stays at 
iteration 0. +func mapIteration(state *virtv1.VirtualMachineInstanceMigrationState) int32 { + if state == nil { + return 0 + } + if state.Mode == virtv1.MigrationPostCopy || state.Mode == virtv1.MigrationPaused { + return 1 + } + return 0 +} + +// mapThrottle provides deterministic throttle approximation from available +// flags: auto-converge implies elevated throttle, post-copy/paused implies max. +func mapThrottle(state *virtv1.VirtualMachineInstanceMigrationState) float64 { + if state == nil { + return 0 + } + throttle := 0.0 + if state.MigrationConfiguration != nil && state.MigrationConfiguration.AllowAutoConverge != nil && *state.MigrationConfiguration.AllowAutoConverge { + throttle = 0.7 + } + if state.Mode == virtv1.MigrationPostCopy || state.Mode == virtv1.MigrationPaused { + throttle = 1.0 + } + return throttle +} diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go new file mode 100644 index 0000000000..9f91e779e9 --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go @@ -0,0 +1,176 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package progress + +import ( + "math" + "time" + + virtv1 "kubevirt.io/api/core/v1" +) + +const ( + syncRangeMin int32 = 10 + syncRangeMax int32 = 90 + + progressStartPercent = 3.0 + progressBulkCeiling = 45.0 + progressIterativeCeiling = 98.0 + progressBulkWeightMetric = 0.80 + progressBulkWeightTime = 0.20 + progressIterWeightMetric = 0.76 + progressIterWeightTime = 0.24 + progressBulkTimeRate = 0.45 + progressIterBaseTimeRate = 0.22 + progressIterThrottleRate = 0.18 + progressBulkStallSeconds = 45 + progressIterStallSeconds = 30 + progressBulkDurationGuess = 90.0 +) + +type Strategy interface { + SyncProgress(record Record) int32 +} + +type Record struct { + Now time.Time + StartedAt time.Time + PreviousProgress int32 + Phase virtv1.VirtualMachineInstanceMigrationPhase + Mode virtv1.MigrationMode + Iteration int32 + Throttle float64 + DataTotalMiB float64 + DataProcessedMiB float64 + DataRemainingMiB float64 +} + +type Progress struct{} + +func NewProgress() *Progress { + return &Progress{} +} + +func (p *Progress) SyncProgress(record Record) int32 { + elapsed := max(record.Now.Sub(record.StartedAt), 0) + elapsedSec := elapsed.Seconds() + + metricPct, hasMetric := metricPercent(record) + var internal float64 + + if isIterative(record, elapsedSec) { + iterTime := progressBulkCeiling + math.Max(0, elapsedSec-progressBulkDurationGuess)*(progressIterBaseTimeRate+clampFloat(record.Throttle, 0, 1)*progressIterThrottleRate) + iterMetric := iterativeMetricPercent(record, metricPct, hasMetric) + if hasMetric { + internal = progressIterWeightMetric*iterMetric + progressIterWeightTime*iterTime + } else { + internal = iterTime + } + internal = clampFloat(internal, progressBulkCeiling, progressIterativeCeiling) + } else { + bulkTime := progressStartPercent + elapsedSec*progressBulkTimeRate + if hasMetric { + internal = progressBulkWeightMetric*metricPct + progressBulkWeightTime*bulkTime + } else { + internal = bulkTime + } + internal = clampFloat(internal, 
progressStartPercent, progressBulkCeiling) + } + + syncProgress := mapToSyncRange(internal) + return applyMonotonicStallBump(record.PreviousProgress, syncProgress, elapsedSec, isIterative(record, elapsedSec)) +} + +func metricPercent(record Record) (float64, bool) { + if record.DataTotalMiB > 0 && record.DataProcessedMiB >= 0 { + return clampFloat((record.DataProcessedMiB/record.DataTotalMiB)*100.0, 0, 100), true + } + if record.DataTotalMiB > 0 && record.DataRemainingMiB >= 0 { + processed := record.DataTotalMiB - record.DataRemainingMiB + return clampFloat((processed/record.DataTotalMiB)*100.0, 0, 100), true + } + return 0, false +} + +func iterativeMetricPercent(record Record, metricPct float64, hasMetric bool) float64 { + if hasMetric { + if record.DataTotalMiB > 0 && record.DataRemainingMiB >= 0 { + remainingRatio := clampFloat(record.DataRemainingMiB/record.DataTotalMiB, 0.0001, 1) + shaped := 1 - math.Log1p(remainingRatio*9)/math.Log(10) + return clampFloat(progressBulkCeiling+shaped*(progressIterativeCeiling-progressBulkCeiling), progressBulkCeiling, progressIterativeCeiling) + } + return clampFloat(progressBulkCeiling+(metricPct/100.0)*(progressIterativeCeiling-progressBulkCeiling), progressBulkCeiling, progressIterativeCeiling) + } + return progressBulkCeiling +} + +func isIterative(record Record, elapsedSec float64) bool { + if record.Iteration > 0 { + return true + } + if record.Mode == virtv1.MigrationPostCopy || record.Mode == virtv1.MigrationPaused { + return true + } + if record.Phase == virtv1.MigrationRunning || record.Phase == virtv1.MigrationSynchronizing { + return elapsedSec >= progressBulkDurationGuess + } + return false +} + +func applyMonotonicStallBump(previous, current int32, elapsedSec float64, iterative bool) int32 { + prev := clampSyncRange(previous) + if current < prev { + current = prev + } + if current == prev { + window := float64(progressBulkStallSeconds) + if iterative { + window = float64(progressIterStallSeconds) + } + if 
elapsedSec >= window { + current = clampSyncRange(prev + 1) + } + } + return clampSyncRange(current) +} + +func mapToSyncRange(internal float64) int32 { + normalized := (clampFloat(internal, progressStartPercent, progressIterativeCeiling) - progressStartPercent) / + (progressIterativeCeiling - progressStartPercent) + mapped := float64(syncRangeMin) + normalized*float64(syncRangeMax-syncRangeMin) + return clampSyncRange(int32(math.Round(mapped))) +} + +func clampFloat(v, minV, maxV float64) float64 { + if v < minV { + return minV + } + if v > maxV { + return maxV + } + return v +} + +func clampSyncRange(v int32) int32 { + if v < syncRangeMin { + return syncRangeMin + } + if v > syncRangeMax { + return syncRangeMax + } + return v +} diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go new file mode 100644 index 0000000000..20b1d237a0 --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go @@ -0,0 +1,84 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package progress + +import ( + "testing" + "time" + + virtv1 "kubevirt.io/api/core/v1" +) + +func TestProgress_MonotonicGrowth(t *testing.T) { + now := time.Now() + p := NewProgress() + + first := p.SyncProgress(Record{ + Now: now, + StartedAt: now.Add(-20 * time.Second), + PreviousProgress: 10, + Phase: virtv1.MigrationRunning, + }) + second := p.SyncProgress(Record{ + Now: now, + StartedAt: now.Add(-80 * time.Second), + PreviousProgress: first, + Phase: virtv1.MigrationRunning, + }) + + if second < first { + t.Fatalf("expected monotonic progress, first=%d second=%d", first, second) + } +} + +func TestProgress_SyncRangeCaps(t *testing.T) { + now := time.Now() + p := NewProgress() + + progress := p.SyncProgress(Record{ + Now: now, + StartedAt: now.Add(-2 * time.Hour), + PreviousProgress: 10, + Phase: virtv1.MigrationRunning, + Mode: virtv1.MigrationPostCopy, + Iteration: 1, + Throttle: 1, + DataTotalMiB: 1024, + DataProcessedMiB: 2048, + DataRemainingMiB: 0, + }) + + if progress < syncRangeMin || progress > syncRangeMax { + t.Fatalf("expected progress in sync range [%d,%d], got=%d", syncRangeMin, syncRangeMax, progress) + } +} + +func TestProgress_StallBump(t *testing.T) { + now := time.Now() + p := NewProgress() + + progress := p.SyncProgress(Record{ + Now: now, + StartedAt: now.Add(-50 * time.Second), + PreviousProgress: 70, + Phase: virtv1.MigrationRunning, + }) + + if progress != 71 { + t.Fatalf("expected stall bump to increase progress to 71, got=%d", progress) + } +} diff --git a/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric.go b/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric.go index 7876ea6a51..f48de91500 100644 --- a/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric.go +++ b/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric.go @@ -52,7 +52,7 @@ func newDataMetric(vmop *v1alpha2.VirtualMachineOperation) *dataMetric { if vmop.Status.Phase == 
v1alpha2.VMOPPhaseCompleted || vmop.Status.Phase == v1alpha2.VMOPPhaseFailed { completedCond, _ := conditions.GetCondition(vmopcondition.TypeCompleted, vmop.Status.Conditions) if (completedCond.Status == metav1.ConditionTrue && (completedCond.Reason == string(vmopcondition.ReasonOperationCompleted) || completedCond.Reason == string(vmopcondition.ReasonMigrationCompleted))) || - (completedCond.Status == metav1.ConditionFalse && (completedCond.Reason == string(vmopcondition.ReasonOperationFailed) || completedCond.Reason == string(vmopcondition.ReasonFailed) || completedCond.Reason == string(vmopcondition.ReasonAborted) || completedCond.Reason == string(vmopcondition.ReasonNotConverging))) { + (completedCond.Status == metav1.ConditionFalse && (completedCond.Reason == string(vmopcondition.ReasonOperationFailed) || completedCond.Reason == string(vmopcondition.ReasonFailed) || completedCond.Reason == string(vmopcondition.ReasonAborted) || completedCond.Reason == string(vmopcondition.ReasonNotConverging) || completedCond.Reason == string(vmopcondition.ReasonTargetUnschedulable) || completedCond.Reason == string(vmopcondition.ReasonTargetDiskError))) { finishedAt = completedCond.LastTransitionTime.Unix() } } From ea31a606c3b21370e9f5cc7bb23c0f6539c3f94e Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Thu, 2 Apr 2026 15:42:37 +0200 Subject: [PATCH 03/27] feat(core): estimate migration progress without transfer metrics Handle nil migrations as generic failures and keep terminal VMOP metrics detection explicit. 
Signed-off-by: Daniil Antoshin --- .../migration/internal/handler/lifecycle.go | 6 +- .../internal/handler/lifecycle_test.go | 6 ++ .../migration/internal/progress/progress.go | 4 + .../internal/progress/progress_test.go | 19 ++++ .../monitoring/metrics/vmop/data_metric.go | 29 +++++- .../metrics/vmop/data_metric_test.go | 93 +++++++++++++++++++ 6 files changed, 154 insertions(+), 3 deletions(-) create mode 100644 images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric_test.go diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go index 15050814d1..f7d39118e7 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go @@ -499,7 +499,11 @@ func ptrToInt32(v int32) *int32 { } func (h LifecycleHandler) getFailedReason(mig *virtv1.VirtualMachineInstanceMigration) vmopcondition.ReasonCompleted { - if mig != nil && mig.Status.MigrationState != nil { + if mig == nil { + return vmopcondition.ReasonFailed + } + + if mig.Status.MigrationState != nil { state := mig.Status.MigrationState if state.AbortRequested || state.AbortStatus == virtv1.MigrationAbortSucceeded { return vmopcondition.ReasonAborted diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go index b98a08e5e7..2abd4297b7 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go @@ -251,6 +251,12 @@ var _ = Describe("LifecycleHandler", func() { ) Describe("migration progress integration", func() { + It("should return 
generic failed reason for nil migration", func() { + h := LifecycleHandler{} + + Expect(h.getFailedReason(nil)).To(Equal(vmopcondition.ReasonFailed)) + }) + It("should set syncing progress inside [10,90] for running migration", func() { vm := newVM(v1alpha2.PreferSafeMigrationPolicy) vmop := newVMOPMigrate() diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go index 9f91e779e9..453bb61afd 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go @@ -27,6 +27,10 @@ const ( syncRangeMin int32 = 10 syncRangeMax int32 = 90 + // These coefficients tune the degraded-mode progress estimation when KubeVirt + // does not expose byte counters for migration transfer state. The algorithm + // keeps early stages below the sync range, maps active data synchronization + // into [10,90], and preserves monotonic growth with a small stall bump. 
progressStartPercent = 3.0 progressBulkCeiling = 45.0 progressIterativeCeiling = 98.0 diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go index 20b1d237a0..401d43c535 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go @@ -82,3 +82,22 @@ func TestProgress_StallBump(t *testing.T) { t.Fatalf("expected stall bump to increase progress to 71, got=%d", progress) } } + +func TestProgress_DegradedModeWithoutMetrics(t *testing.T) { + now := time.Now() + p := NewProgress() + + progress := p.SyncProgress(Record{ + Now: now, + StartedAt: now.Add(-2 * time.Minute), + PreviousProgress: 10, + Phase: virtv1.MigrationRunning, + DataTotalMiB: unknownMetric, + DataProcessedMiB: unknownMetric, + DataRemainingMiB: unknownMetric, + }) + + if progress < syncRangeMin || progress > syncRangeMax { + t.Fatalf("expected degraded-mode progress in sync range [%d,%d], got=%d", syncRangeMin, syncRangeMax, progress) + } +} diff --git a/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric.go b/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric.go index f48de91500..526043eeb7 100644 --- a/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric.go +++ b/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric.go @@ -36,6 +36,20 @@ type dataMetric struct { FinishedAt int64 // Unix timestamp when operation finished (Completed/Failed) (0 = not set) } +var successfulTerminalReasons = map[string]struct{}{ + string(vmopcondition.ReasonOperationCompleted): {}, + string(vmopcondition.ReasonMigrationCompleted): {}, +} + +var failedTerminalReasons = map[string]struct{}{ + string(vmopcondition.ReasonOperationFailed): {}, + 
string(vmopcondition.ReasonFailed): {}, + string(vmopcondition.ReasonAborted): {}, + string(vmopcondition.ReasonNotConverging): {}, + string(vmopcondition.ReasonTargetUnschedulable): {}, + string(vmopcondition.ReasonTargetDiskError): {}, +} + // DO NOT mutate VirtualMachineOperation! func newDataMetric(vmop *v1alpha2.VirtualMachineOperation) *dataMetric { if vmop == nil { @@ -51,8 +65,7 @@ func newDataMetric(vmop *v1alpha2.VirtualMachineOperation) *dataMetric { var finishedAt int64 if vmop.Status.Phase == v1alpha2.VMOPPhaseCompleted || vmop.Status.Phase == v1alpha2.VMOPPhaseFailed { completedCond, _ := conditions.GetCondition(vmopcondition.TypeCompleted, vmop.Status.Conditions) - if (completedCond.Status == metav1.ConditionTrue && (completedCond.Reason == string(vmopcondition.ReasonOperationCompleted) || completedCond.Reason == string(vmopcondition.ReasonMigrationCompleted))) || - (completedCond.Status == metav1.ConditionFalse && (completedCond.Reason == string(vmopcondition.ReasonOperationFailed) || completedCond.Reason == string(vmopcondition.ReasonFailed) || completedCond.Reason == string(vmopcondition.ReasonAborted) || completedCond.Reason == string(vmopcondition.ReasonNotConverging) || completedCond.Reason == string(vmopcondition.ReasonTargetUnschedulable) || completedCond.Reason == string(vmopcondition.ReasonTargetDiskError))) { + if isTerminalCompletedCondition(completedCond) { finishedAt = completedCond.LastTransitionTime.Unix() } } @@ -69,3 +82,15 @@ func newDataMetric(vmop *v1alpha2.VirtualMachineOperation) *dataMetric { FinishedAt: finishedAt, } } + +func isTerminalCompletedCondition(cond metav1.Condition) bool { + if cond.Status == metav1.ConditionTrue { + _, ok := successfulTerminalReasons[cond.Reason] + return ok + } + if cond.Status == metav1.ConditionFalse { + _, ok := failedTerminalReasons[cond.Reason] + return ok + } + return false +} diff --git a/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric_test.go 
b/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric_test.go new file mode 100644 index 0000000000..a58dccdc15 --- /dev/null +++ b/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric_test.go @@ -0,0 +1,93 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package vmop + +import ( + "testing" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/deckhouse/virtualization/api/core/v1alpha2" + "github.com/deckhouse/virtualization/api/core/v1alpha2/vmopcondition" +) + +func TestIsTerminalCompletedCondition(t *testing.T) { + tests := []struct { + name string + cond metav1.Condition + want bool + }{ + { + name: "migration completed reason is terminal", + cond: metav1.Condition{Status: metav1.ConditionTrue, Reason: vmopcondition.ReasonMigrationCompleted.String()}, + want: true, + }, + { + name: "target disk error reason is terminal", + cond: metav1.Condition{Status: metav1.ConditionFalse, Reason: vmopcondition.ReasonTargetDiskError.String()}, + want: true, + }, + { + name: "in progress reason is not terminal", + cond: metav1.Condition{Status: metav1.ConditionFalse, Reason: vmopcondition.ReasonSyncing.String()}, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isTerminalCompletedCondition(tt.cond); got != tt.want { + t.Fatalf("isTerminalCompletedCondition() = %v, want %v", got, tt.want) + } + }) + } +} + +func 
TestNewDataMetric_SetsFinishedAtForTerminalMigrationReasons(t *testing.T) { + finishedAt := metav1.NewTime(time.Unix(1710000000, 0)) + vmop := &v1alpha2.VirtualMachineOperation{ + ObjectMeta: metav1.ObjectMeta{ + Name: "vmop-test", + Namespace: "default", + CreationTimestamp: metav1.NewTime(time.Unix(1700000000, 0)), + }, + Spec: v1alpha2.VirtualMachineOperationSpec{ + Type: v1alpha2.VMOPTypeMigrate, + VirtualMachine: "test-vm", + }, + Status: v1alpha2.VirtualMachineOperationStatus{ + Phase: v1alpha2.VMOPPhaseFailed, + Conditions: []metav1.Condition{ + { + Type: vmopcondition.TypeCompleted.String(), + Status: metav1.ConditionFalse, + Reason: vmopcondition.ReasonTargetUnschedulable.String(), + LastTransitionTime: finishedAt, + }, + }, + }, + } + + metric := newDataMetric(vmop) + if metric == nil { + t.Fatal("expected metric to be created") + } + if metric.FinishedAt != finishedAt.Unix() { + t.Fatalf("expected FinishedAt=%d, got %d", finishedAt.Unix(), metric.FinishedAt) + } +} From 9bfedf0d951ac8e20230e95bd8170f037511d178 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Thu, 2 Apr 2026 16:11:37 +0200 Subject: [PATCH 04/27] test(core): add migration progress test coverage Signed-off-by: Daniil Antoshin --- .../internal/handler/lifecycle_test.go | 183 +++++++++++++++ .../internal/progress/mapper_test.go | 208 ++++++++++++++++++ .../internal/progress/progress_test.go | 161 ++++++++++++++ .../metrics/vmop/data_metric_test.go | 10 + 4 files changed, 562 insertions(+) create mode 100644 images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go index 2abd4297b7..fd845026e4 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go +++ 
b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go @@ -23,6 +23,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/component-base/featuregate" "k8s.io/utils/ptr" @@ -34,6 +35,7 @@ import ( "github.com/deckhouse/virtualization-controller/pkg/common/testutil" "github.com/deckhouse/virtualization-controller/pkg/controller/conditions" "github.com/deckhouse/virtualization-controller/pkg/controller/reconciler" + migrationprogress "github.com/deckhouse/virtualization-controller/pkg/controller/vmop/migration/internal/progress" "github.com/deckhouse/virtualization-controller/pkg/controller/vmop/migration/internal/service" genericservice "github.com/deckhouse/virtualization-controller/pkg/controller/vmop/service" "github.com/deckhouse/virtualization-controller/pkg/eventrecord" @@ -43,6 +45,14 @@ import ( "github.com/deckhouse/virtualization/api/core/v1alpha2/vmopcondition" ) +type progressStrategyStub struct { + value int32 +} + +func (s progressStrategyStub) SyncProgress(_ migrationprogress.Record) int32 { + return s.value +} + var _ = Describe("LifecycleHandler", func() { const ( name = "test" @@ -257,6 +267,133 @@ var _ = Describe("LifecycleHandler", func() { Expect(h.getFailedReason(nil)).To(Equal(vmopcondition.ReasonFailed)) }) + DescribeTable("should detect failed reason", func(mig *virtv1.VirtualMachineInstanceMigration, expected vmopcondition.ReasonCompleted) { + h := LifecycleHandler{} + Expect(h.getFailedReason(mig)).To(Equal(expected)) + }, + Entry("aborted by request", + &virtv1.VirtualMachineInstanceMigration{Status: virtv1.VirtualMachineInstanceMigrationStatus{MigrationState: &virtv1.VirtualMachineInstanceMigrationState{AbortRequested: true}}}, + vmopcondition.ReasonAborted, + ), + Entry("aborted with succeeded status", + &virtv1.VirtualMachineInstanceMigration{Status: 
virtv1.VirtualMachineInstanceMigrationStatus{MigrationState: &virtv1.VirtualMachineInstanceMigrationState{AbortStatus: virtv1.MigrationAbortSucceeded}}}, + vmopcondition.ReasonAborted, + ), + Entry("not converging from failure reason", + &virtv1.VirtualMachineInstanceMigration{Status: virtv1.VirtualMachineInstanceMigrationStatus{MigrationState: &virtv1.VirtualMachineInstanceMigrationState{FailureReason: "no progress during convergence"}}}, + vmopcondition.ReasonNotConverging, + ), + Entry("target unschedulable from condition", + &virtv1.VirtualMachineInstanceMigration{Status: virtv1.VirtualMachineInstanceMigrationStatus{Conditions: []virtv1.VirtualMachineInstanceMigrationCondition{{Type: virtv1.VirtualMachineInstanceMigrationFailed, Reason: "Unschedulable", Message: "pod is unschedulable"}}}}, + vmopcondition.ReasonTargetUnschedulable, + ), + Entry("target disk error from condition", + &virtv1.VirtualMachineInstanceMigration{Status: virtv1.VirtualMachineInstanceMigrationStatus{Conditions: []virtv1.VirtualMachineInstanceMigrationCondition{{Type: virtv1.VirtualMachineInstanceMigrationFailed, Reason: "VolumeAttach", Message: "csi volume attach failed"}}}}, + vmopcondition.ReasonTargetDiskError, + ), + Entry("generic failed reason", + &virtv1.VirtualMachineInstanceMigration{}, + vmopcondition.ReasonFailed, + ), + ) + + DescribeTable("should build in-progress reason and message", func( + phase virtv1.VirtualMachineInstanceMigrationPhase, + state *virtv1.VirtualMachineInstanceMigrationState, + pod *corev1.Pod, + expectedReason vmopcondition.ReasonCompleted, + ) { + mig := newSimpleMigration("vmop-test", name) + mig.UID = "migration-uid" + mig.Status.Phase = phase + mig.Status.MigrationState = state + + objects := []client.Object{mig} + if pod != nil { + objects = append(objects, pod) + } + fakeClient, err := testutil.NewFakeClientWithObjects(objects...) 
+ Expect(err).NotTo(HaveOccurred()) + + h := LifecycleHandler{client: fakeClient} + reason, _, err := h.getInProgressReasonAndMessage(ctx, mig) + Expect(err).NotTo(HaveOccurred()) + Expect(reason).To(Equal(expectedReason)) + }, + Entry("phase unset means target scheduling", + virtv1.MigrationPhaseUnset, + nil, + nil, + vmopcondition.ReasonTargetScheduling, + ), + Entry("scheduled means target preparing", + virtv1.MigrationScheduled, + nil, + nil, + vmopcondition.ReasonTargetPreparing, + ), + Entry("running means syncing", + virtv1.MigrationRunning, + nil, + nil, + vmopcondition.ReasonSyncing, + ), + Entry("unschedulable pod has priority", + virtv1.MigrationScheduling, + nil, + &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: "target-pod", + Labels: map[string]string{ + virtv1.AppLabel: "virt-launcher", + virtv1.MigrationJobLabel: "migration-uid", + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodPending, + Conditions: []corev1.PodCondition{{ + Type: corev1.PodScheduled, + Status: corev1.ConditionFalse, + Reason: corev1.PodReasonUnschedulable, + }}, + }, + }, + vmopcondition.ReasonTargetUnschedulable, + ), + Entry("target resumed after domain ready timestamp", + virtv1.MigrationRunning, + &virtv1.VirtualMachineInstanceMigrationState{TargetNodeDomainReadyTimestamp: &metav1.Time{Time: time.Now()}}, + nil, + vmopcondition.ReasonTargetResumed, + ), + Entry("source suspended after completed flag", + virtv1.MigrationRunning, + &virtv1.VirtualMachineInstanceMigrationState{Completed: true}, + nil, + vmopcondition.ReasonSourceSuspended, + ), + ) + + DescribeTable("should map progress by reason", func(reason vmopcondition.ReasonCompleted, initial *int32, expected int32) { + h := LifecycleHandler{progressStrategy: progressStrategyStub{value: 55}} + vmop := &v1alpha2.VirtualMachineOperation{Status: v1alpha2.VirtualMachineOperationStatus{Progress: initial}} + mig := &virtv1.VirtualMachineInstanceMigration{} + + 
Expect(h.calculateMigrationProgress(vmop, mig, reason)).To(Equal(expected)) + }, + Entry("disks preparing", vmopcondition.ReasonDisksPreparing, nil, int32(1)), + Entry("target scheduling", vmopcondition.ReasonTargetScheduling, nil, int32(2)), + Entry("target unschedulable", vmopcondition.ReasonTargetUnschedulable, nil, int32(2)), + Entry("target preparing", vmopcondition.ReasonTargetPreparing, nil, int32(3)), + Entry("target disk error", vmopcondition.ReasonTargetDiskError, nil, int32(3)), + Entry("syncing delegates to strategy", vmopcondition.ReasonSyncing, nil, int32(55)), + Entry("source suspended", vmopcondition.ReasonSourceSuspended, nil, int32(91)), + Entry("target resumed", vmopcondition.ReasonTargetResumed, nil, int32(92)), + Entry("migration completed", vmopcondition.ReasonMigrationCompleted, nil, int32(100)), + Entry("unknown keeps existing progress", vmopcondition.ReasonFailed, ptr.To[int32](44), int32(44)), + ) + It("should set syncing progress inside [10,90] for running migration", func() { vm := newVM(v1alpha2.PreferSafeMigrationPolicy) vmop := newVMOPMigrate() @@ -286,6 +423,52 @@ var _ = Describe("LifecycleHandler", func() { Expect(completed.Reason).To(Equal(vmopcondition.ReasonSyncing.String())) }) + It("should set pending phase and progress to 2 for scheduling migration", func() { + vm := newVM(v1alpha2.PreferSafeMigrationPolicy) + vmop := newVMOPMigrate() + vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress + + mig := newSimpleMigration(fmt.Sprintf("vmop-%s", vmop.Name), name) + mig.Status.Phase = virtv1.MigrationScheduling + + fakeClient, srv = setupEnvironment(vmop, vm, mig) + migrationService := service.NewMigrationService(fakeClient, featuregates.Default()) + base := genericservice.NewBaseVMOPService(fakeClient, recorderMock) + h := NewLifecycleHandler(fakeClient, migrationService, base, recorderMock) + + _, err := h.Handle(ctx, srv.Changed()) + Expect(err).NotTo(HaveOccurred()) + 
Expect(srv.Changed().Status.Phase).To(Equal(v1alpha2.VMOPPhasePending)) + Expect(srv.Changed().Status.Progress).NotTo(BeNil()) + Expect(*srv.Changed().Status.Progress).To(Equal(int32(2))) + }) + + It("should set aborted reason and preserve progress for failed migration", func() { + vm := newVM(v1alpha2.PreferSafeMigrationPolicy) + vmop := newVMOPMigrate() + vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress + vmop.Status.Progress = ptr.To[int32](55) + + mig := newSimpleMigration(fmt.Sprintf("vmop-%s", vmop.Name), name) + mig.Status.Phase = virtv1.MigrationFailed + mig.Status.MigrationState = &virtv1.VirtualMachineInstanceMigrationState{AbortRequested: true} + + fakeClient, srv = setupEnvironment(vmop, vm, mig) + migrationService := service.NewMigrationService(fakeClient, featuregates.Default()) + base := genericservice.NewBaseVMOPService(fakeClient, recorderMock) + h := NewLifecycleHandler(fakeClient, migrationService, base, recorderMock) + + _, err := h.Handle(ctx, srv.Changed()) + Expect(err).NotTo(HaveOccurred()) + Expect(srv.Changed().Status.Phase).To(Equal(v1alpha2.VMOPPhaseFailed)) + Expect(srv.Changed().Status.Progress).NotTo(BeNil()) + Expect(*srv.Changed().Status.Progress).To(Equal(int32(55))) + + completed, found := conditions.GetCondition(vmopcondition.TypeCompleted, srv.Changed().Status.Conditions) + Expect(found).To(BeTrue()) + Expect(completed.Reason).To(Equal(vmopcondition.ReasonAborted.String())) + }) + It("should set progress to 100 for succeeded migration", func() { vm := newVM(v1alpha2.PreferSafeMigrationPolicy) vmop := newVMOPMigrate() diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go new file mode 100644 index 0000000000..fb80d7c98f --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go @@ -0,0 +1,208 @@ +/* +Copyright 2026 Flant JSC + 
+Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package progress + +import ( + "testing" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" + virtv1 "kubevirt.io/api/core/v1" + + "github.com/deckhouse/virtualization/api/core/v1alpha2" +) + +func TestBuildRecord_NilVMOPAndMigration(t *testing.T) { + now := time.Unix(1710000000, 0) + + record := BuildRecord(nil, nil, now) + + if !record.StartedAt.Equal(now) { + t.Fatalf("expected StartedAt=%v, got %v", now, record.StartedAt) + } + if record.PreviousProgress != syncRangeMin { + t.Fatalf("expected PreviousProgress=%d, got %d", syncRangeMin, record.PreviousProgress) + } + if record.DataTotalMiB != unknownMetric || record.DataProcessedMiB != unknownMetric || record.DataRemainingMiB != unknownMetric { + t.Fatalf("expected unknown metrics, got total=%v processed=%v remaining=%v", record.DataTotalMiB, record.DataProcessedMiB, record.DataRemainingMiB) + } +} + +func TestBuildRecord_UsesVMOPCreationTimestampAndPreviousProgress(t *testing.T) { + now := time.Unix(1710000000, 0) + vmop := &v1alpha2.VirtualMachineOperation{ + ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.NewTime(now.Add(-3 * time.Minute))}, + Status: v1alpha2.VirtualMachineOperationStatus{Progress: ptr.To[int32](42)}, + } + + record := BuildRecord(vmop, nil, now) + + if !record.StartedAt.Equal(vmop.CreationTimestamp.Time) { + t.Fatalf("expected StartedAt=%v, got %v", vmop.CreationTimestamp.Time, record.StartedAt) + } + if 
record.PreviousProgress != 42 { + t.Fatalf("expected PreviousProgress=42, got %d", record.PreviousProgress) + } +} + +func TestBuildRecord_UsesMigrationState(t *testing.T) { + now := time.Unix(1710000000, 0) + start := metav1.NewTime(now.Add(-5 * time.Minute)) + autoConverge := true + mig := &virtv1.VirtualMachineInstanceMigration{ + Status: virtv1.VirtualMachineInstanceMigrationStatus{ + Phase: virtv1.MigrationRunning, + MigrationState: &virtv1.VirtualMachineInstanceMigrationState{ + StartTimestamp: &start, + Mode: virtv1.MigrationPostCopy, + MigrationConfiguration: &virtv1.MigrationConfiguration{ + AllowAutoConverge: &autoConverge, + }, + }, + }, + } + + record := BuildRecord(nil, mig, now) + + if record.Phase != virtv1.MigrationRunning { + t.Fatalf("expected Phase=%s, got %s", virtv1.MigrationRunning, record.Phase) + } + if !record.StartedAt.Equal(start.Time) { + t.Fatalf("expected StartedAt=%v, got %v", start.Time, record.StartedAt) + } + if record.Mode != virtv1.MigrationPostCopy { + t.Fatalf("expected Mode=%s, got %s", virtv1.MigrationPostCopy, record.Mode) + } + if record.Iteration != 1 { + t.Fatalf("expected Iteration=1, got %d", record.Iteration) + } + if record.Throttle != 1.0 { + t.Fatalf("expected Throttle=1.0, got %v", record.Throttle) + } +} + +func TestPreviousProgress(t *testing.T) { + tests := []struct { + name string + vmop *v1alpha2.VirtualMachineOperation + want int32 + }{ + { + name: "nil vmop", + vmop: nil, + want: syncRangeMin, + }, + { + name: "nil progress", + vmop: &v1alpha2.VirtualMachineOperation{}, + want: syncRangeMin, + }, + { + name: "explicit progress", + vmop: &v1alpha2.VirtualMachineOperation{Status: v1alpha2.VirtualMachineOperationStatus{Progress: ptr.To[int32](37)}}, + want: 37, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := previousProgress(tt.vmop); got != tt.want { + t.Fatalf("previousProgress() = %d, want %d", got, tt.want) + } + }) + } +} + +func TestMapIteration(t *testing.T) { + 
tests := []struct { + name string + state *virtv1.VirtualMachineInstanceMigrationState + want int32 + }{ + { + name: "nil state", + state: nil, + want: 0, + }, + { + name: "pre-copy", + state: &virtv1.VirtualMachineInstanceMigrationState{Mode: virtv1.MigrationPreCopy}, + want: 0, + }, + { + name: "post-copy", + state: &virtv1.VirtualMachineInstanceMigrationState{Mode: virtv1.MigrationPostCopy}, + want: 1, + }, + { + name: "paused", + state: &virtv1.VirtualMachineInstanceMigrationState{Mode: virtv1.MigrationPaused}, + want: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := mapIteration(tt.state); got != tt.want { + t.Fatalf("mapIteration() = %d, want %d", got, tt.want) + } + }) + } +} + +func TestMapThrottle(t *testing.T) { + tests := []struct { + name string + state *virtv1.VirtualMachineInstanceMigrationState + want float64 + }{ + { + name: "nil state", + state: nil, + want: 0, + }, + { + name: "default throttle", + state: &virtv1.VirtualMachineInstanceMigrationState{}, + want: 0, + }, + { + name: "auto converge", + state: &virtv1.VirtualMachineInstanceMigrationState{ + MigrationConfiguration: &virtv1.MigrationConfiguration{AllowAutoConverge: ptr.To(true)}, + }, + want: 0.7, + }, + { + name: "post-copy overrides throttle", + state: &virtv1.VirtualMachineInstanceMigrationState{ + Mode: virtv1.MigrationPostCopy, + MigrationConfiguration: &virtv1.MigrationConfiguration{AllowAutoConverge: ptr.To(true)}, + }, + want: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := mapThrottle(tt.state); got != tt.want { + t.Fatalf("mapThrottle() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go index 401d43c535..9f07f56b4f 100644 --- 
a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go @@ -101,3 +101,164 @@ func TestProgress_DegradedModeWithoutMetrics(t *testing.T) { t.Fatalf("expected degraded-mode progress in sync range [%d,%d], got=%d", syncRangeMin, syncRangeMax, progress) } } + +func TestProgress_WithMetricsInBulkPhase(t *testing.T) { + now := time.Now() + p := NewProgress() + + progress := p.SyncProgress(Record{ + Now: now, + StartedAt: now.Add(-30 * time.Second), + PreviousProgress: 10, + Phase: virtv1.MigrationRunning, + DataTotalMiB: 1024, + DataProcessedMiB: 512, + }) + + if progress <= syncRangeMin || progress >= syncRangeMax { + t.Fatalf("expected bulk progress strictly inside sync range, got=%d", progress) + } +} + +func TestProgress_WithMetricsInIterativePhase(t *testing.T) { + now := time.Now() + p := NewProgress() + + bulk := p.SyncProgress(Record{ + Now: now, + StartedAt: now.Add(-30 * time.Second), + PreviousProgress: 10, + Phase: virtv1.MigrationRunning, + DataTotalMiB: 1024, + DataProcessedMiB: 512, + }) + iterative := p.SyncProgress(Record{ + Now: now, + StartedAt: now.Add(-3 * time.Minute), + PreviousProgress: bulk, + Phase: virtv1.MigrationRunning, + Mode: virtv1.MigrationPostCopy, + Iteration: 1, + Throttle: 1, + DataTotalMiB: 1024, + DataRemainingMiB: 64, + }) + + if iterative <= bulk { + t.Fatalf("expected iterative progress to be greater than bulk progress, bulk=%d iterative=%d", bulk, iterative) + } + if iterative < syncRangeMin || iterative > syncRangeMax { + t.Fatalf("expected iterative progress in sync range [%d,%d], got=%d", syncRangeMin, syncRangeMax, iterative) + } +} + +func TestProgress_UsesRemainingDataFallback(t *testing.T) { + now := time.Now() + p := NewProgress() + + progress := p.SyncProgress(Record{ + Now: now, + StartedAt: now.Add(-90 * time.Second), + PreviousProgress: 10, + Phase: virtv1.MigrationRunning, 
+ DataTotalMiB: 100, + DataProcessedMiB: unknownMetric, + DataRemainingMiB: 25, + }) + + if progress <= syncRangeMin { + t.Fatalf("expected fallback metric progress above syncRangeMin, got=%d", progress) + } +} + +func TestProgress_ZeroElapsed(t *testing.T) { + now := time.Now() + p := NewProgress() + + progress := p.SyncProgress(Record{ + Now: now, + StartedAt: now, + PreviousProgress: syncRangeMin, + Phase: virtv1.MigrationPending, + }) + + if progress != syncRangeMin { + t.Fatalf("expected zero elapsed progress=%d, got=%d", syncRangeMin, progress) + } +} + +func TestProgress_VeryLargeElapsedIsCapped(t *testing.T) { + now := time.Now() + p := NewProgress() + + progress := p.SyncProgress(Record{ + Now: now, + StartedAt: now.Add(-24 * time.Hour), + PreviousProgress: 10, + Phase: virtv1.MigrationRunning, + Mode: virtv1.MigrationPostCopy, + Iteration: 1, + }) + + if progress != syncRangeMax { + t.Fatalf("expected capped progress=%d, got=%d", syncRangeMax, progress) + } +} + +func TestIsIterative(t *testing.T) { + tests := []struct { + name string + record Record + elapsed float64 + expected bool + }{ + { + name: "iteration implies iterative", + record: Record{Iteration: 1}, + elapsed: 1, + expected: true, + }, + { + name: "post copy mode implies iterative", + record: Record{Mode: virtv1.MigrationPostCopy}, + elapsed: 1, + expected: true, + }, + { + name: "long running implies iterative", + record: Record{Phase: virtv1.MigrationRunning}, + elapsed: progressBulkDurationGuess, + expected: true, + }, + { + name: "short pre-copy is not iterative", + record: Record{Phase: virtv1.MigrationRunning}, + elapsed: progressBulkDurationGuess - 1, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isIterative(tt.record, tt.elapsed); got != tt.expected { + t.Fatalf("isIterative() = %v, want %v", got, tt.expected) + } + }) + } +} + +func TestProgress_StallBumpNotAppliedEarly(t *testing.T) { + got := applyMonotonicStallBump(70, 
70, float64(progressBulkStallSeconds-1), false) + if got != 70 { + t.Fatalf("expected no stall bump before window, got=%d", got) + } +} + +func TestMapToSyncRangeBoundaries(t *testing.T) { + if got := mapToSyncRange(progressStartPercent); got != syncRangeMin { + t.Fatalf("expected lower boundary=%d, got=%d", syncRangeMin, got) + } + if got := mapToSyncRange(progressIterativeCeiling); got != syncRangeMax { + t.Fatalf("expected upper boundary=%d, got=%d", syncRangeMax, got) + } +} diff --git a/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric_test.go b/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric_test.go index a58dccdc15..fd44252a5b 100644 --- a/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric_test.go +++ b/images/virtualization-artifact/pkg/monitoring/metrics/vmop/data_metric_test.go @@ -42,6 +42,16 @@ func TestIsTerminalCompletedCondition(t *testing.T) { cond: metav1.Condition{Status: metav1.ConditionFalse, Reason: vmopcondition.ReasonTargetDiskError.String()}, want: true, }, + { + name: "aborted reason is terminal", + cond: metav1.Condition{Status: metav1.ConditionFalse, Reason: vmopcondition.ReasonAborted.String()}, + want: true, + }, + { + name: "not converging reason is terminal", + cond: metav1.Condition{Status: metav1.ConditionFalse, Reason: vmopcondition.ReasonNotConverging.String()}, + want: true, + }, { name: "in progress reason is not terminal", cond: metav1.Condition{Status: metav1.ConditionFalse, Reason: vmopcondition.ReasonSyncing.String()}, From 84dd100c3c61b55b6723cd695f09b4989089da88 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Thu, 2 Apr 2026 16:41:04 +0200 Subject: [PATCH 05/27] refactor(core): centralize migration sync progress range Signed-off-by: Daniil Antoshin --- .../migration/internal/handler/lifecycle.go | 2 -- .../internal/handler/lifecycle_test.go | 4 +-- .../migration/internal/progress/mapper.go | 2 +- .../internal/progress/mapper_test.go | 8 ++--- 
.../migration/internal/progress/progress.go | 14 ++++---- .../internal/progress/progress_test.go | 36 +++++++++---------- 6 files changed, 32 insertions(+), 34 deletions(-) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go index f7d39118e7..a31fad2245 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go @@ -49,8 +49,6 @@ const ( progressDisksPreparing int32 = 1 progressTargetScheduling int32 = 2 progressTargetPreparing int32 = 3 - progressSyncingMin int32 = 10 - progressSyncingMax int32 = 90 progressSourceSuspended int32 = 91 progressTargetResumed int32 = 92 progressMigrationCompleted int32 = 100 diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go index fd845026e4..3b17ef3c79 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go @@ -415,8 +415,8 @@ var _ = Describe("LifecycleHandler", func() { Expect(err).NotTo(HaveOccurred()) Expect(srv.Changed().Status.Phase).To(Equal(v1alpha2.VMOPPhaseInProgress)) Expect(srv.Changed().Status.Progress).NotTo(BeNil()) - Expect(*srv.Changed().Status.Progress).To(BeNumerically(">=", int32(10))) - Expect(*srv.Changed().Status.Progress).To(BeNumerically("<=", int32(90))) + Expect(*srv.Changed().Status.Progress).To(BeNumerically(">=", migrationprogress.SyncRangeMin)) + Expect(*srv.Changed().Status.Progress).To(BeNumerically("<=", migrationprogress.SyncRangeMax)) completed, found := conditions.GetCondition(vmopcondition.TypeCompleted, 
srv.Changed().Status.Conditions) Expect(found).To(BeTrue()) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go index 4b81c47f63..62e2fd43d0 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go @@ -65,7 +65,7 @@ func BuildRecord(vmop *v1alpha2.VirtualMachineOperation, mig *virtv1.VirtualMach func previousProgress(vmop *v1alpha2.VirtualMachineOperation) int32 { if vmop == nil || vmop.Status.Progress == nil { - return syncRangeMin + return SyncRangeMin } return *vmop.Status.Progress } diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go index fb80d7c98f..668926fe65 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go @@ -35,8 +35,8 @@ func TestBuildRecord_NilVMOPAndMigration(t *testing.T) { if !record.StartedAt.Equal(now) { t.Fatalf("expected StartedAt=%v, got %v", now, record.StartedAt) } - if record.PreviousProgress != syncRangeMin { - t.Fatalf("expected PreviousProgress=%d, got %d", syncRangeMin, record.PreviousProgress) + if record.PreviousProgress != SyncRangeMin { + t.Fatalf("expected PreviousProgress=%d, got %d", SyncRangeMin, record.PreviousProgress) } if record.DataTotalMiB != unknownMetric || record.DataProcessedMiB != unknownMetric || record.DataRemainingMiB != unknownMetric { t.Fatalf("expected unknown metrics, got total=%v processed=%v remaining=%v", record.DataTotalMiB, record.DataProcessedMiB, record.DataRemainingMiB) @@ -105,12 +105,12 @@ func TestPreviousProgress(t 
*testing.T) { { name: "nil vmop", vmop: nil, - want: syncRangeMin, + want: SyncRangeMin, }, { name: "nil progress", vmop: &v1alpha2.VirtualMachineOperation{}, - want: syncRangeMin, + want: SyncRangeMin, }, { name: "explicit progress", diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go index 453bb61afd..04c1f18e04 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go @@ -24,8 +24,8 @@ import ( ) const ( - syncRangeMin int32 = 10 - syncRangeMax int32 = 90 + SyncRangeMin int32 = 10 + SyncRangeMax int32 = 90 // These coefficients tune the degraded-mode progress estimation when KubeVirt // does not expose byte counters for migration transfer state. The algorithm @@ -155,7 +155,7 @@ func applyMonotonicStallBump(previous, current int32, elapsedSec float64, iterat func mapToSyncRange(internal float64) int32 { normalized := (clampFloat(internal, progressStartPercent, progressIterativeCeiling) - progressStartPercent) / (progressIterativeCeiling - progressStartPercent) - mapped := float64(syncRangeMin) + normalized*float64(syncRangeMax-syncRangeMin) + mapped := float64(SyncRangeMin) + normalized*float64(SyncRangeMax-SyncRangeMin) return clampSyncRange(int32(math.Round(mapped))) } @@ -170,11 +170,11 @@ func clampFloat(v, minV, maxV float64) float64 { } func clampSyncRange(v int32) int32 { - if v < syncRangeMin { - return syncRangeMin + if v < SyncRangeMin { + return SyncRangeMin } - if v > syncRangeMax { - return syncRangeMax + if v > SyncRangeMax { + return SyncRangeMax } return v } diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go 
index 9f07f56b4f..5e4fb832e1 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go @@ -62,8 +62,8 @@ func TestProgress_SyncRangeCaps(t *testing.T) { DataRemainingMiB: 0, }) - if progress < syncRangeMin || progress > syncRangeMax { - t.Fatalf("expected progress in sync range [%d,%d], got=%d", syncRangeMin, syncRangeMax, progress) + if progress < SyncRangeMin || progress > SyncRangeMax { + t.Fatalf("expected progress in sync range [%d,%d], got=%d", SyncRangeMin, SyncRangeMax, progress) } } @@ -97,8 +97,8 @@ func TestProgress_DegradedModeWithoutMetrics(t *testing.T) { DataRemainingMiB: unknownMetric, }) - if progress < syncRangeMin || progress > syncRangeMax { - t.Fatalf("expected degraded-mode progress in sync range [%d,%d], got=%d", syncRangeMin, syncRangeMax, progress) + if progress < SyncRangeMin || progress > SyncRangeMax { + t.Fatalf("expected degraded-mode progress in sync range [%d,%d], got=%d", SyncRangeMin, SyncRangeMax, progress) } } @@ -115,7 +115,7 @@ func TestProgress_WithMetricsInBulkPhase(t *testing.T) { DataProcessedMiB: 512, }) - if progress <= syncRangeMin || progress >= syncRangeMax { + if progress <= SyncRangeMin || progress >= SyncRangeMax { t.Fatalf("expected bulk progress strictly inside sync range, got=%d", progress) } } @@ -147,8 +147,8 @@ func TestProgress_WithMetricsInIterativePhase(t *testing.T) { if iterative <= bulk { t.Fatalf("expected iterative progress to be greater than bulk progress, bulk=%d iterative=%d", bulk, iterative) } - if iterative < syncRangeMin || iterative > syncRangeMax { - t.Fatalf("expected iterative progress in sync range [%d,%d], got=%d", syncRangeMin, syncRangeMax, iterative) + if iterative < SyncRangeMin || iterative > SyncRangeMax { + t.Fatalf("expected iterative progress in sync range [%d,%d], got=%d", SyncRangeMin, SyncRangeMax, iterative) } } @@ -166,8 
+166,8 @@ func TestProgress_UsesRemainingDataFallback(t *testing.T) { DataRemainingMiB: 25, }) - if progress <= syncRangeMin { - t.Fatalf("expected fallback metric progress above syncRangeMin, got=%d", progress) + if progress <= SyncRangeMin { + t.Fatalf("expected fallback metric progress above SyncRangeMin, got=%d", progress) } } @@ -178,12 +178,12 @@ func TestProgress_ZeroElapsed(t *testing.T) { progress := p.SyncProgress(Record{ Now: now, StartedAt: now, - PreviousProgress: syncRangeMin, + PreviousProgress: SyncRangeMin, Phase: virtv1.MigrationPending, }) - if progress != syncRangeMin { - t.Fatalf("expected zero elapsed progress=%d, got=%d", syncRangeMin, progress) + if progress != SyncRangeMin { + t.Fatalf("expected zero elapsed progress=%d, got=%d", SyncRangeMin, progress) } } @@ -200,8 +200,8 @@ func TestProgress_VeryLargeElapsedIsCapped(t *testing.T) { Iteration: 1, }) - if progress != syncRangeMax { - t.Fatalf("expected capped progress=%d, got=%d", syncRangeMax, progress) + if progress != SyncRangeMax { + t.Fatalf("expected capped progress=%d, got=%d", SyncRangeMax, progress) } } @@ -255,10 +255,10 @@ func TestProgress_StallBumpNotAppliedEarly(t *testing.T) { } func TestMapToSyncRangeBoundaries(t *testing.T) { - if got := mapToSyncRange(progressStartPercent); got != syncRangeMin { - t.Fatalf("expected lower boundary=%d, got=%d", syncRangeMin, got) + if got := mapToSyncRange(progressStartPercent); got != SyncRangeMin { + t.Fatalf("expected lower boundary=%d, got=%d", SyncRangeMin, got) } - if got := mapToSyncRange(progressIterativeCeiling); got != syncRangeMax { - t.Fatalf("expected upper boundary=%d, got=%d", syncRangeMax, got) + if got := mapToSyncRange(progressIterativeCeiling); got != SyncRangeMax { + t.Fatalf("expected upper boundary=%d, got=%d", SyncRangeMax, got) } } From 27ee884e7af444252c8d4cfd09849179b55e8b83 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Thu, 2 Apr 2026 18:13:13 +0200 Subject: [PATCH 06/27] feat(core, kubevirt): use 
migration transfer counters for vmop progress Read migration byte counters from VirtualMachineInstanceMigration status and feed them into VMOP migration progress calculation. Keep degraded mode as a fallback when counters are absent and use a local kubevirt.io/api replace for the patched 3p-kubevirt module during development. Signed-off-by: Daniil Antoshin --- images/virtualization-artifact/go.mod | 2 +- .../migration/internal/progress/mapper.go | 15 +++++++---- .../internal/progress/mapper_test.go | 24 +++++++++++++++-- .../migration/internal/progress/progress.go | 26 +++++++++++++++---- .../internal/progress/progress_test.go | 26 +++++++++++++++++++ 5 files changed, 80 insertions(+), 13 deletions(-) diff --git a/images/virtualization-artifact/go.mod b/images/virtualization-artifact/go.mod index bade8ad4ec..6f50f27f04 100644 --- a/images/virtualization-artifact/go.mod +++ b/images/virtualization-artifact/go.mod @@ -168,4 +168,4 @@ replace ( ) // Kubevirt API replaces -replace kubevirt.io/api => github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.21 +replace kubevirt.io/api => ../../../3p-kubevirt/staging/src/kubevirt.io/api diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go index 62e2fd43d0..8d090f9d4a 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go @@ -27,11 +27,6 @@ import ( const unknownMetric = -1.0 // BuildRecord maps KubeVirt migration status to progress algorithm inputs. -// -// KubeVirt v1.6 does not expose transferred/remaining byte counters in -// VirtualMachineInstanceMigrationStatus, therefore data metrics are mapped to -// unknown values and Progress runs in deterministic degraded mode -// (time+phase based with stall bump). 
func BuildRecord(vmop *v1alpha2.VirtualMachineOperation, mig *virtv1.VirtualMachineInstanceMigration, now time.Time) Record { record := Record{ Now: now, @@ -58,11 +53,21 @@ func BuildRecord(vmop *v1alpha2.VirtualMachineOperation, mig *virtv1.VirtualMach record.Mode = state.Mode record.Iteration = mapIteration(state) record.Throttle = mapThrottle(state) + record.DataTotalMiB = mapBytesToMiB(state.DataTotalBytes) + record.DataProcessedMiB = mapBytesToMiB(state.DataProcessedBytes) + record.DataRemainingMiB = mapBytesToMiB(state.DataRemainingBytes) } return record } +func mapBytesToMiB(v *uint64) float64 { + if v == nil { + return unknownMetric + } + return float64(*v) / (1024.0 * 1024.0) +} + func previousProgress(vmop *v1alpha2.VirtualMachineOperation) int32 { if vmop == nil || vmop.Status.Progress == nil { return SyncRangeMin diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go index 668926fe65..a71f120ea5 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go @@ -64,12 +64,18 @@ func TestBuildRecord_UsesMigrationState(t *testing.T) { now := time.Unix(1710000000, 0) start := metav1.NewTime(now.Add(-5 * time.Minute)) autoConverge := true + totalBytes := uint64(1024 * 1024 * 1024) + processedBytes := uint64(512 * 1024 * 1024) + remainingBytes := uint64(256 * 1024 * 1024) mig := &virtv1.VirtualMachineInstanceMigration{ Status: virtv1.VirtualMachineInstanceMigrationStatus{ Phase: virtv1.MigrationRunning, MigrationState: &virtv1.VirtualMachineInstanceMigrationState{ - StartTimestamp: &start, - Mode: virtv1.MigrationPostCopy, + StartTimestamp: &start, + Mode: virtv1.MigrationPostCopy, + DataTotalBytes: &totalBytes, + DataProcessedBytes: &processedBytes, + DataRemainingBytes: 
&remainingBytes, MigrationConfiguration: &virtv1.MigrationConfiguration{ AllowAutoConverge: &autoConverge, }, @@ -94,6 +100,9 @@ func TestBuildRecord_UsesMigrationState(t *testing.T) { if record.Throttle != 1.0 { t.Fatalf("expected Throttle=1.0, got %v", record.Throttle) } + if record.DataTotalMiB != 1024 || record.DataProcessedMiB != 512 || record.DataRemainingMiB != 256 { + t.Fatalf("expected mapped MiB counters, got total=%v processed=%v remaining=%v", record.DataTotalMiB, record.DataProcessedMiB, record.DataRemainingMiB) + } } func TestPreviousProgress(t *testing.T) { @@ -165,6 +174,17 @@ func TestMapIteration(t *testing.T) { } } +func TestMapBytesToMiB(t *testing.T) { + if got := mapBytesToMiB(nil); got != unknownMetric { + t.Fatalf("expected unknown metric for nil, got %v", got) + } + + bytes := uint64(3 * 1024 * 1024) + if got := mapBytesToMiB(&bytes); got != 3 { + t.Fatalf("expected 3 MiB, got %v", got) + } +} + func TestMapThrottle(t *testing.T) { tests := []struct { name string diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go index 04c1f18e04..c276d1467d 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go @@ -100,12 +100,28 @@ func (p *Progress) SyncProgress(record Record) int32 { } func metricPercent(record Record) (float64, bool) { - if record.DataTotalMiB > 0 && record.DataProcessedMiB >= 0 { - return clampFloat((record.DataProcessedMiB/record.DataTotalMiB)*100.0, 0, 100), true + if record.DataTotalMiB <= 0 { + return 0, false } - if record.DataTotalMiB > 0 && record.DataRemainingMiB >= 0 { - processed := record.DataTotalMiB - record.DataRemainingMiB - return clampFloat((processed/record.DataTotalMiB)*100.0, 0, 100), true + + processed, hasProcessed := 
normalizedProcessedMiB(record) + if !hasProcessed { + return 0, false + } + + return clampFloat((processed/record.DataTotalMiB)*100.0, 0, 100), true +} + +func normalizedProcessedMiB(record Record) (float64, bool) { + if record.DataTotalMiB <= 0 { + return 0, false + } + + if record.DataProcessedMiB >= 0 { + return clampFloat(record.DataProcessedMiB, 0, record.DataTotalMiB), true + } + if record.DataRemainingMiB >= 0 { + return clampFloat(record.DataTotalMiB-record.DataRemainingMiB, 0, record.DataTotalMiB), true } return 0, false } diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go index 5e4fb832e1..bd16221b09 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go @@ -171,6 +171,32 @@ func TestProgress_UsesRemainingDataFallback(t *testing.T) { } } +func TestMetricPercent_ClampsProcessedAboveTotal(t *testing.T) { + metricPct, hasMetric := metricPercent(Record{DataTotalMiB: 100, DataProcessedMiB: 200}) + if !hasMetric { + t.Fatal("expected metric to be available") + } + if metricPct != 100 { + t.Fatalf("expected clamped metric percent=100, got=%v", metricPct) + } +} + +func TestMetricPercent_ClampsRemainingAboveTotal(t *testing.T) { + metricPct, hasMetric := metricPercent(Record{DataTotalMiB: 100, DataRemainingMiB: 200}) + if !hasMetric { + t.Fatal("expected metric to be available") + } + if metricPct != 0 { + t.Fatalf("expected clamped metric percent=0, got=%v", metricPct) + } +} + +func TestMetricPercent_RequiresPositiveTotal(t *testing.T) { + if _, hasMetric := metricPercent(Record{DataTotalMiB: 0, DataProcessedMiB: 10}); hasMetric { + t.Fatal("expected metric to be unavailable for zero total") + } +} + func TestProgress_ZeroElapsed(t *testing.T) { now := 
time.Now() p := NewProgress() From 9ac043ec28fbe81340c490916f4252c6b8de2e84 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Thu, 2 Apr 2026 18:14:47 +0200 Subject: [PATCH 07/27] fix(core): reduce repeated vmop migration stall bumps Keep degraded-mode stall bump monotonic, but avoid issuing another artificial increment when the computed base progress only trails the previous value by the prior bump. This removes the near +1-per-reconcile behavior while preserving the fallback path for migrations without byte counters. Signed-off-by: Daniil Antoshin --- .../migration/internal/progress/progress.go | 28 +++++++++++-------- .../internal/progress/progress_test.go | 7 +++++ 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go index c276d1467d..6191801d52 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go @@ -153,19 +153,25 @@ func isIterative(record Record, elapsedSec float64) bool { func applyMonotonicStallBump(previous, current int32, elapsedSec float64, iterative bool) int32 { prev := clampSyncRange(previous) - if current < prev { - current = prev - } - if current == prev { - window := float64(progressBulkStallSeconds) - if iterative { - window = float64(progressIterStallSeconds) - } - if elapsedSec >= window { - current = clampSyncRange(prev + 1) + base := clampSyncRange(current) + if base < prev { + if prev-base <= 1 { + return prev } + base = prev + } + if base > prev { + return base + } + + window := float64(progressBulkStallSeconds) + if iterative { + window = float64(progressIterStallSeconds) + } + if elapsedSec >= window { + return clampSyncRange(prev + 1) } - return clampSyncRange(current) + return prev } func 
mapToSyncRange(internal float64) int32 { diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go index bd16221b09..949f0109e1 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go @@ -280,6 +280,13 @@ func TestProgress_StallBumpNotAppliedEarly(t *testing.T) { } } +func TestProgress_StallBumpDoesNotRepeatOnRegressedBase(t *testing.T) { + got := applyMonotonicStallBump(71, 70, float64(progressBulkStallSeconds+10), false) + if got != 71 { + t.Fatalf("expected previous progress to be preserved without repeated bump, got=%d", got) + } +} + func TestMapToSyncRangeBoundaries(t *testing.T) { if got := mapToSyncRange(progressStartPercent); got != SyncRangeMin { t.Fatalf("expected lower boundary=%d, got=%d", SyncRangeMin, got) From 5326ef6cb2497b51c3a0c66ceda1f8bb20628e69 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Thu, 2 Apr 2026 18:21:38 +0200 Subject: [PATCH 08/27] chore(core): add 3p-kubevirt branch info Signed-off-by: Daniil Antoshin --- images/virt-artifact/werf.inc.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/images/virt-artifact/werf.inc.yaml b/images/virt-artifact/werf.inc.yaml index f30560fba6..9e0d340bfb 100644 --- a/images/virt-artifact/werf.inc.yaml +++ b/images/virt-artifact/werf.inc.yaml @@ -2,7 +2,7 @@ # Source https://github.com/kubevirt/kubevirt/blob/v1.3.1/hack/dockerized#L15 {{- $gitRepoName := "3p-kubevirt" }} {{- $gitRepoUrl := (printf "%s/%s" "deckhouse" $gitRepoName) }} -{{- $tag := get $.Core $gitRepoName }} +{{- $tag := "feat/vm/migration-progress" }} {{- $version := (split "-" $tag)._0 }} --- @@ -13,8 +13,10 @@ secrets: - id: SOURCE_REPO value: {{ $.SOURCE_REPO }} shell: + installCacheVersion: "{{ now | date
"Mon Jan 2 15:04:05 MST 2006" }}" install: - | + echo "$date" echo "Git clone {{ $gitRepoName }} repository..." git clone --depth=1 $(cat /run/secrets/SOURCE_REPO)/{{ $gitRepoUrl }} --branch {{ $tag }} /src/kubevirt From 1d9581af1fed85db233619df0edb06647a7b8571 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Thu, 2 Apr 2026 22:23:02 +0200 Subject: [PATCH 09/27] chore(core): add correct kubevirt replace Signed-off-by: Daniil Antoshin --- images/virtualization-artifact/go.mod | 2 +- images/virtualization-artifact/go.sum | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/images/virtualization-artifact/go.mod b/images/virtualization-artifact/go.mod index 6f50f27f04..ae235edfe1 100644 --- a/images/virtualization-artifact/go.mod +++ b/images/virtualization-artifact/go.mod @@ -168,4 +168,4 @@ replace ( ) // Kubevirt API replaces -replace kubevirt.io/api => ../../../3p-kubevirt/staging/src/kubevirt.io/api +replace kubevirt.io/api => github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402202241-26754f27943f diff --git a/images/virtualization-artifact/go.sum b/images/virtualization-artifact/go.sum index 0ec653e4a0..a9b0bdd007 100644 --- a/images/virtualization-artifact/go.sum +++ b/images/virtualization-artifact/go.sum @@ -49,8 +49,16 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.21 h1:W0nSf7fOTTLn5lVqR4JR3KctrABzyqb/sCkmSPx2fEY= -github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.21/go.mod h1:wGZLfRa/b4w/V/hakmfcK0CmrAZGfpj+jN7BMt0s19E=
+github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402160406-eb9323ea38fe h1:Bn6aPvoYRUZ6/bl8SJFCh/bBpMAK2TfURZzVISMQNyU= +github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402160406-eb9323ea38fe/go.mod h1:wGZLfRa/b4w/V/hakmfcK0CmrAZGfpj+jN7BMt0s19E= +github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402174858-752ba2a474c2 h1:/3SbjkTaqmtv58D0SCRJ0BHx1M4YGc8ZIfL77CZWCKM= +github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402174858-752ba2a474c2/go.mod h1:wGZLfRa/b4w/V/hakmfcK0CmrAZGfpj+jN7BMt0s19E= +github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402185324-f232a402d1db h1:XuiSghUwOKozxc3AnMoWrHW6bmkT6IhGl8KxaMT3hMc= +github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402185324-f232a402d1db/go.mod h1:wGZLfRa/b4w/V/hakmfcK0CmrAZGfpj+jN7BMt0s19E= +github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402193305-800b150c65d6 h1:k4Ou4sMjZhcSBEjGVXjC/XVmc0zfcOmgK+VC9cDHBH8= +github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402193305-800b150c65d6/go.mod h1:wGZLfRa/b4w/V/hakmfcK0CmrAZGfpj+jN7BMt0s19E= +github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402202241-26754f27943f h1:kvqBoVTdqHkgOshP0AwRlcydg7e+feNKgGaW2JstlNU= +github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402202241-26754f27943f/go.mod h1:wGZLfRa/b4w/V/hakmfcK0CmrAZGfpj+jN7BMt0s19E= github.com/deckhouse/deckhouse/pkg/log v0.0.0-20250226105106-176cd3afcdd5 h1:PsN1E0oxC/+4zdA977txrqUCuObFL3HAuu5Xnud8m8c= github.com/deckhouse/deckhouse/pkg/log v0.0.0-20250226105106-176cd3afcdd5/go.mod h1:Mk5HRzkc5pIcDIZ2JJ6DPuuqnwhXVkb3you8M8Mg+4w= github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= From b8eb134bf73b988cd99b36b53050027455316f59 Mon Sep 17 00:00:00 2001 
From: Daniil Antoshin Date: Thu, 2 Apr 2026 23:45:32 +0200 Subject: [PATCH 10/27] fix(core): update embedded kubevirt migration counters schema Signed-off-by: Daniil Antoshin --- crds/embedded/virtualmachineinstances.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/crds/embedded/virtualmachineinstances.yaml b/crds/embedded/virtualmachineinstances.yaml index c94463fec5..4bbe125e1b 100644 --- a/crds/embedded/virtualmachineinstances.yaml +++ b/crds/embedded/virtualmachineinstances.yaml @@ -3989,6 +3989,21 @@ spec: completed: description: Indicates the migration completed type: boolean + dataProcessedBytes: + description: DataProcessedBytes is the amount of migration data already + processed by the source runtime. + format: int64 + type: integer + dataRemainingBytes: + description: DataRemainingBytes is the amount of migration data still + remaining on the source runtime. + format: int64 + type: integer + dataTotalBytes: + description: DataTotalBytes is the total amount of migration data reported + by the source runtime. 
+ format: int64 + type: integer endTimestamp: description: The time the migration action ended format: date-time From a22eccad8f34117a3d6237370698a9047001224e Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Fri, 3 Apr 2026 11:50:19 +0200 Subject: [PATCH 11/27] feat(core): improve migration progress tracking Signed-off-by: Daniil Antoshin --- crds/embedded/virtualmachineinstances.yaml | 10 + images/virtualization-artifact/go.mod | 2 +- images/virtualization-artifact/go.sum | 10 - .../migration/internal/handler/lifecycle.go | 11 + .../internal/handler/lifecycle_test.go | 12 +- .../migration/internal/progress/mapper.go | 46 +-- .../internal/progress/mapper_test.go | 132 +++++---- .../migration/internal/progress/progress.go | 275 ++++++++++++------ .../internal/progress/progress_test.go | 130 +++++---- .../vmop/migration/internal/progress/store.go | 75 +++++ .../migration/internal/progress/store_test.go | 60 ++++ 11 files changed, 514 insertions(+), 249 deletions(-) create mode 100644 images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store.go create mode 100644 images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store_test.go diff --git a/crds/embedded/virtualmachineinstances.yaml b/crds/embedded/virtualmachineinstances.yaml index 4bbe125e1b..8868aa1a14 100644 --- a/crds/embedded/virtualmachineinstances.yaml +++ b/crds/embedded/virtualmachineinstances.yaml @@ -4004,6 +4004,16 @@ spec: by the source runtime. format: int64 type: integer + iteration: + description: Iteration is the current migration iteration reported by + the source runtime. + format: int32 + type: integer + autoConvergeThrottle: + description: AutoConvergeThrottle is the current auto-converge throttle + reported by the source runtime. 
+ format: int32 + type: integer endTimestamp: description: The time the migration action ended format: date-time diff --git a/images/virtualization-artifact/go.mod b/images/virtualization-artifact/go.mod index ae235edfe1..6f50f27f04 100644 --- a/images/virtualization-artifact/go.mod +++ b/images/virtualization-artifact/go.mod @@ -168,4 +168,4 @@ replace ( ) // Kubevirt API replaces -replace kubevirt.io/api => github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402202241-26754f27943f +replace kubevirt.io/api => ../../../3p-kubevirt/staging/src/kubevirt.io/api diff --git a/images/virtualization-artifact/go.sum b/images/virtualization-artifact/go.sum index a9b0bdd007..cd0b924afb 100644 --- a/images/virtualization-artifact/go.sum +++ b/images/virtualization-artifact/go.sum @@ -49,16 +49,6 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402160406-eb9323ea38fe h1:Bn6aPvoYRUZ6/bl8SJFCh/bBpMAK2TfURZzVISMQNyU= -github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402160406-eb9323ea38fe/go.mod h1:wGZLfRa/b4w/V/hakmfcK0CmrAZGfpj+jN7BMt0s19E= -github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402174858-752ba2a474c2 h1:/3SbjkTaqmtv58D0SCRJ0BHx1M4YGc8ZIfL77CZWCKM= -github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402174858-752ba2a474c2/go.mod h1:wGZLfRa/b4w/V/hakmfcK0CmrAZGfpj+jN7BMt0s19E= -github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402185324-f232a402d1db 
h1:XuiSghUwOKozxc3AnMoWrHW6bmkT6IhGl8KxaMT3hMc= -github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402185324-f232a402d1db/go.mod h1:wGZLfRa/b4w/V/hakmfcK0CmrAZGfpj+jN7BMt0s19E= -github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402193305-800b150c65d6 h1:k4Ou4sMjZhcSBEjGVXjC/XVmc0zfcOmgK+VC9cDHBH8= -github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402193305-800b150c65d6/go.mod h1:wGZLfRa/b4w/V/hakmfcK0CmrAZGfpj+jN7BMt0s19E= -github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402202241-26754f27943f h1:kvqBoVTdqHkgOshP0AwRlcydg7e+feNKgGaW2JstlNU= -github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v1.6.2-v12n.16.0.20260402202241-26754f27943f/go.mod h1:wGZLfRa/b4w/V/hakmfcK0CmrAZGfpj+jN7BMt0s19E= github.com/deckhouse/deckhouse/pkg/log v0.0.0-20250226105106-176cd3afcdd5 h1:PsN1E0oxC/+4zdA977txrqUCuObFL3HAuu5Xnud8m8c= github.com/deckhouse/deckhouse/pkg/log v0.0.0-20250226105106-176cd3afcdd5/go.mod h1:Mk5HRzkc5pIcDIZ2JJ6DPuuqnwhXVkb3you8M8Mg+4w= github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go index a31fad2245..582454946c 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go @@ -608,12 +608,16 @@ func (h LifecycleHandler) calculateMigrationProgress( record := migrationprogress.BuildRecord(vmop, mig, time.Now()) return h.progressStrategy.SyncProgress(record) case vmopcondition.ReasonSourceSuspended: + h.forgetProgress(vmop) return progressSourceSuspended case vmopcondition.ReasonTargetResumed: + h.forgetProgress(vmop) return progressTargetResumed case 
vmopcondition.ReasonMigrationCompleted: + h.forgetProgress(vmop) return progressMigrationCompleted default: + h.forgetProgress(vmop) if vmop != nil && vmop.Status.Progress != nil { return *vmop.Status.Progress } @@ -621,6 +625,13 @@ func (h LifecycleHandler) calculateMigrationProgress( } } +func (h LifecycleHandler) forgetProgress(vmop *v1alpha2.VirtualMachineOperation) { + if h.progressStrategy == nil || vmop == nil { + return + } + h.progressStrategy.Forget(vmop.UID) +} + func (h LifecycleHandler) getTargetPod(ctx context.Context, mig *virtv1.VirtualMachineInstanceMigration) (*corev1.Pod, error) { selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ MatchLabels: map[string]string{ diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go index 3b17ef3c79..6362f54299 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go @@ -25,6 +25,7 @@ import ( . 
"github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/component-base/featuregate" "k8s.io/utils/ptr" virtv1 "kubevirt.io/api/core/v1" @@ -46,13 +47,18 @@ import ( ) type progressStrategyStub struct { - value int32 + value int32 + forgotten []types.UID } -func (s progressStrategyStub) SyncProgress(_ migrationprogress.Record) int32 { +func (s *progressStrategyStub) SyncProgress(_ migrationprogress.Record) int32 { return s.value } +func (s *progressStrategyStub) Forget(uid types.UID) { + s.forgotten = append(s.forgotten, uid) +} + var _ = Describe("LifecycleHandler", func() { const ( name = "test" @@ -376,7 +382,7 @@ var _ = Describe("LifecycleHandler", func() { ) DescribeTable("should map progress by reason", func(reason vmopcondition.ReasonCompleted, initial *int32, expected int32) { - h := LifecycleHandler{progressStrategy: progressStrategyStub{value: 55}} + h := LifecycleHandler{progressStrategy: &progressStrategyStub{value: 55}} vmop := &v1alpha2.VirtualMachineOperation{Status: v1alpha2.VirtualMachineOperationStatus{Progress: initial}} mig := &virtv1.VirtualMachineInstanceMigration{} diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go index 8d090f9d4a..17f125a58f 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go @@ -19,6 +19,7 @@ package progress import ( "time" + "k8s.io/apimachinery/pkg/types" virtv1 "kubevirt.io/api/core/v1" "github.com/deckhouse/virtualization/api/core/v1alpha2" @@ -26,7 +27,6 @@ import ( const unknownMetric = -1.0 -// BuildRecord maps KubeVirt migration status to progress algorithm inputs. 
func BuildRecord(vmop *v1alpha2.VirtualMachineOperation, mig *virtv1.VirtualMachineInstanceMigration, now time.Time) Record { record := Record{ Now: now, @@ -38,6 +38,7 @@ func BuildRecord(vmop *v1alpha2.VirtualMachineOperation, mig *virtv1.VirtualMach } if vmop != nil { + record.OperationUID = vmop.UID record.StartedAt = vmop.CreationTimestamp.Time } @@ -51,8 +52,9 @@ func BuildRecord(vmop *v1alpha2.VirtualMachineOperation, mig *virtv1.VirtualMach record.StartedAt = state.StartTimestamp.Time } record.Mode = state.Mode - record.Iteration = mapIteration(state) - record.Throttle = mapThrottle(state) + record.Iteration, record.HasIteration = mapIteration(state) + record.AutoConvergeThrottle, record.HasThrottle = mapThrottle(state) + record.Throttle = normalizeThrottle(record.AutoConvergeThrottle, record.HasThrottle) record.DataTotalMiB = mapBytesToMiB(state.DataTotalBytes) record.DataProcessedMiB = mapBytesToMiB(state.DataProcessedBytes) record.DataRemainingMiB = mapBytesToMiB(state.DataRemainingBytes) @@ -75,30 +77,30 @@ func previousProgress(vmop *v1alpha2.VirtualMachineOperation) int32 { return *vmop.Status.Progress } -// mapIteration approximates iterative phase: post-copy and paused modes are -// treated as iterative (>0), otherwise pre-copy stays at iteration 0. 
-func mapIteration(state *virtv1.VirtualMachineInstanceMigrationState) int32 { - if state == nil { - return 0 +func mapIteration(state *virtv1.VirtualMachineInstanceMigrationState) (uint32, bool) { + if state == nil || state.Iteration == nil { + return 0, false } - if state.Mode == virtv1.MigrationPostCopy || state.Mode == virtv1.MigrationPaused { - return 1 + return *state.Iteration, true +} + +func mapThrottle(state *virtv1.VirtualMachineInstanceMigrationState) (uint32, bool) { + if state == nil || state.AutoConvergeThrottle == nil { + return 0, false } - return 0 + return *state.AutoConvergeThrottle, true } -// mapThrottle provides deterministic throttle approximation from available -// flags: auto-converge implies elevated throttle, post-copy/paused implies max. -func mapThrottle(state *virtv1.VirtualMachineInstanceMigrationState) float64 { - if state == nil { +func normalizeThrottle(raw uint32, ok bool) float64 { + if !ok { return 0 } - throttle := 0.0 - if state.MigrationConfiguration != nil && state.MigrationConfiguration.AllowAutoConverge != nil && *state.MigrationConfiguration.AllowAutoConverge { - throttle = 0.7 - } - if state.Mode == virtv1.MigrationPostCopy || state.Mode == virtv1.MigrationPaused { - throttle = 1.0 + return clampFloat(float64(raw)/100.0, 0, 1) +} + +func operationUID(vmop *v1alpha2.VirtualMachineOperation) types.UID { + if vmop == nil { + return "" } - return throttle + return vmop.UID } diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go index a71f120ea5..c5c55cb59d 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go @@ -21,6 +21,7 @@ import ( "time" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" 
"k8s.io/utils/ptr" virtv1 "kubevirt.io/api/core/v1" @@ -46,12 +47,18 @@ func TestBuildRecord_NilVMOPAndMigration(t *testing.T) { func TestBuildRecord_UsesVMOPCreationTimestampAndPreviousProgress(t *testing.T) { now := time.Unix(1710000000, 0) vmop := &v1alpha2.VirtualMachineOperation{ - ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.NewTime(now.Add(-3 * time.Minute))}, - Status: v1alpha2.VirtualMachineOperationStatus{Progress: ptr.To[int32](42)}, + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID("vmop-uid"), + CreationTimestamp: metav1.NewTime(now.Add(-3 * time.Minute)), + }, + Status: v1alpha2.VirtualMachineOperationStatus{Progress: ptr.To[int32](42)}, } record := BuildRecord(vmop, nil, now) + if record.OperationUID != vmop.UID { + t.Fatalf("expected OperationUID=%s, got %s", vmop.UID, record.OperationUID) + } if !record.StartedAt.Equal(vmop.CreationTimestamp.Time) { t.Fatalf("expected StartedAt=%v, got %v", vmop.CreationTimestamp.Time, record.StartedAt) } @@ -63,22 +70,22 @@ func TestBuildRecord_UsesVMOPCreationTimestampAndPreviousProgress(t *testing.T) func TestBuildRecord_UsesMigrationState(t *testing.T) { now := time.Unix(1710000000, 0) start := metav1.NewTime(now.Add(-5 * time.Minute)) - autoConverge := true totalBytes := uint64(1024 * 1024 * 1024) processedBytes := uint64(512 * 1024 * 1024) remainingBytes := uint64(256 * 1024 * 1024) + iteration := uint32(10) + autoConvergeThrottle := uint32(50) mig := &virtv1.VirtualMachineInstanceMigration{ Status: virtv1.VirtualMachineInstanceMigrationStatus{ Phase: virtv1.MigrationRunning, MigrationState: &virtv1.VirtualMachineInstanceMigrationState{ - StartTimestamp: &start, - Mode: virtv1.MigrationPostCopy, - DataTotalBytes: &totalBytes, - DataProcessedBytes: &processedBytes, - DataRemainingBytes: &remainingBytes, - MigrationConfiguration: &virtv1.MigrationConfiguration{ - AllowAutoConverge: &autoConverge, - }, + StartTimestamp: &start, + Mode: virtv1.MigrationPreCopy, + Iteration: &iteration, + 
AutoConvergeThrottle: &autoConvergeThrottle, + DataTotalBytes: &totalBytes, + DataProcessedBytes: &processedBytes, + DataRemainingBytes: &remainingBytes, }, }, } @@ -91,14 +98,17 @@ func TestBuildRecord_UsesMigrationState(t *testing.T) { if !record.StartedAt.Equal(start.Time) { t.Fatalf("expected StartedAt=%v, got %v", start.Time, record.StartedAt) } - if record.Mode != virtv1.MigrationPostCopy { - t.Fatalf("expected Mode=%s, got %s", virtv1.MigrationPostCopy, record.Mode) + if record.Mode != virtv1.MigrationPreCopy { + t.Fatalf("expected Mode=%s, got %s", virtv1.MigrationPreCopy, record.Mode) + } + if !record.HasIteration || record.Iteration != 10 { + t.Fatalf("expected Iteration=10 with flag, got value=%d has=%v", record.Iteration, record.HasIteration) } - if record.Iteration != 1 { - t.Fatalf("expected Iteration=1, got %d", record.Iteration) + if !record.HasThrottle || record.AutoConvergeThrottle != 50 { + t.Fatalf("expected AutoConvergeThrottle=50 with flag, got value=%d has=%v", record.AutoConvergeThrottle, record.HasThrottle) } - if record.Throttle != 1.0 { - t.Fatalf("expected Throttle=1.0, got %v", record.Throttle) + if record.Throttle != 0.5 { + t.Fatalf("expected normalized Throttle=0.5, got %v", record.Throttle) } if record.DataTotalMiB != 1024 || record.DataProcessedMiB != 512 || record.DataRemainingMiB != 256 { t.Fatalf("expected mapped MiB counters, got total=%v processed=%v remaining=%v", record.DataTotalMiB, record.DataProcessedMiB, record.DataRemainingMiB) @@ -139,36 +149,36 @@ func TestPreviousProgress(t *testing.T) { func TestMapIteration(t *testing.T) { tests := []struct { - name string - state *virtv1.VirtualMachineInstanceMigrationState - want int32 + name string + state *virtv1.VirtualMachineInstanceMigrationState + want uint32 + wantSet bool }{ { - name: "nil state", - state: nil, - want: 0, - }, - { - name: "pre-copy", - state: &virtv1.VirtualMachineInstanceMigrationState{Mode: virtv1.MigrationPreCopy}, - want: 0, + name: "nil state", + 
state: nil, + want: 0, + wantSet: false, }, { - name: "post-copy", - state: &virtv1.VirtualMachineInstanceMigrationState{Mode: virtv1.MigrationPostCopy}, - want: 1, + name: "missing iteration", + state: &virtv1.VirtualMachineInstanceMigrationState{}, + want: 0, + wantSet: false, }, { - name: "paused", - state: &virtv1.VirtualMachineInstanceMigrationState{Mode: virtv1.MigrationPaused}, - want: 1, + name: "explicit iteration", + state: &virtv1.VirtualMachineInstanceMigrationState{Iteration: ptr.To[uint32](7)}, + want: 7, + wantSet: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := mapIteration(tt.state); got != tt.want { - t.Fatalf("mapIteration() = %d, want %d", got, tt.want) + got, gotSet := mapIteration(tt.state) + if got != tt.want || gotSet != tt.wantSet { + t.Fatalf("mapIteration() = (%d,%v), want (%d,%v)", got, gotSet, tt.want, tt.wantSet) } }) } @@ -187,41 +197,43 @@ func TestMapBytesToMiB(t *testing.T) { func TestMapThrottle(t *testing.T) { tests := []struct { - name string - state *virtv1.VirtualMachineInstanceMigrationState - want float64 + name string + state *virtv1.VirtualMachineInstanceMigrationState + wantRaw uint32 + wantSet bool + wantValue float64 }{ { - name: "nil state", - state: nil, - want: 0, + name: "nil state", + state: nil, + wantRaw: 0, + wantSet: false, + wantValue: 0, }, { - name: "default throttle", - state: &virtv1.VirtualMachineInstanceMigrationState{}, - want: 0, + name: "missing throttle", + state: &virtv1.VirtualMachineInstanceMigrationState{}, + wantRaw: 0, + wantSet: false, + wantValue: 0, }, { - name: "auto converge", - state: &virtv1.VirtualMachineInstanceMigrationState{ - MigrationConfiguration: &virtv1.MigrationConfiguration{AllowAutoConverge: ptr.To(true)}, - }, - want: 0.7, - }, - { - name: "post-copy overrides throttle", - state: &virtv1.VirtualMachineInstanceMigrationState{ - Mode: virtv1.MigrationPostCopy, - MigrationConfiguration: &virtv1.MigrationConfiguration{AllowAutoConverge: 
ptr.To(true)}, - }, - want: 1, + name: "explicit throttle", + state: &virtv1.VirtualMachineInstanceMigrationState{AutoConvergeThrottle: ptr.To[uint32](70)}, + wantRaw: 70, + wantSet: true, + wantValue: 0.7, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := mapThrottle(tt.state); got != tt.want { - t.Fatalf("mapThrottle() = %v, want %v", got, tt.want) + raw, gotSet := mapThrottle(tt.state) + if raw != tt.wantRaw || gotSet != tt.wantSet { + t.Fatalf("mapThrottle() = (%d,%v), want (%d,%v)", raw, gotSet, tt.wantRaw, tt.wantSet) + } + if got := normalizeThrottle(raw, gotSet); got != tt.wantValue { + t.Fatalf("normalizeThrottle() = %v, want %v", got, tt.wantValue) } }) } diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go index 6191801d52..585a4e98b2 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go @@ -20,6 +20,7 @@ import ( "math" "time" + "k8s.io/apimachinery/pkg/types" virtv1 "kubevirt.io/api/core/v1" ) @@ -27,76 +28,102 @@ const ( SyncRangeMin int32 = 10 SyncRangeMax int32 = 90 - // These coefficients tune the degraded-mode progress estimation when KubeVirt - // does not expose byte counters for migration transfer state. The algorithm - // keeps early stages below the sync range, maps active data synchronization - // into [10,90], and preserves monotonic growth with a small stall bump. 
- progressStartPercent = 3.0 - progressBulkCeiling = 45.0 - progressIterativeCeiling = 98.0 - progressBulkWeightMetric = 0.80 - progressBulkWeightTime = 0.20 - progressIterWeightMetric = 0.76 - progressIterWeightTime = 0.24 - progressBulkTimeRate = 0.45 - progressIterBaseTimeRate = 0.22 - progressIterThrottleRate = 0.18 - progressBulkStallSeconds = 45 - progressIterStallSeconds = 30 - progressBulkDurationGuess = 90.0 + bulkCeiling = 48.0 + iterativeFloor = 46.0 + iterativeCeiling = 90.0 + bulkStallWindow = 45 * time.Second + iterStallWindow = 25 * time.Second ) type Strategy interface { SyncProgress(record Record) int32 + Forget(uid types.UID) } type Record struct { - Now time.Time - StartedAt time.Time - PreviousProgress int32 - Phase virtv1.VirtualMachineInstanceMigrationPhase - Mode virtv1.MigrationMode - Iteration int32 - Throttle float64 - DataTotalMiB float64 - DataProcessedMiB float64 - DataRemainingMiB float64 + OperationUID types.UID + Now time.Time + StartedAt time.Time + PreviousProgress int32 + Phase virtv1.VirtualMachineInstanceMigrationPhase + Mode virtv1.MigrationMode + HasIteration bool + Iteration uint32 + HasThrottle bool + AutoConvergeThrottle uint32 + Throttle float64 + DataTotalMiB float64 + DataProcessedMiB float64 + DataRemainingMiB float64 } -type Progress struct{} +type Progress struct { + store *Store +} func NewProgress() *Progress { - return &Progress{} + return &Progress{store: NewStore()} +} + +func (p *Progress) Forget(uid types.UID) { + if p == nil || p.store == nil || uid == "" { + return + } + p.store.Delete(uid) } func (p *Progress) SyncProgress(record Record) int32 { - elapsed := max(record.Now.Sub(record.StartedAt), 0) - elapsedSec := elapsed.Seconds() + state := p.getState(record) + prev := clampSyncRange(record.PreviousProgress) + if state.Progress > prev { + prev = state.Progress + } - metricPct, hasMetric := metricPercent(record) - var internal float64 - - if isIterative(record, elapsedSec) { - iterTime := 
progressBulkCeiling + math.Max(0, elapsedSec-progressBulkDurationGuess)*(progressIterBaseTimeRate+clampFloat(record.Throttle, 0, 1)*progressIterThrottleRate) - iterMetric := iterativeMetricPercent(record, metricPct, hasMetric) - if hasMetric { - internal = progressIterWeightMetric*iterMetric + progressIterWeightTime*iterTime - } else { - internal = iterTime - } - internal = clampFloat(internal, progressBulkCeiling, progressIterativeCeiling) - } else { - bulkTime := progressStartPercent + elapsedSec*progressBulkTimeRate - if hasMetric { - internal = progressBulkWeightMetric*metricPct + progressBulkWeightTime*bulkTime - } else { - internal = bulkTime + elapsed := record.Now.Sub(record.StartedAt) + if elapsed < 0 { + elapsed = 0 + } + + iterative := isIterative(record) + if iterative && !state.Iterative { + state.Iterative = true + state.IterativeSince = record.Now + if prev < int32(iterativeFloor) { + prev = int32(iterativeFloor) } - internal = clampFloat(internal, progressStartPercent, progressBulkCeiling) } - syncProgress := mapToSyncRange(internal) - return applyMonotonicStallBump(record.PreviousProgress, syncProgress, elapsedSec, isIterative(record, elapsedSec)) + target := bulkTarget(record, elapsed) + if iterative { + target = iterativeTarget(record, state, prev) + } + + next := smoothProgress(prev, target, iterative, record.Throttle) + next = applyStatefulStall(record, state, next, iterative) + next = clampSyncRange(maxInt32(prev, next)) + + updateMetricState(record, state) + state.Progress = next + state.LastUpdatedAt = record.Now + state.LastIteration = record.Iteration + state.Iterative = iterative + + if record.OperationUID != "" { + p.store.Store(record.OperationUID, state) + } + + return next +} + +func (p *Progress) getState(record Record) State { + if p == nil || p.store == nil || record.OperationUID == "" { + return State{Progress: clampSyncRange(record.PreviousProgress), LastMetricAt: record.Now} + } + state, ok := p.store.Load(record.OperationUID) + 
if !ok { + state = State{Progress: clampSyncRange(record.PreviousProgress), LastMetricAt: record.Now} + } + return state } func metricPercent(record Record) (float64, bool) { @@ -116,7 +143,6 @@ func normalizedProcessedMiB(record Record) (float64, bool) { if record.DataTotalMiB <= 0 { return 0, false } - if record.DataProcessedMiB >= 0 { return clampFloat(record.DataProcessedMiB, 0, record.DataTotalMiB), true } @@ -126,59 +152,130 @@ func normalizedProcessedMiB(record Record) (float64, bool) { return 0, false } -func iterativeMetricPercent(record Record, metricPct float64, hasMetric bool) float64 { +func bulkTarget(record Record, elapsed time.Duration) float64 { + timeTarget := float64(SyncRangeMin) + math.Min(14, elapsed.Seconds()/8) + metricPct, hasMetric := metricPercent(record) + if !hasMetric { + return clampFloat(timeTarget, float64(SyncRangeMin), bulkCeiling) + } + metricTarget := float64(SyncRangeMin) + (metricPct/100.0)*(bulkCeiling-float64(SyncRangeMin)) + mixed := metricTarget*0.78 + timeTarget*0.22 + return clampFloat(mixed, float64(SyncRangeMin), bulkCeiling) +} + +func iterativeTarget(record Record, state State, current int32) float64 { + baseline := math.Max(float64(current), iterativeFloor) + if record.HasIteration { + baseline = math.Max(baseline, iterativeFloor+math.Min(float64(record.Iteration), 6)*1.5) + } + + target := baseline + metricPct, hasMetric := metricPercent(record) if hasMetric { - if record.DataTotalMiB > 0 && record.DataRemainingMiB >= 0 { - remainingRatio := clampFloat(record.DataRemainingMiB/record.DataTotalMiB, 0.0001, 1) - shaped := 1 - math.Log1p(remainingRatio*9)/math.Log(10) - return clampFloat(progressBulkCeiling+shaped*(progressIterativeCeiling-progressBulkCeiling), progressBulkCeiling, progressIterativeCeiling) - } - return clampFloat(progressBulkCeiling+(metricPct/100.0)*(progressIterativeCeiling-progressBulkCeiling), progressBulkCeiling, progressIterativeCeiling) + target = math.Max(target, iterativeMetricTarget(record, 
metricPct)) + } + + iterativeSince := state.IterativeSince + if iterativeSince.IsZero() { + iterativeSince = record.Now } - return progressBulkCeiling + iterElapsed := record.Now.Sub(iterativeSince) + if iterElapsed < 0 { + iterElapsed = 0 + } + target += math.Min(10, iterElapsed.Seconds()/12) + if record.HasThrottle { + target += record.Throttle * 6 + } + if !hasMetric { + target += math.Min(34, iterElapsed.Seconds()/20) + } + + return clampFloat(target, iterativeFloor, iterativeCeiling) } -func isIterative(record Record, elapsedSec float64) bool { - if record.Iteration > 0 { - return true +func iterativeMetricTarget(record Record, metricPct float64) float64 { + if record.DataTotalMiB > 0 && record.DataRemainingMiB >= 0 { + remainingRatio := clampFloat(record.DataRemainingMiB/record.DataTotalMiB, 0.0001, 1) + shaped := 1 - math.Log1p(remainingRatio*9)/math.Log(10) + return clampFloat(iterativeFloor+shaped*(iterativeCeiling-iterativeFloor), iterativeFloor, iterativeCeiling) } - if record.Mode == virtv1.MigrationPostCopy || record.Mode == virtv1.MigrationPaused { + return clampFloat(iterativeFloor+(metricPct/100.0)*(iterativeCeiling-iterativeFloor), iterativeFloor, iterativeCeiling) +} + +func isIterative(record Record) bool { + if record.HasIteration && record.Iteration > 0 { return true } - if record.Phase == virtv1.MigrationRunning || record.Phase == virtv1.MigrationSynchronizing { - return elapsedSec >= progressBulkDurationGuess - } - return false + return record.Mode == virtv1.MigrationPostCopy || record.Mode == virtv1.MigrationPaused } -func applyMonotonicStallBump(previous, current int32, elapsedSec float64, iterative bool) int32 { - prev := clampSyncRange(previous) - base := clampSyncRange(current) - if base < prev { - if prev-base <= 1 { - return prev - } - base = prev +func smoothProgress(current int32, target float64, iterative bool, throttle float64) int32 { + delta := target - float64(current) + if delta <= 0 { + return current + } + factor := 0.40 + if 
iterative { + factor = 0.28 } - if base > prev { - return base + step := math.Max(1, math.Round(delta*factor)) + if iterative && throttle > 0 { + step += math.Round(throttle * 2) } + return current + int32(step) +} - window := float64(progressBulkStallSeconds) +func applyStatefulStall(record Record, state State, current int32, iterative bool) int32 { + window := bulkStallWindow if iterative { - window = float64(progressIterStallSeconds) + window = iterStallWindow + } + lastMetricAt := state.LastMetricAt + if lastMetricAt.IsZero() { + lastMetricAt = record.Now + } + if record.Now.Sub(lastMetricAt) < window { + return current } - if elapsedSec >= window { - return clampSyncRange(prev + 1) + bump := int32(1) + if iterative && record.HasThrottle && record.Throttle >= 0.5 { + bump = 2 } - return prev + return clampSyncRange(current + bump) } -func mapToSyncRange(internal float64) int32 { - normalized := (clampFloat(internal, progressStartPercent, progressIterativeCeiling) - progressStartPercent) / - (progressIterativeCeiling - progressStartPercent) - mapped := float64(SyncRangeMin) + normalized*float64(SyncRangeMax-SyncRangeMin) - return clampSyncRange(int32(math.Round(mapped))) +func updateMetricState(record Record, state State) { + if !metricChanged(record, state) { + return + } + state.LastMetricAt = record.Now + state.LastProcessedMiB = record.DataProcessedMiB + state.LastRemainingMiB = record.DataRemainingMiB +} + +func metricChanged(record Record, state State) bool { + if state.LastMetricAt.IsZero() { + return true + } + if record.DataProcessedMiB >= 0 && !almostEqual(record.DataProcessedMiB, state.LastProcessedMiB) { + return true + } + if record.DataRemainingMiB >= 0 && !almostEqual(record.DataRemainingMiB, state.LastRemainingMiB) { + return true + } + return false +} + +func almostEqual(a, b float64) bool { + return math.Abs(a-b) < 0.01 +} + +func maxInt32(a, b int32) int32 { + if a > b { + return a + } + return b } func clampFloat(v, minV, maxV float64) 
float64 { diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go index 949f0109e1..85d85d8755 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go @@ -20,6 +20,7 @@ import ( "testing" "time" + "k8s.io/apimachinery/pkg/types" virtv1 "kubevirt.io/api/core/v1" ) @@ -28,13 +29,15 @@ func TestProgress_MonotonicGrowth(t *testing.T) { p := NewProgress() first := p.SyncProgress(Record{ + OperationUID: types.UID("vmop"), Now: now, StartedAt: now.Add(-20 * time.Second), PreviousProgress: 10, Phase: virtv1.MigrationRunning, }) second := p.SyncProgress(Record{ - Now: now, + OperationUID: types.UID("vmop"), + Now: now.Add(40 * time.Second), StartedAt: now.Add(-80 * time.Second), PreviousProgress: first, Phase: virtv1.MigrationRunning, @@ -50,16 +53,20 @@ func TestProgress_SyncRangeCaps(t *testing.T) { p := NewProgress() progress := p.SyncProgress(Record{ - Now: now, - StartedAt: now.Add(-2 * time.Hour), - PreviousProgress: 10, - Phase: virtv1.MigrationRunning, - Mode: virtv1.MigrationPostCopy, - Iteration: 1, - Throttle: 1, - DataTotalMiB: 1024, - DataProcessedMiB: 2048, - DataRemainingMiB: 0, + OperationUID: types.UID("vmop"), + Now: now, + StartedAt: now.Add(-2 * time.Hour), + PreviousProgress: 10, + Phase: virtv1.MigrationRunning, + Mode: virtv1.MigrationPostCopy, + HasIteration: true, + Iteration: 1, + HasThrottle: true, + AutoConvergeThrottle: 100, + Throttle: 1, + DataTotalMiB: 1024, + DataProcessedMiB: 2048, + DataRemainingMiB: 0, }) if progress < SyncRangeMin || progress > SyncRangeMax { @@ -70,16 +77,25 @@ func TestProgress_SyncRangeCaps(t *testing.T) { func TestProgress_StallBump(t *testing.T) { now := time.Now() p := NewProgress() + uid := types.UID("vmop") - 
progress := p.SyncProgress(Record{ + first := p.SyncProgress(Record{ + OperationUID: uid, Now: now, StartedAt: now.Add(-50 * time.Second), PreviousProgress: 70, Phase: virtv1.MigrationRunning, }) + progress := p.SyncProgress(Record{ + OperationUID: uid, + Now: now.Add(bulkStallWindow + time.Second), + StartedAt: now.Add(-50 * time.Second), + PreviousProgress: first, + Phase: virtv1.MigrationRunning, + }) - if progress != 71 { - t.Fatalf("expected stall bump to increase progress to 71, got=%d", progress) + if progress <= first { + t.Fatalf("expected stall bump to increase progress beyond %d, got=%d", first, progress) } } @@ -88,6 +104,7 @@ func TestProgress_DegradedModeWithoutMetrics(t *testing.T) { p := NewProgress() progress := p.SyncProgress(Record{ + OperationUID: types.UID("vmop"), Now: now, StartedAt: now.Add(-2 * time.Minute), PreviousProgress: 10, @@ -107,6 +124,7 @@ func TestProgress_WithMetricsInBulkPhase(t *testing.T) { p := NewProgress() progress := p.SyncProgress(Record{ + OperationUID: types.UID("vmop"), Now: now, StartedAt: now.Add(-30 * time.Second), PreviousProgress: 10, @@ -120,11 +138,13 @@ func TestProgress_WithMetricsInBulkPhase(t *testing.T) { } } -func TestProgress_WithMetricsInIterativePhase(t *testing.T) { +func TestProgress_EntersIterativePhaseByIteration(t *testing.T) { now := time.Now() p := NewProgress() + uid := types.UID("vmop") bulk := p.SyncProgress(Record{ + OperationUID: uid, Now: now, StartedAt: now.Add(-30 * time.Second), PreviousProgress: 10, @@ -133,23 +153,23 @@ func TestProgress_WithMetricsInIterativePhase(t *testing.T) { DataProcessedMiB: 512, }) iterative := p.SyncProgress(Record{ - Now: now, - StartedAt: now.Add(-3 * time.Minute), - PreviousProgress: bulk, - Phase: virtv1.MigrationRunning, - Mode: virtv1.MigrationPostCopy, - Iteration: 1, - Throttle: 1, - DataTotalMiB: 1024, - DataRemainingMiB: 64, + OperationUID: uid, + Now: now.Add(40 * time.Second), + StartedAt: now.Add(-3 * time.Minute), + PreviousProgress: bulk, + 
Phase: virtv1.MigrationRunning, + HasIteration: true, + Iteration: 2, + HasThrottle: true, + AutoConvergeThrottle: 50, + Throttle: 0.5, + DataTotalMiB: 1024, + DataRemainingMiB: 64, }) if iterative <= bulk { t.Fatalf("expected iterative progress to be greater than bulk progress, bulk=%d iterative=%d", bulk, iterative) } - if iterative < SyncRangeMin || iterative > SyncRangeMax { - t.Fatalf("expected iterative progress in sync range [%d,%d], got=%d", SyncRangeMin, SyncRangeMax, iterative) - } } func TestProgress_UsesRemainingDataFallback(t *testing.T) { @@ -157,6 +177,7 @@ func TestProgress_UsesRemainingDataFallback(t *testing.T) { p := NewProgress() progress := p.SyncProgress(Record{ + OperationUID: types.UID("vmop"), Now: now, StartedAt: now.Add(-90 * time.Second), PreviousProgress: 10, @@ -202,6 +223,7 @@ func TestProgress_ZeroElapsed(t *testing.T) { p := NewProgress() progress := p.SyncProgress(Record{ + OperationUID: types.UID("vmop"), Now: now, StartedAt: now, PreviousProgress: SyncRangeMin, @@ -213,21 +235,22 @@ func TestProgress_ZeroElapsed(t *testing.T) { } } -func TestProgress_VeryLargeElapsedIsCapped(t *testing.T) { +func TestProgress_VeryLargeElapsedStaysInRange(t *testing.T) { now := time.Now() p := NewProgress() progress := p.SyncProgress(Record{ + OperationUID: types.UID("vmop"), Now: now, StartedAt: now.Add(-24 * time.Hour), PreviousProgress: 10, Phase: virtv1.MigrationRunning, - Mode: virtv1.MigrationPostCopy, - Iteration: 1, + HasIteration: true, + Iteration: 5, }) - if progress != SyncRangeMax { - t.Fatalf("expected capped progress=%d, got=%d", SyncRangeMax, progress) + if progress < int32(iterativeFloor) || progress > SyncRangeMax { + t.Fatalf("expected progress in iterative range [%d,%d], got=%d", int32(iterativeFloor), SyncRangeMax, progress) } } @@ -235,63 +258,42 @@ func TestIsIterative(t *testing.T) { tests := []struct { name string record Record - elapsed float64 expected bool }{ { name: "iteration implies iterative", - record: 
Record{Iteration: 1}, - elapsed: 1, + record: Record{HasIteration: true, Iteration: 1}, expected: true, }, { name: "post copy mode implies iterative", record: Record{Mode: virtv1.MigrationPostCopy}, - elapsed: 1, - expected: true, - }, - { - name: "long running implies iterative", - record: Record{Phase: virtv1.MigrationRunning}, - elapsed: progressBulkDurationGuess, expected: true, }, { - name: "short pre-copy is not iterative", - record: Record{Phase: virtv1.MigrationRunning}, - elapsed: progressBulkDurationGuess - 1, + name: "pre-copy without iteration is not iterative", + record: Record{Mode: virtv1.MigrationPreCopy}, expected: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := isIterative(tt.record, tt.elapsed); got != tt.expected { + if got := isIterative(tt.record); got != tt.expected { t.Fatalf("isIterative() = %v, want %v", got, tt.expected) } }) } } -func TestProgress_StallBumpNotAppliedEarly(t *testing.T) { - got := applyMonotonicStallBump(70, 70, float64(progressBulkStallSeconds-1), false) - if got != 70 { - t.Fatalf("expected no stall bump before window, got=%d", got) - } -} +func TestForget_RemovesState(t *testing.T) { + p := NewProgress() + uid := types.UID("vmop") + p.store.Store(uid, State{Progress: 55}) -func TestProgress_StallBumpDoesNotRepeatOnRegressedBase(t *testing.T) { - got := applyMonotonicStallBump(71, 70, float64(progressBulkStallSeconds+10), false) - if got != 71 { - t.Fatalf("expected previous progress to be preserved without repeated bump, got=%d", got) - } -} + p.Forget(uid) -func TestMapToSyncRangeBoundaries(t *testing.T) { - if got := mapToSyncRange(progressStartPercent); got != SyncRangeMin { - t.Fatalf("expected lower boundary=%d, got=%d", SyncRangeMin, got) - } - if got := mapToSyncRange(progressIterativeCeiling); got != SyncRangeMax { - t.Fatalf("expected upper boundary=%d, got=%d", SyncRangeMax, got) + if p.store.Len() != 0 { + t.Fatalf("expected empty store after forget, got=%d", 
p.store.Len()) } } diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store.go new file mode 100644 index 0000000000..b87ebfbb78 --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store.go @@ -0,0 +1,75 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package progress + +import ( + "sync" + "time" + + "k8s.io/apimachinery/pkg/types" +) + +type State struct { + Progress int32 + Iterative bool + IterativeSince time.Time + LastUpdatedAt time.Time + LastMetricAt time.Time + LastIteration uint32 + LastProcessedMiB float64 + LastRemainingMiB float64 +} + +type Store struct { + mu sync.RWMutex + states map[types.UID]State +} + +func NewStore() *Store { + return &Store{states: make(map[types.UID]State)} +} + +func (s *Store) Load(uid types.UID) (State, bool) { + s.mu.RLock() + defer s.mu.RUnlock() + state, ok := s.states[uid] + return state, ok +} + +func (s *Store) Store(uid types.UID, state State) { + if uid == "" { + return + } + s.mu.Lock() + defer s.mu.Unlock() + s.states[uid] = state +} + +func (s *Store) Delete(uid types.UID) { + if uid == "" { + return + } + s.mu.Lock() + defer s.mu.Unlock() + delete(s.states, uid) +} + +func (s *Store) Len() int { + s.mu.RLock() + defer s.mu.RUnlock() + return len(s.states) +} diff --git 
a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store_test.go new file mode 100644 index 0000000000..c389cc9b94 --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store_test.go @@ -0,0 +1,60 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package progress + +import ( + "testing" + "time" + + "k8s.io/apimachinery/pkg/types" +) + +func TestStore_LoadStoreDelete(t *testing.T) { + store := NewStore() + uid := types.UID("vmop") + state := State{ + Progress: 42, + Iterative: true, + IterativeSince: time.Unix(100, 0), + } + + store.Store(uid, state) + + loaded, ok := store.Load(uid) + if !ok { + t.Fatal("expected state to be present") + } + if loaded.Progress != 42 || !loaded.Iterative { + t.Fatalf("unexpected loaded state: %+v", loaded) + } + + store.Delete(uid) + + if _, ok := store.Load(uid); ok { + t.Fatal("expected state to be removed") + } +} + +func TestStore_IgnoresEmptyUID(t *testing.T) { + store := NewStore() + store.Store("", State{Progress: 10}) + store.Delete("") + + if store.Len() != 0 { + t.Fatalf("expected empty store, got=%d", store.Len()) + } +} From 0b844766f28350fd6fe683d3b240c928e29fa943 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Fri, 3 Apr 2026 11:52:33 +0200 Subject: [PATCH 12/27] chore(core): remove unused migration helper Signed-off-by: Daniil 
Antoshin --- .../controller/vmop/migration/internal/progress/mapper.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go index 17f125a58f..4d4b7553d2 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go @@ -19,7 +19,6 @@ package progress import ( "time" - "k8s.io/apimachinery/pkg/types" virtv1 "kubevirt.io/api/core/v1" "github.com/deckhouse/virtualization/api/core/v1alpha2" @@ -97,10 +96,3 @@ func normalizeThrottle(raw uint32, ok bool) float64 { } return clampFloat(float64(raw)/100.0, 0, 1) } - -func operationUID(vmop *v1alpha2.VirtualMachineOperation) types.UID { - if vmop == nil { - return "" - } - return vmop.UID -} From d0fc6ab6083fe7b84241474ad1c24228168833f4 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Fri, 3 Apr 2026 12:01:43 +0200 Subject: [PATCH 13/27] chore(core): update kubevirt api replace Signed-off-by: Daniil Antoshin --- images/virtualization-artifact/go.mod | 2 +- images/virtualization-artifact/go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/images/virtualization-artifact/go.mod b/images/virtualization-artifact/go.mod index 6f50f27f04..f3b5835c2b 100644 --- a/images/virtualization-artifact/go.mod +++ b/images/virtualization-artifact/go.mod @@ -168,4 +168,4 @@ replace ( ) // Kubevirt API replaces -replace kubevirt.io/api => ../../../3p-kubevirt/staging/src/kubevirt.io/api +replace kubevirt.io/api => github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v0.0.0-20260403095053-aefa74c02fee diff --git a/images/virtualization-artifact/go.sum b/images/virtualization-artifact/go.sum index cd0b924afb..03d6bf401a 100644 --- a/images/virtualization-artifact/go.sum +++ 
b/images/virtualization-artifact/go.sum @@ -49,6 +49,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v0.0.0-20260403095053-aefa74c02fee h1:FL3Sn9OL9HZZX01vWiO6t6ps8nkxH+AOilBp+Rdp6iU= +github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v0.0.0-20260403095053-aefa74c02fee/go.mod h1:wGZLfRa/b4w/V/hakmfcK0CmrAZGfpj+jN7BMt0s19E= github.com/deckhouse/deckhouse/pkg/log v0.0.0-20250226105106-176cd3afcdd5 h1:PsN1E0oxC/+4zdA977txrqUCuObFL3HAuu5Xnud8m8c= github.com/deckhouse/deckhouse/pkg/log v0.0.0-20250226105106-176cd3afcdd5/go.mod h1:Mk5HRzkc5pIcDIZ2JJ6DPuuqnwhXVkb3you8M8Mg+4w= github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= From 2a4dee0af373f2a2bf23f2a553de3a4ce2768661 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Fri, 3 Apr 2026 12:08:42 +0200 Subject: [PATCH 14/27] refactor(core): use ptr helper for migration progress Signed-off-by: Daniil Antoshin --- .../vmop/migration/internal/handler/lifecycle.go | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go index 582454946c..f2e29e6963 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go @@ -25,6 +25,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 
"k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" virtv1 "kubevirt.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -289,7 +290,7 @@ func (h LifecycleHandler) syncOperationComplete(ctx context.Context, vmop *v1alp reason := h.getFailedReason(mig) msg := h.getFailedMessage(reason, mig) progress := h.calculateMigrationProgress(vmop, mig, reason) - vmop.Status.Progress = ptrToInt32(progress) + vmop.Status.Progress = ptr.To(progress) completedCond. Status(metav1.ConditionFalse). @@ -300,7 +301,7 @@ func (h LifecycleHandler) syncOperationComplete(ctx context.Context, vmop *v1alp case virtv1.MigrationSucceeded: vmop.Status.Phase = v1alpha2.VMOPPhaseCompleted h.recorder.Event(vmop, corev1.EventTypeNormal, v1alpha2.ReasonVMOPSucceeded, "VirtualMachineOperation succeeded") - vmop.Status.Progress = ptrToInt32(100) + vmop.Status.Progress = ptr.To(int32(100)) completedCond. Status(metav1.ConditionTrue). @@ -320,7 +321,7 @@ func (h LifecycleHandler) syncOperationComplete(ctx context.Context, vmop *v1alp vmop.Status.Phase = v1alpha2.VMOPPhasePending } progress := h.calculateMigrationProgress(vmop, mig, reason) - vmop.Status.Progress = ptrToInt32(progress) + vmop.Status.Progress = ptr.To(progress) completedCond. Status(metav1.ConditionFalse). @@ -383,7 +384,7 @@ func (h LifecycleHandler) canExecute(vmop *v1alpha2.VirtualMachineOperation, vm if migratable.Status == metav1.ConditionTrue { vmop.Status.Phase = v1alpha2.VMOPPhasePending - vmop.Status.Progress = ptrToInt32(1) + vmop.Status.Progress = ptr.To(int32(1)) conditions.SetCondition( conditions.NewConditionBuilder(vmopcondition.TypeCompleted). Generation(vmop.GetGeneration()). 
@@ -437,7 +438,7 @@ func (h LifecycleHandler) execute(ctx context.Context, vmop *v1alpha2.VirtualMac vmop.Status.Phase = v1alpha2.VMOPPhasePending } progress := h.calculateMigrationProgress(vmop, mig, reason) - vmop.Status.Progress = ptrToInt32(progress) + vmop.Status.Progress = ptr.To(progress) conditions.SetCondition( conditions.NewConditionBuilder(vmopcondition.TypeCompleted). @@ -492,10 +493,6 @@ func getMessageByMigrationFailedReason(mig *virtv1.VirtualMachineInstanceMigrati return "" } -func ptrToInt32(v int32) *int32 { - return &v -} - func (h LifecycleHandler) getFailedReason(mig *virtv1.VirtualMachineInstanceMigration) vmopcondition.ReasonCompleted { if mig == nil { return vmopcondition.ReasonFailed From 001bb0b68da337007b47c5e7046630a975891bda Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Fri, 3 Apr 2026 12:50:38 +0200 Subject: [PATCH 15/27] fix(core): align migration progress iterative entry Signed-off-by: Daniil Antoshin --- .../migration/internal/handler/lifecycle.go | 2 ++ .../internal/handler/lifecycle_test.go | 24 +++++++++++++++++++ .../migration/internal/progress/progress.go | 5 +--- .../internal/progress/progress_test.go | 9 +++++-- 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go index f2e29e6963..937dddc0dc 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go @@ -90,12 +90,14 @@ func NewLifecycleHandler(client client.Client, migration *migrationservice.Migra func (h LifecycleHandler) Handle(ctx context.Context, vmop *v1alpha2.VirtualMachineOperation) (reconcile.Result, error) { // Do not update conditions for object in the deletion state. 
if commonvmop.IsTerminating(vmop) { + h.forgetProgress(vmop) vmop.Status.Phase = v1alpha2.VMOPPhaseTerminating return reconcile.Result{}, nil } // Ignore if VMOP is in final state. if commonvmop.IsFinished(vmop) { + h.forgetProgress(vmop) return reconcile.Result{}, nil } diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go index 6362f54299..2dd7ed24bf 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go @@ -273,6 +273,30 @@ var _ = Describe("LifecycleHandler", func() { Expect(h.getFailedReason(nil)).To(Equal(vmopcondition.ReasonFailed)) }) + It("should forget progress for terminating vmop", func() { + stub := &progressStrategyStub{} + vmop := newVMOPMigrate() + now := metav1.Now() + vmop.DeletionTimestamp = &now + h := LifecycleHandler{progressStrategy: stub} + + _, err := h.Handle(ctx, vmop) + Expect(err).NotTo(HaveOccurred()) + Expect(vmop.Status.Phase).To(Equal(v1alpha2.VMOPPhaseTerminating)) + Expect(stub.forgotten).To(Equal([]types.UID{vmop.UID})) + }) + + It("should forget progress for finished vmop", func() { + stub := &progressStrategyStub{} + vmop := newVMOPMigrate() + vmop.Status.Phase = v1alpha2.VMOPPhaseCompleted + h := LifecycleHandler{progressStrategy: stub} + + _, err := h.Handle(ctx, vmop) + Expect(err).NotTo(HaveOccurred()) + Expect(stub.forgotten).To(Equal([]types.UID{vmop.UID})) + }) + DescribeTable("should detect failed reason", func(mig *virtv1.VirtualMachineInstanceMigration, expected vmopcondition.ReasonCompleted) { h := LifecycleHandler{} Expect(h.getFailedReason(mig)).To(Equal(expected)) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go 
b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go index 585a4e98b2..7e458c51e5 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go @@ -204,10 +204,7 @@ func iterativeMetricTarget(record Record, metricPct float64) float64 { } func isIterative(record Record) bool { - if record.HasIteration && record.Iteration > 0 { - return true - } - return record.Mode == virtv1.MigrationPostCopy || record.Mode == virtv1.MigrationPaused + return record.HasIteration && record.Iteration > 0 } func smoothProgress(current int32, target float64, iterative bool, throttle float64) int32 { diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go index 85d85d8755..c472a3ee52 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go @@ -266,9 +266,14 @@ func TestIsIterative(t *testing.T) { expected: true, }, { - name: "post copy mode implies iterative", + name: "post copy without iteration is not iterative", record: Record{Mode: virtv1.MigrationPostCopy}, - expected: true, + expected: false, + }, + { + name: "paused without iteration is not iterative", + record: Record{Mode: virtv1.MigrationPaused}, + expected: false, }, { name: "pre-copy without iteration is not iterative", From a54cb5cd3b39b84c3d08e6bd0f868901d611b293 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Fri, 3 Apr 2026 13:06:24 +0200 Subject: [PATCH 16/27] fix(core): slow down migration progress resync jumps Signed-off-by: Daniil Antoshin --- .../migration/internal/progress/progress.go | 40 ++++++++++++---- 
.../internal/progress/progress_test.go | 48 +++++++++++++++++++ 2 files changed, 78 insertions(+), 10 deletions(-) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go index 7e458c51e5..13075bf837 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go @@ -28,11 +28,13 @@ const ( SyncRangeMin int32 = 10 SyncRangeMax int32 = 90 - bulkCeiling = 48.0 - iterativeFloor = 46.0 - iterativeCeiling = 90.0 - bulkStallWindow = 45 * time.Second - iterStallWindow = 25 * time.Second + bulkCeiling = 48.0 + iterativeFloor = 46.0 + iterativeCeiling = 90.0 + bulkStallWindow = 45 * time.Second + iterStallWindow = 25 * time.Second + bulkUpdateInterval = time.Second + iterUpdateInterval = 2 * time.Second ) type Strategy interface { @@ -98,7 +100,8 @@ func (p *Progress) SyncProgress(record Record) int32 { target = iterativeTarget(record, state, prev) } - next := smoothProgress(prev, target, iterative, record.Throttle) + metricAdvanced := metricChanged(record, state) + next := smoothProgress(prev, target, iterative, record.Throttle, record.Now.Sub(state.LastUpdatedAt), metricAdvanced, state.LastUpdatedAt.IsZero()) next = applyStatefulStall(record, state, next, iterative) next = clampSyncRange(maxInt32(prev, next)) @@ -207,19 +210,36 @@ func isIterative(record Record) bool { return record.HasIteration && record.Iteration > 0 } -func smoothProgress(current int32, target float64, iterative bool, throttle float64) int32 { +func smoothProgress(current int32, target float64, iterative bool, throttle float64, sinceLast time.Duration, metricAdvanced, initial bool) int32 { delta := target - float64(current) if delta <= 0 { return current } + + if !initial { + minInterval := bulkUpdateInterval + if iterative { + 
minInterval = iterUpdateInterval + } + if sinceLast > 0 && sinceLast < minInterval { + return current + } + } + factor := 0.40 + maxStep := 6.0 if iterative { factor = 0.28 + maxStep = 4 + if throttle > 0 { + maxStep += math.Round(throttle * 2) + } } - step := math.Max(1, math.Round(delta*factor)) - if iterative && throttle > 0 { - step += math.Round(throttle * 2) + if metricAdvanced { + maxStep += 1 } + step := math.Max(1, math.Round(delta*factor)) + step = math.Min(step, maxStep) return current + int32(step) } diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go index c472a3ee52..5a782b5c9b 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go @@ -99,6 +99,54 @@ func TestProgress_StallBump(t *testing.T) { } } +func TestProgress_DoesNotAdvanceTooFastBetweenRapidSyncs(t *testing.T) { + now := time.Now() + p := NewProgress() + uid := types.UID("vmop") + + first := p.SyncProgress(Record{ + OperationUID: uid, + Now: now, + StartedAt: now.Add(-2 * time.Minute), + PreviousProgress: 10, + Phase: virtv1.MigrationRunning, + DataTotalMiB: 1024, + DataProcessedMiB: 900, + }) + second := p.SyncProgress(Record{ + OperationUID: uid, + Now: now.Add(200 * time.Millisecond), + StartedAt: now.Add(-2 * time.Minute), + PreviousProgress: first, + Phase: virtv1.MigrationRunning, + DataTotalMiB: 1024, + DataProcessedMiB: 900, + }) + + if second != first { + t.Fatalf("expected no progress change during rapid resync, first=%d second=%d", first, second) + } +} + +func TestProgress_CapsSingleBulkStep(t *testing.T) { + now := time.Now() + p := NewProgress() + + progress := p.SyncProgress(Record{ + OperationUID: types.UID("vmop"), + Now: now, + StartedAt: now.Add(-2 * time.Minute), + 
PreviousProgress: 10, + Phase: virtv1.MigrationRunning, + DataTotalMiB: 1024, + DataProcessedMiB: 1024, + }) + + if progress > 17 { + t.Fatalf("expected first bulk step to be capped, got=%d", progress) + } +} + func TestProgress_DegradedModeWithoutMetrics(t *testing.T) { now := time.Now() p := NewProgress() From 8fba32ee2ce4a5acf999b29dba5080bebd24d6fa Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Fri, 3 Apr 2026 13:43:49 +0200 Subject: [PATCH 17/27] fix(core): pass state by pointer to persist metric tracking between syncs Signed-off-by: Daniil Antoshin --- .../migration/internal/progress/progress.go | 26 +++++++++---------- .../internal/progress/progress_test.go | 4 +-- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go index 13075bf837..9668db434d 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go @@ -90,22 +90,19 @@ func (p *Progress) SyncProgress(record Record) int32 { if iterative && !state.Iterative { state.Iterative = true state.IterativeSince = record.Now - if prev < int32(iterativeFloor) { - prev = int32(iterativeFloor) - } } target := bulkTarget(record, elapsed) if iterative { - target = iterativeTarget(record, state, prev) + target = iterativeTarget(record, &state, prev) } - metricAdvanced := metricChanged(record, state) + metricAdvanced := metricChanged(record, &state) next := smoothProgress(prev, target, iterative, record.Throttle, record.Now.Sub(state.LastUpdatedAt), metricAdvanced, state.LastUpdatedAt.IsZero()) - next = applyStatefulStall(record, state, next, iterative) + next = applyStatefulStall(record, &state, next, iterative) next = clampSyncRange(maxInt32(prev, next)) - updateMetricState(record, 
state) + updateMetricState(record, &state) state.Progress = next state.LastUpdatedAt = record.Now state.LastIteration = record.Iteration @@ -166,10 +163,11 @@ func bulkTarget(record Record, elapsed time.Duration) float64 { return clampFloat(mixed, float64(SyncRangeMin), bulkCeiling) } -func iterativeTarget(record Record, state State, current int32) float64 { - baseline := math.Max(float64(current), iterativeFloor) +func iterativeTarget(record Record, state *State, current int32) float64 { + baseline := float64(current) if record.HasIteration { - baseline = math.Max(baseline, iterativeFloor+math.Min(float64(record.Iteration), 6)*1.5) + iterationBoost := math.Min(float64(record.Iteration), 6) * 1.5 + baseline = math.Max(baseline, float64(current)+iterationBoost) } target := baseline @@ -194,7 +192,7 @@ func iterativeTarget(record Record, state State, current int32) float64 { target += math.Min(34, iterElapsed.Seconds()/20) } - return clampFloat(target, iterativeFloor, iterativeCeiling) + return clampFloat(target, float64(current), iterativeCeiling) } func iterativeMetricTarget(record Record, metricPct float64) float64 { @@ -243,7 +241,7 @@ func smoothProgress(current int32, target float64, iterative bool, throttle floa return current + int32(step) } -func applyStatefulStall(record Record, state State, current int32, iterative bool) int32 { +func applyStatefulStall(record Record, state *State, current int32, iterative bool) int32 { window := bulkStallWindow if iterative { window = iterStallWindow @@ -262,7 +260,7 @@ func applyStatefulStall(record Record, state State, current int32, iterative boo return clampSyncRange(current + bump) } -func updateMetricState(record Record, state State) { +func updateMetricState(record Record, state *State) { if !metricChanged(record, state) { return } @@ -271,7 +269,7 @@ func updateMetricState(record Record, state State) { state.LastRemainingMiB = record.DataRemainingMiB } -func metricChanged(record Record, state State) bool { +func 
metricChanged(record Record, state *State) bool { if state.LastMetricAt.IsZero() { return true } diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go index 5a782b5c9b..5e34948994 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go @@ -297,8 +297,8 @@ func TestProgress_VeryLargeElapsedStaysInRange(t *testing.T) { Iteration: 5, }) - if progress < int32(iterativeFloor) || progress > SyncRangeMax { - t.Fatalf("expected progress in iterative range [%d,%d], got=%d", int32(iterativeFloor), SyncRangeMax, progress) + if progress < SyncRangeMin || progress > SyncRangeMax { + t.Fatalf("expected progress in range [%d,%d], got=%d", SyncRangeMin, SyncRangeMax, progress) } } From 68708ce38b28de325e2d6546eccf4baf22f95504 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Fri, 3 Apr 2026 13:48:42 +0200 Subject: [PATCH 18/27] refactor(core): replace manual sync map with LRUExpireCache in progress store Signed-off-by: Daniil Antoshin --- .../vmop/migration/internal/progress/store.go | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store.go index b87ebfbb78..6d86896756 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store.go @@ -17,10 +17,15 @@ limitations under the License. 
package progress import ( - "sync" "time" "k8s.io/apimachinery/pkg/types" + utilcache "k8s.io/apimachinery/pkg/util/cache" +) + +const ( + storeMaxSize = 1024 + storeTTL = 30 * time.Minute ) type State struct { @@ -35,41 +40,35 @@ type State struct { } type Store struct { - mu sync.RWMutex - states map[types.UID]State + cache *utilcache.LRUExpireCache } func NewStore() *Store { - return &Store{states: make(map[types.UID]State)} + return &Store{cache: utilcache.NewLRUExpireCache(storeMaxSize)} } func (s *Store) Load(uid types.UID) (State, bool) { - s.mu.RLock() - defer s.mu.RUnlock() - state, ok := s.states[uid] - return state, ok + v, ok := s.cache.Get(uid) + if !ok { + return State{}, false + } + return v.(State), true } func (s *Store) Store(uid types.UID, state State) { if uid == "" { return } - s.mu.Lock() - defer s.mu.Unlock() - s.states[uid] = state + s.cache.Add(uid, state, storeTTL) } func (s *Store) Delete(uid types.UID) { if uid == "" { return } - s.mu.Lock() - defer s.mu.Unlock() - delete(s.states, uid) + s.cache.Remove(uid) } func (s *Store) Len() int { - s.mu.RLock() - defer s.mu.RUnlock() - return len(s.states) + return len(s.cache.Keys()) } From f8ee47e4ecb4299962737082c07abcbfab1fa02f Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Fri, 3 Apr 2026 14:27:07 +0200 Subject: [PATCH 19/27] refactor(core): rewrite migration progress engine with EMA smoothing and adaptive stall Signed-off-by: Daniil Antoshin --- .../migration/internal/progress/progress.go | 296 +++++++++++------- .../internal/progress/progress_test.go | 287 ++++++++++++----- .../vmop/migration/internal/progress/store.go | 21 +- 3 files changed, 398 insertions(+), 206 deletions(-) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go index 9668db434d..e910ab0fc0 100644 --- 
a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go @@ -28,13 +28,23 @@ const ( SyncRangeMin int32 = 10 SyncRangeMax int32 = 90 - bulkCeiling = 48.0 - iterativeFloor = 46.0 - iterativeCeiling = 90.0 - bulkStallWindow = 45 * time.Second - iterStallWindow = 25 * time.Second - bulkUpdateInterval = time.Second - iterUpdateInterval = 2 * time.Second + bulkCeiling = 45.0 + iterativeFloor = 45.0 + iterativeCeiling = 90.0 + thresholdFactor = 0.05 + + bulkTimeRate = 0.55 + iterBaseTimeRate = 0.022 + iterThrottleRate = 0.0012 + bulkMetricWeight = 0.80 + bulkTimeWeight = 0.20 + iterMetricWeight = 0.76 + iterTimeWeight = 0.24 + smoothAlphaUp = 0.18 + smoothAlphaDown = 0.34 + bulkStallSeconds = 10.0 + iterStallSeconds = 8.0 + finalStallSeconds = 6.0 ) type Strategy interface { @@ -76,6 +86,7 @@ func (p *Progress) Forget(uid types.UID) { func (p *Progress) SyncProgress(record Record) int32 { state := p.getState(record) + prev := clampSyncRange(record.PreviousProgress) if state.Progress > prev { prev = state.Progress @@ -85,22 +96,57 @@ func (p *Progress) SyncProgress(record Record) int32 { if elapsed < 0 { elapsed = 0 } + elapsedSec := elapsed.Seconds() iterative := isIterative(record) if iterative && !state.Iterative { state.Iterative = true state.IterativeSince = record.Now + p.initIterative(record, &state, elapsedSec) + } + + if iterative { + observeRemaining(record, &state) + } + + target := bulkTarget(record, elapsedSec) + if iterative { + target = iterativeTarget(record, &state, elapsedSec) } - target := bulkTarget(record, elapsed) + maxStep := int32(10) if iterative { - target = iterativeTarget(record, &state, prev) + maxStep = 5 + } + + cap := stageCap(iterative) + progress := math.Max(float64(prev), math.Min(target, cap)) + next := clampPercent(progress) + if next < prev { + next = prev + } + if next > prev+maxStep { + next = prev + 
maxStep + } + + if next == prev && float64(next) < cap { + lastIncrease := state.LastIncreaseAt + if lastIncrease.IsZero() { + lastIncrease = record.StartedAt + } + stallWin := stallWindow(record, &state, iterative) + if record.Now.Sub(lastIncrease).Seconds() >= stallWin { + next++ + } } - metricAdvanced := metricChanged(record, &state) - next := smoothProgress(prev, target, iterative, record.Throttle, record.Now.Sub(state.LastUpdatedAt), metricAdvanced, state.LastUpdatedAt.IsZero()) - next = applyStatefulStall(record, &state, next, iterative) - next = clampSyncRange(maxInt32(prev, next)) + if float64(next) > cap { + next = int32(cap) + } + + if next > prev { + state.LastIncreaseAt = record.Now + } updateMetricState(record, &state) state.Progress = next @@ -121,143 +167,161 @@ func (p *Progress) getState(record Record) State { } state, ok := p.store.Load(record.OperationUID) if !ok { - state = State{Progress: clampSyncRange(record.PreviousProgress), LastMetricAt: record.Now} + state = State{ + Progress: clampSyncRange(record.PreviousProgress), + LastMetricAt: record.Now, + } } return state } -func metricPercent(record Record) (float64, bool) { - if record.DataTotalMiB <= 0 { - return 0, false +func (p *Progress) initIterative(record Record, state *State, _ float64) { + total := record.DataTotalMiB + if total <= 0 { + total = 1 + } + if total > state.InitialTotal { + state.InitialTotal = total + } + if state.InitialTotal <= 0 { + state.InitialTotal = total } - processed, hasProcessed := normalizedProcessedMiB(record) - if !hasProcessed { - return 0, false + remaining := maxRemaining(record) + if remaining <= 0 { + remaining = state.InitialTotal } - return clampFloat((processed/record.DataTotalMiB)*100.0, 0, 100), true + state.Threshold = math.Max(math.Ceil(state.InitialTotal*thresholdFactor), 1) + state.InitialRemaining = math.Max(remaining, state.Threshold) + state.SmoothedRemaining = state.InitialRemaining } -func normalizedProcessedMiB(record Record) (float64, 
bool) { - if record.DataTotalMiB <= 0 { - return 0, false +func observeRemaining(record Record, state *State) { + remaining := maxRemaining(record) + if remaining <= 0 { + return + } + + alpha := smoothAlphaUp + if remaining < state.SmoothedRemaining { + alpha = smoothAlphaDown } - if record.DataProcessedMiB >= 0 { - return clampFloat(record.DataProcessedMiB, 0, record.DataTotalMiB), true + if record.Throttle >= 0.80 { + alpha += 0.08 } - if record.DataRemainingMiB >= 0 { - return clampFloat(record.DataTotalMiB-record.DataRemainingMiB, 0, record.DataTotalMiB), true + if alpha > 0.90 { + alpha = 0.90 } - return 0, false -} -func bulkTarget(record Record, elapsed time.Duration) float64 { - timeTarget := float64(SyncRangeMin) + math.Min(14, elapsed.Seconds()/8) - metricPct, hasMetric := metricPercent(record) - if !hasMetric { - return clampFloat(timeTarget, float64(SyncRangeMin), bulkCeiling) + if state.SmoothedRemaining <= 0 { + state.SmoothedRemaining = remaining + } else { + state.SmoothedRemaining = alpha*remaining + (1-alpha)*state.SmoothedRemaining } - metricTarget := float64(SyncRangeMin) + (metricPct/100.0)*(bulkCeiling-float64(SyncRangeMin)) - mixed := metricTarget*0.78 + timeTarget*0.22 - return clampFloat(mixed, float64(SyncRangeMin), bulkCeiling) } -func iterativeTarget(record Record, state *State, current int32) float64 { - baseline := float64(current) - if record.HasIteration { - iterationBoost := math.Min(float64(record.Iteration), 6) * 1.5 - baseline = math.Max(baseline, float64(current)+iterationBoost) +func bulkTarget(record Record, elapsedSec float64) float64 { + total := record.DataTotalMiB + if total <= 0 { + total = 1 } - target := baseline - metricPct, hasMetric := metricPercent(record) - if hasMetric { - target = math.Max(target, iterativeMetricTarget(record, metricPct)) - } + processed := math.Max(record.DataProcessedMiB, 0) + metricRatio := clampFloat(processed/total, 0, 1) + metricPct := float64(SyncRangeMin) + 
(bulkCeiling-float64(SyncRangeMin))*metricRatio - iterativeSince := state.IterativeSince - if iterativeSince.IsZero() { - iterativeSince = record.Now + timePct := float64(SyncRangeMin) + elapsedSec*bulkTimeRate + if timePct > bulkCeiling { + timePct = bulkCeiling } - iterElapsed := record.Now.Sub(iterativeSince) - if iterElapsed < 0 { - iterElapsed = 0 - } - target += math.Min(10, iterElapsed.Seconds()/12) - if record.HasThrottle { - target += record.Throttle * 6 + + return bulkMetricWeight*metricPct + bulkTimeWeight*timePct +} + +func iterativeTarget(record Record, state *State, elapsedSec float64) float64 { + metricRatio := iterativeMetricRatio(state) + metricPct := iterativeFloor + (iterativeCeiling-5-iterativeFloor)*metricRatio + + throttle := record.Throttle + iterSince := state.IterativeSince + if iterSince.IsZero() { + iterSince = record.Now } - if !hasMetric { - target += math.Min(34, iterElapsed.Seconds()/20) + iterElapsed := math.Max(0, elapsedSec-record.Now.Sub(iterSince).Seconds()+record.Now.Sub(iterSince).Seconds()) + iterElapsedSec := math.Max(0, record.Now.Sub(iterSince).Seconds()) + + timeRate := iterBaseTimeRate + throttle*iterThrottleRate + timePct := iterativeFloor + iterElapsedSec*timeRate + if timePct > iterativeCeiling { + timePct = iterativeCeiling } + _ = iterElapsed - return clampFloat(target, float64(current), iterativeCeiling) + target := iterMetricWeight*metricPct + iterTimeWeight*timePct + return math.Min(target, iterativeCeiling) } -func iterativeMetricTarget(record Record, metricPct float64) float64 { - if record.DataTotalMiB > 0 && record.DataRemainingMiB >= 0 { - remainingRatio := clampFloat(record.DataRemainingMiB/record.DataTotalMiB, 0.0001, 1) - shaped := 1 - math.Log1p(remainingRatio*9)/math.Log(10) - return clampFloat(iterativeFloor+shaped*(iterativeCeiling-iterativeFloor), iterativeFloor, iterativeCeiling) +func iterativeMetricRatio(state *State) float64 { + if state.InitialRemaining <= state.Threshold { + return 1 } - return 
clampFloat(iterativeFloor+(metricPct/100.0)*(iterativeCeiling-iterativeFloor), iterativeFloor, iterativeCeiling) -} -func isIterative(record Record) bool { - return record.HasIteration && record.Iteration > 0 + current := math.Max(state.SmoothedRemaining, state.Threshold) + initial := math.Max(state.InitialRemaining, state.Threshold) + base := math.Log(initial / state.Threshold) + if base <= 0 { + return 1 + } + + ratio := 1 - math.Log(current/state.Threshold)/base + return clampFloat(ratio, 0, 1) } -func smoothProgress(current int32, target float64, iterative bool, throttle float64, sinceLast time.Duration, metricAdvanced, initial bool) int32 { - delta := target - float64(current) - if delta <= 0 { - return current +func stageCap(iterative bool) float64 { + if !iterative { + return bulkCeiling } + return iterativeCeiling +} - if !initial { - minInterval := bulkUpdateInterval - if iterative { - minInterval = iterUpdateInterval - } - if sinceLast > 0 && sinceLast < minInterval { - return current - } +func stallWindow(record Record, state *State, iterative bool) float64 { + if !iterative { + return bulkStallSeconds } - factor := 0.40 - maxStep := 6.0 - if iterative { - factor = 0.28 - maxStep = 4 - if throttle > 0 { - maxStep += math.Round(throttle * 2) - } + if state.Progress >= int32(iterativeCeiling)-2 { + return 24.0 } - if metricAdvanced { - maxStep += 1 + if state.Progress >= int32(iterativeCeiling)-5 { + return 14.0 } - step := math.Max(1, math.Round(delta*factor)) - step = math.Min(step, maxStep) - return current + int32(step) -} - -func applyStatefulStall(record Record, state *State, current int32, iterative bool) int32 { - window := bulkStallWindow - if iterative { - window = iterStallWindow + if state.SmoothedRemaining > 0 && state.SmoothedRemaining <= state.Threshold { + return finalStallSeconds } - lastMetricAt := state.LastMetricAt - if lastMetricAt.IsZero() { - lastMetricAt = record.Now + + window := iterStallSeconds - 3*record.Throttle + if window < 
finalStallSeconds { + return finalStallSeconds } - if record.Now.Sub(lastMetricAt) < window { - return current + return window +} + +func isIterative(record Record) bool { + return record.HasIteration && record.Iteration > 0 +} + +func maxRemaining(record Record) float64 { + if record.DataRemainingMiB > 0 { + return record.DataRemainingMiB } - bump := int32(1) - if iterative && record.HasThrottle && record.Throttle >= 0.5 { - bump = 2 + if record.DataTotalMiB > 0 && record.DataProcessedMiB >= 0 { + r := record.DataTotalMiB - record.DataProcessedMiB + if r > 0 { + return r + } } - return clampSyncRange(current + bump) + return 0 } func updateMetricState(record Record, state *State) { @@ -286,11 +350,15 @@ func almostEqual(a, b float64) bool { return math.Abs(a-b) < 0.01 } -func maxInt32(a, b int32) int32 { - if a > b { - return a +func clampPercent(v float64) int32 { + i := int32(v) + if i < SyncRangeMin { + return SyncRangeMin + } + if i > SyncRangeMax { + return SyncRangeMax } - return b + return i } func clampFloat(v, minV, maxV float64) float64 { diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go index 5e34948994..660da41182 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go @@ -17,6 +17,7 @@ limitations under the License. 
package progress import ( + "math" "testing" "time" @@ -27,20 +28,27 @@ import ( func TestProgress_MonotonicGrowth(t *testing.T) { now := time.Now() p := NewProgress() + uid := types.UID("vmop") first := p.SyncProgress(Record{ - OperationUID: types.UID("vmop"), + OperationUID: uid, Now: now, StartedAt: now.Add(-20 * time.Second), PreviousProgress: 10, Phase: virtv1.MigrationRunning, + DataTotalMiB: 1024, + DataProcessedMiB: 100, + DataRemainingMiB: 900, }) second := p.SyncProgress(Record{ - OperationUID: types.UID("vmop"), + OperationUID: uid, Now: now.Add(40 * time.Second), StartedAt: now.Add(-80 * time.Second), PreviousProgress: first, Phase: virtv1.MigrationRunning, + DataTotalMiB: 1024, + DataProcessedMiB: 200, + DataRemainingMiB: 800, }) if second < first { @@ -58,7 +66,6 @@ func TestProgress_SyncRangeCaps(t *testing.T) { StartedAt: now.Add(-2 * time.Hour), PreviousProgress: 10, Phase: virtv1.MigrationRunning, - Mode: virtv1.MigrationPostCopy, HasIteration: true, Iteration: 1, HasThrottle: true, @@ -83,67 +90,30 @@ func TestProgress_StallBump(t *testing.T) { OperationUID: uid, Now: now, StartedAt: now.Add(-50 * time.Second), - PreviousProgress: 70, - Phase: virtv1.MigrationRunning, - }) - progress := p.SyncProgress(Record{ - OperationUID: uid, - Now: now.Add(bulkStallWindow + time.Second), - StartedAt: now.Add(-50 * time.Second), - PreviousProgress: first, - Phase: virtv1.MigrationRunning, - }) - - if progress <= first { - t.Fatalf("expected stall bump to increase progress beyond %d, got=%d", first, progress) - } -} - -func TestProgress_DoesNotAdvanceTooFastBetweenRapidSyncs(t *testing.T) { - now := time.Now() - p := NewProgress() - uid := types.UID("vmop") - - first := p.SyncProgress(Record{ - OperationUID: uid, - Now: now, - StartedAt: now.Add(-2 * time.Minute), - PreviousProgress: 10, + PreviousProgress: 30, Phase: virtv1.MigrationRunning, DataTotalMiB: 1024, - DataProcessedMiB: 900, - }) - second := p.SyncProgress(Record{ - OperationUID: uid, - Now: 
now.Add(200 * time.Millisecond), - StartedAt: now.Add(-2 * time.Minute), - PreviousProgress: first, - Phase: virtv1.MigrationRunning, - DataTotalMiB: 1024, - DataProcessedMiB: 900, + DataProcessedMiB: 300, + DataRemainingMiB: 700, }) - if second != first { - t.Fatalf("expected no progress change during rapid resync, first=%d second=%d", first, second) + var progress int32 + for i := 1; i <= 5; i++ { + stallDuration := time.Duration(i) * time.Duration(bulkStallSeconds+2) * time.Second + progress = p.SyncProgress(Record{ + OperationUID: uid, + Now: now.Add(stallDuration), + StartedAt: now.Add(-50 * time.Second), + PreviousProgress: progress, + Phase: virtv1.MigrationRunning, + DataTotalMiB: 1024, + DataProcessedMiB: 300, + DataRemainingMiB: 700, + }) } -} - -func TestProgress_CapsSingleBulkStep(t *testing.T) { - now := time.Now() - p := NewProgress() - - progress := p.SyncProgress(Record{ - OperationUID: types.UID("vmop"), - Now: now, - StartedAt: now.Add(-2 * time.Minute), - PreviousProgress: 10, - Phase: virtv1.MigrationRunning, - DataTotalMiB: 1024, - DataProcessedMiB: 1024, - }) - if progress > 17 { - t.Fatalf("expected first bulk step to be capped, got=%d", progress) + if progress <= first { + t.Fatalf("expected stall bump to increase progress beyond %d, got=%d", first, progress) } } @@ -199,6 +169,7 @@ func TestProgress_EntersIterativePhaseByIteration(t *testing.T) { Phase: virtv1.MigrationRunning, DataTotalMiB: 1024, DataProcessedMiB: 512, + DataRemainingMiB: 512, }) iterative := p.SyncProgress(Record{ OperationUID: uid, @@ -212,6 +183,7 @@ func TestProgress_EntersIterativePhaseByIteration(t *testing.T) { AutoConvergeThrottle: 50, Throttle: 0.5, DataTotalMiB: 1024, + DataProcessedMiB: 960, DataRemainingMiB: 64, }) @@ -240,32 +212,6 @@ func TestProgress_UsesRemainingDataFallback(t *testing.T) { } } -func TestMetricPercent_ClampsProcessedAboveTotal(t *testing.T) { - metricPct, hasMetric := metricPercent(Record{DataTotalMiB: 100, DataProcessedMiB: 200}) - if 
!hasMetric { - t.Fatal("expected metric to be available") - } - if metricPct != 100 { - t.Fatalf("expected clamped metric percent=100, got=%v", metricPct) - } -} - -func TestMetricPercent_ClampsRemainingAboveTotal(t *testing.T) { - metricPct, hasMetric := metricPercent(Record{DataTotalMiB: 100, DataRemainingMiB: 200}) - if !hasMetric { - t.Fatal("expected metric to be available") - } - if metricPct != 0 { - t.Fatalf("expected clamped metric percent=0, got=%v", metricPct) - } -} - -func TestMetricPercent_RequiresPositiveTotal(t *testing.T) { - if _, hasMetric := metricPercent(Record{DataTotalMiB: 0, DataProcessedMiB: 10}); hasMetric { - t.Fatal("expected metric to be unavailable for zero total") - } -} - func TestProgress_ZeroElapsed(t *testing.T) { now := time.Now() p := NewProgress() @@ -295,6 +241,8 @@ func TestProgress_VeryLargeElapsedStaysInRange(t *testing.T) { Phase: virtv1.MigrationRunning, HasIteration: true, Iteration: 5, + DataTotalMiB: 1024, + DataRemainingMiB: 10, }) if progress < SyncRangeMin || progress > SyncRangeMax { @@ -350,3 +298,174 @@ func TestForget_RemovesState(t *testing.T) { t.Fatalf("expected empty store after forget, got=%d", p.store.Len()) } } + +func TestProgress_SmoothGrowthOverMultipleSyncs(t *testing.T) { + now := time.Now() + p := NewProgress() + uid := types.UID("vmop") + start := now.Add(-10 * time.Second) + + var values []int32 + prev := SyncRangeMin + totalMiB := 1024.0 + remaining := 900.0 + + for i := 0; i < 40; i++ { + tick := now.Add(time.Duration(i*3) * time.Second) + remaining = math.Max(10, remaining-25) + processed := totalMiB - remaining + + iter := uint32(0) + hasIter := false + if i >= 5 { + iter = uint32(i - 4) + hasIter = true + } + + progress := p.SyncProgress(Record{ + OperationUID: uid, + Now: tick, + StartedAt: start, + PreviousProgress: prev, + Phase: virtv1.MigrationRunning, + HasIteration: hasIter, + Iteration: iter, + DataTotalMiB: totalMiB, + DataProcessedMiB: processed, + DataRemainingMiB: remaining, + }) 
+ + values = append(values, progress) + prev = progress + } + + for i := 1; i < len(values); i++ { + if values[i] < values[i-1] { + t.Fatalf("progress decreased at step %d: %d -> %d", i, values[i-1], values[i]) + } + } + + maxJump := int32(0) + for i := 1; i < len(values); i++ { + jump := values[i] - values[i-1] + if jump > maxJump { + maxJump = jump + } + } + if maxJump > 15 { + t.Fatalf("max single-step jump too large: %d (values: %v)", maxJump, values) + } +} + +func TestIterativeMetricRatio(t *testing.T) { + tests := []struct { + name string + state State + wantLow float64 + wantHigh float64 + }{ + { + name: "initial remaining equals threshold", + state: State{InitialRemaining: 50, SmoothedRemaining: 50, Threshold: 50}, + wantLow: 0.99, + wantHigh: 1.01, + }, + { + name: "smoothed at initial", + state: State{InitialRemaining: 1000, SmoothedRemaining: 1000, Threshold: 50}, + wantLow: -0.01, + wantHigh: 0.01, + }, + { + name: "smoothed at threshold", + state: State{InitialRemaining: 1000, SmoothedRemaining: 50, Threshold: 50}, + wantLow: 0.99, + wantHigh: 1.01, + }, + { + name: "smoothed halfway log scale", + state: State{InitialRemaining: 1000, SmoothedRemaining: 200, Threshold: 50}, + wantLow: 0.3, + wantHigh: 0.7, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ratio := iterativeMetricRatio(&tt.state) + if ratio < tt.wantLow || ratio > tt.wantHigh { + t.Fatalf("iterativeMetricRatio() = %v, want in [%v, %v]", ratio, tt.wantLow, tt.wantHigh) + } + }) + } +} + +func TestMaxRemaining(t *testing.T) { + tests := []struct { + name string + record Record + want float64 + }{ + { + name: "direct remaining", + record: Record{DataRemainingMiB: 100}, + want: 100, + }, + { + name: "computed from total minus processed", + record: Record{DataTotalMiB: 200, DataProcessedMiB: 150}, + want: 50, + }, + { + name: "no data", + record: Record{DataTotalMiB: unknownMetric, DataProcessedMiB: unknownMetric, DataRemainingMiB: unknownMetric}, + want: 0, + 
}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := maxRemaining(tt.record) + if !almostEqual(got, tt.want) { + t.Fatalf("maxRemaining() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestObserveRemaining_EMA(t *testing.T) { + state := State{SmoothedRemaining: 100} + + observeRemaining(Record{DataRemainingMiB: 80}, &state) + if state.SmoothedRemaining >= 100 || state.SmoothedRemaining <= 80 { + t.Fatalf("expected EMA to move smoothed remaining between 80 and 100, got=%v", state.SmoothedRemaining) + } + + prev := state.SmoothedRemaining + observeRemaining(Record{DataRemainingMiB: 120}, &state) + if state.SmoothedRemaining <= prev { + t.Fatalf("expected EMA to increase smoothed remaining from %v, got=%v", prev, state.SmoothedRemaining) + } +} + +func TestProgress_AdaptiveStallWindow(t *testing.T) { + state := State{Progress: 50, SmoothedRemaining: 100, Threshold: 50} + record := Record{Throttle: 0} + + w := stallWindow(record, &state, true) + if w != iterStallSeconds { + t.Fatalf("expected base iterative stall window=%v, got=%v", iterStallSeconds, w) + } + + state.Progress = int32(iterativeCeiling) - 2 + w = stallWindow(record, &state, true) + if w != 24.0 { + t.Fatalf("expected late-stage stall window=24, got=%v", w) + } + + state.Progress = int32(iterativeCeiling) - 4 + w = stallWindow(record, &state, true) + if w != 14.0 { + t.Fatalf("expected near-end stall window=14, got=%v", w) + } +} diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store.go index 6d86896756..fdc4b501d5 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store.go @@ -29,14 +29,19 @@ const ( ) type State struct { - Progress int32 - Iterative bool - IterativeSince time.Time - LastUpdatedAt 
time.Time - LastMetricAt time.Time - LastIteration uint32 - LastProcessedMiB float64 - LastRemainingMiB float64 + Progress int32 + Iterative bool + IterativeSince time.Time + LastUpdatedAt time.Time + LastMetricAt time.Time + LastIteration uint32 + LastProcessedMiB float64 + LastRemainingMiB float64 + InitialTotal float64 + InitialRemaining float64 + SmoothedRemaining float64 + Threshold float64 + LastIncreaseAt time.Time } type Store struct { From b6b96222f9078586c370de397da16dcc9df95302 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Fri, 3 Apr 2026 15:17:54 +0200 Subject: [PATCH 20/27] feat(core): detect not converging migration from remaining data stall Signed-off-by: Daniil Antoshin --- .../migration/internal/handler/lifecycle.go | 24 +++++++++- .../internal/handler/lifecycle_test.go | 4 ++ .../migration/internal/progress/mapper.go | 3 +- .../internal/progress/mapper_test.go | 6 +-- .../migration/internal/progress/progress.go | 44 +++++++++++++++++++ .../vmop/migration/internal/progress/store.go | 2 + 6 files changed, 77 insertions(+), 6 deletions(-) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go index 937dddc0dc..90d23d69c4 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go @@ -318,6 +318,16 @@ func (h LifecycleHandler) syncOperationComplete(ctx context.Context, vmop *v1alp return err } + autoConverge := h.resolveAutoConverge(vmop) + + if reason == vmopcondition.ReasonSyncing { + record := migrationprogress.BuildRecord(vmop, mig, autoConverge, time.Now()) + if h.progressStrategy != nil && h.progressStrategy.IsNotConverging(record) { + reason = vmopcondition.ReasonNotConverging + msg = "Migration is not converging: data remaining is not decreasing at maximum 
throttle" + } + } + vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress if reason == vmopcondition.ReasonTargetScheduling { vmop.Status.Phase = v1alpha2.VMOPPhasePending @@ -603,8 +613,8 @@ func (h LifecycleHandler) calculateMigrationProgress( return progressTargetPreparing case vmopcondition.ReasonTargetDiskError: return progressTargetPreparing - case vmopcondition.ReasonSyncing: - record := migrationprogress.BuildRecord(vmop, mig, time.Now()) + case vmopcondition.ReasonSyncing, vmopcondition.ReasonNotConverging: + record := migrationprogress.BuildRecord(vmop, mig, h.resolveAutoConverge(vmop), time.Now()) return h.progressStrategy.SyncProgress(record) case vmopcondition.ReasonSourceSuspended: h.forgetProgress(vmop) @@ -624,6 +634,16 @@ func (h LifecycleHandler) calculateMigrationProgress( } } +func (h LifecycleHandler) resolveAutoConverge(vmop *v1alpha2.VirtualMachineOperation) bool { + if vmop == nil { + return false + } + if vmop.Spec.Force != nil && *vmop.Spec.Force { + return true + } + return false +} + func (h LifecycleHandler) forgetProgress(vmop *v1alpha2.VirtualMachineOperation) { if h.progressStrategy == nil || vmop == nil { return diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go index 2dd7ed24bf..931c74402a 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go @@ -55,6 +55,10 @@ func (s *progressStrategyStub) SyncProgress(_ migrationprogress.Record) int32 { return s.value } +func (s *progressStrategyStub) IsNotConverging(_ migrationprogress.Record) bool { + return false +} + func (s *progressStrategyStub) Forget(uid types.UID) { s.forgotten = append(s.forgotten, uid) } diff --git 
a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go index 4d4b7553d2..384dd41e9a 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go @@ -26,11 +26,12 @@ import ( const unknownMetric = -1.0 -func BuildRecord(vmop *v1alpha2.VirtualMachineOperation, mig *virtv1.VirtualMachineInstanceMigration, now time.Time) Record { +func BuildRecord(vmop *v1alpha2.VirtualMachineOperation, mig *virtv1.VirtualMachineInstanceMigration, autoConverge bool, now time.Time) Record { record := Record{ Now: now, StartedAt: now, PreviousProgress: previousProgress(vmop), + AutoConverge: autoConverge, DataTotalMiB: unknownMetric, DataProcessedMiB: unknownMetric, DataRemainingMiB: unknownMetric, diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go index c5c55cb59d..c37f577010 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go @@ -31,7 +31,7 @@ import ( func TestBuildRecord_NilVMOPAndMigration(t *testing.T) { now := time.Unix(1710000000, 0) - record := BuildRecord(nil, nil, now) + record := BuildRecord(nil, nil, false, now) if !record.StartedAt.Equal(now) { t.Fatalf("expected StartedAt=%v, got %v", now, record.StartedAt) @@ -54,7 +54,7 @@ func TestBuildRecord_UsesVMOPCreationTimestampAndPreviousProgress(t *testing.T) Status: v1alpha2.VirtualMachineOperationStatus{Progress: ptr.To[int32](42)}, } - record := BuildRecord(vmop, nil, now) + record := BuildRecord(vmop, nil, false, now) if record.OperationUID != vmop.UID { 
t.Fatalf("expected OperationUID=%s, got %s", vmop.UID, record.OperationUID) @@ -90,7 +90,7 @@ func TestBuildRecord_UsesMigrationState(t *testing.T) { }, } - record := BuildRecord(nil, mig, now) + record := BuildRecord(nil, mig, false, now) if record.Phase != virtv1.MigrationRunning { t.Fatalf("expected Phase=%s, got %s", virtv1.MigrationRunning, record.Phase) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go index e910ab0fc0..cd7cc0547a 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress.go @@ -33,6 +33,8 @@ const ( iterativeCeiling = 90.0 thresholdFactor = 0.05 + notConvergingWindow = 10 * time.Second + bulkTimeRate = 0.55 iterBaseTimeRate = 0.022 iterThrottleRate = 0.0012 @@ -49,6 +51,7 @@ const ( type Strategy interface { SyncProgress(record Record) int32 + IsNotConverging(record Record) bool Forget(uid types.UID) } @@ -64,6 +67,7 @@ type Record struct { HasThrottle bool AutoConvergeThrottle uint32 Throttle float64 + AutoConverge bool DataTotalMiB float64 DataProcessedMiB float64 DataRemainingMiB float64 @@ -149,6 +153,7 @@ func (p *Progress) SyncProgress(record Record) int32 { } updateMetricState(record, &state) + updateMinRemaining(record, &state) state.Progress = next state.LastUpdatedAt = record.Now state.LastIteration = record.Iteration @@ -161,6 +166,27 @@ func (p *Progress) SyncProgress(record Record) int32 { return next } +func (p *Progress) IsNotConverging(record Record) bool { + if p == nil || p.store == nil || record.OperationUID == "" { + return false + } + + state, ok := p.store.Load(record.OperationUID) + if !ok || !state.Iterative { + return false + } + + if !isAtMaxThrottle(record) { + return false + } + + if state.MinRemaining <= 0 || 
state.MinRemainingAt.IsZero() { + return false + } + + return record.Now.Sub(state.MinRemainingAt) >= notConvergingWindow +} + func (p *Progress) getState(record Record) State { if p == nil || p.store == nil || record.OperationUID == "" { return State{Progress: clampSyncRange(record.PreviousProgress), LastMetricAt: record.Now} @@ -324,6 +350,24 @@ func maxRemaining(record Record) float64 { return 0 } +func updateMinRemaining(record Record, state *State) { + remaining := maxRemaining(record) + if remaining <= 0 { + return + } + if state.MinRemaining <= 0 || remaining < state.MinRemaining { + state.MinRemaining = remaining + state.MinRemainingAt = record.Now + } +} + +func isAtMaxThrottle(record Record) bool { + if !record.AutoConverge { + return true + } + return record.HasThrottle && record.Throttle >= 0.99 +} + func updateMetricState(record Record, state *State) { if !metricChanged(record, state) { return diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store.go index fdc4b501d5..e595a71534 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/store.go @@ -42,6 +42,8 @@ type State struct { SmoothedRemaining float64 Threshold float64 LastIncreaseAt time.Time + MinRemaining float64 + MinRemainingAt time.Time } type Store struct { From b0d1d3346a4583f10ade53afa5f9d308eaf5e5f4 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Fri, 3 Apr 2026 16:03:12 +0200 Subject: [PATCH 21/27] fix(api, vmop): fix migration reason and progress logic - BuildRecord now reads AutoConverge from MigrationConfiguration.AllowAutoConverge instead of vmop.Spec.Force; remove resolveAutoConverge helper - isAtMaxThrottle: !AutoConverge => always at max (safe mode), AutoConverge => throttle >= 0.99 - Add live TargetDiskError 
detection via target pod events (FailedAttachVolume/FailedMount) - Preserve NotConverging terminal reason when migration fails with generic reason - Add unit tests for IsNotConverging, BuildRecord AutoConverge, and integration tests for TargetPreparing, TargetResumed, SourceSuspended, NotConverging persistence, TargetDiskError live detection Signed-off-by: Daniil Antoshin --- .../migration/internal/handler/lifecycle.go | 59 +++- .../internal/handler/lifecycle_test.go | 314 +++++++++++++++++- .../migration/internal/progress/mapper.go | 6 +- .../internal/progress/mapper_test.go | 59 +++- .../internal/progress/progress_test.go | 132 ++++++++ 5 files changed, 552 insertions(+), 18 deletions(-) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go index 90d23d69c4..0549eecb4f 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go @@ -24,6 +24,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" virtv1 "kubevirt.io/api/core/v1" @@ -63,6 +64,11 @@ const ( messageSourceVMSuspended = "Source VM suspended" ) +const ( + reasonFailedAttachVolume = "FailedAttachVolume" + reasonFailedMount = "FailedMount" +) + type Base interface { Init(vmop *v1alpha2.VirtualMachineOperation) ShouldExecuteOrSetFailedPhase(ctx context.Context, vmop *v1alpha2.VirtualMachineOperation) (bool, error) @@ -290,6 +296,13 @@ func (h LifecycleHandler) syncOperationComplete(ctx context.Context, vmop *v1alp h.recorder.Event(vmop, corev1.EventTypeWarning, v1alpha2.ReasonErrVMOPFailed, "VirtualMachineOperation failed") reason := h.getFailedReason(mig) + if reason == vmopcondition.ReasonFailed { + if prev, 
found := conditions.GetCondition(vmopcondition.TypeCompleted, vmop.Status.Conditions); found { + if prev.Reason == vmopcondition.ReasonNotConverging.String() { + reason = vmopcondition.ReasonNotConverging + } + } + } msg := h.getFailedMessage(reason, mig) progress := h.calculateMigrationProgress(vmop, mig, reason) vmop.Status.Progress = ptr.To(progress) @@ -318,10 +331,8 @@ func (h LifecycleHandler) syncOperationComplete(ctx context.Context, vmop *v1alp return err } - autoConverge := h.resolveAutoConverge(vmop) - if reason == vmopcondition.ReasonSyncing { - record := migrationprogress.BuildRecord(vmop, mig, autoConverge, time.Now()) + record := migrationprogress.BuildRecord(vmop, mig, time.Now()) if h.progressStrategy != nil && h.progressStrategy.IsNotConverging(record) { reason = vmopcondition.ReasonNotConverging msg = "Migration is not converging: data remaining is not decreasing at maximum throttle" @@ -581,6 +592,9 @@ func (h LifecycleHandler) getInProgressReasonAndMessage( if isPodPendingUnschedulable(pod) { return vmopcondition.ReasonTargetUnschedulable, fmt.Sprintf("Target pod %q is unschedulable", pod.Namespace+"/"+pod.Name), nil } + if diskErrMsg, hasDiskErr := h.getTargetPodDiskError(ctx, pod); hasDiskErr { + return vmopcondition.ReasonTargetDiskError, fmt.Sprintf("Target pod has disk attach error: %s", diskErrMsg), nil + } if mig.Status.MigrationState != nil { state := mig.Status.MigrationState @@ -614,7 +628,7 @@ func (h LifecycleHandler) calculateMigrationProgress( case vmopcondition.ReasonTargetDiskError: return progressTargetPreparing case vmopcondition.ReasonSyncing, vmopcondition.ReasonNotConverging: - record := migrationprogress.BuildRecord(vmop, mig, h.resolveAutoConverge(vmop), time.Now()) + record := migrationprogress.BuildRecord(vmop, mig, time.Now()) return h.progressStrategy.SyncProgress(record) case vmopcondition.ReasonSourceSuspended: h.forgetProgress(vmop) @@ -634,14 +648,39 @@ func (h LifecycleHandler) calculateMigrationProgress( } } 
-func (h LifecycleHandler) resolveAutoConverge(vmop *v1alpha2.VirtualMachineOperation) bool { - if vmop == nil { - return false +func (h LifecycleHandler) getTargetPodDiskError(ctx context.Context, pod *corev1.Pod) (string, bool) { + if pod == nil { + return "", false } - if vmop.Spec.Force != nil && *vmop.Spec.Force { - return true + + for _, cs := range pod.Status.InitContainerStatuses { + if cs.State.Waiting != nil && cs.State.Waiting.Reason == "ContainerCreating" { + break + } } - return false + for _, cs := range pod.Status.ContainerStatuses { + if cs.State.Waiting != nil && cs.State.Waiting.Reason == "ContainerCreating" { + eventList := &corev1.EventList{} + err := h.client.List(ctx, eventList, &client.ListOptions{ + Namespace: pod.Namespace, + FieldSelector: fields.SelectorFromSet(fields.Set{ + "involvedObject.name": pod.Name, + "involvedObject.kind": "Pod", + }), + }) + if err != nil { + return "", false + } + for _, e := range eventList.Items { + if e.Type == corev1.EventTypeWarning && (e.Reason == reasonFailedAttachVolume || e.Reason == reasonFailedMount) { + return fmt.Sprintf("%s: %s", e.Reason, e.Message), true + } + } + return "", false + } + } + + return "", false } func (h LifecycleHandler) forgetProgress(vmop *v1alpha2.VirtualMachineOperation) { diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go index 931c74402a..6f1599a7fb 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go @@ -47,8 +47,9 @@ import ( ) type progressStrategyStub struct { - value int32 - forgotten []types.UID + value int32 + isNotConverging bool + forgotten []types.UID } func (s *progressStrategyStub) SyncProgress(_ migrationprogress.Record) int32 { @@ -56,7 +57,7 @@ func 
(s *progressStrategyStub) SyncProgress(_ migrationprogress.Record) int32 { } func (s *progressStrategyStub) IsNotConverging(_ migrationprogress.Record) bool { - return false + return s.isNotConverging } func (s *progressStrategyStub) Forget(uid types.UID) { @@ -522,5 +523,312 @@ var _ = Describe("LifecycleHandler", func() { Expect(srv.Changed().Status.Progress).NotTo(BeNil()) Expect(*srv.Changed().Status.Progress).To(Equal(int32(100))) }) + + It("should override Syncing with NotConverging when strategy detects stall", func() { + vm := newVM(v1alpha2.PreferSafeMigrationPolicy) + vmop := newVMOPMigrate() + vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress + vmop.Status.Progress = ptr.To[int32](50) + + mig := newSimpleMigration(fmt.Sprintf("vmop-%s", vmop.Name), name) + mig.Status.Phase = virtv1.MigrationRunning + mig.Status.MigrationState = &virtv1.VirtualMachineInstanceMigrationState{ + StartTimestamp: &metav1.Time{Time: time.Now().Add(-2 * time.Minute)}, + } + + stub := &progressStrategyStub{value: 50, isNotConverging: true} + + fakeClient, srv = setupEnvironment(vmop, vm, mig) + migrationService := service.NewMigrationService(fakeClient, featuregates.Default()) + base := genericservice.NewBaseVMOPService(fakeClient, recorderMock) + h := NewLifecycleHandler(fakeClient, migrationService, base, recorderMock) + h.progressStrategy = stub + + _, err := h.Handle(ctx, srv.Changed()) + Expect(err).NotTo(HaveOccurred()) + Expect(srv.Changed().Status.Phase).To(Equal(v1alpha2.VMOPPhaseInProgress)) + + completed, found := conditions.GetCondition(vmopcondition.TypeCompleted, srv.Changed().Status.Conditions) + Expect(found).To(BeTrue()) + Expect(completed.Reason).To(Equal(vmopcondition.ReasonNotConverging.String())) + }) + + It("should stay Syncing when strategy does not detect stall", func() { + vm := newVM(v1alpha2.PreferSafeMigrationPolicy) + vmop := newVMOPMigrate() + vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress + vmop.Status.Progress = ptr.To[int32](30) + + mig := 
newSimpleMigration(fmt.Sprintf("vmop-%s", vmop.Name), name) + mig.Status.Phase = virtv1.MigrationRunning + mig.Status.MigrationState = &virtv1.VirtualMachineInstanceMigrationState{ + StartTimestamp: &metav1.Time{Time: time.Now().Add(-1 * time.Minute)}, + } + + stub := &progressStrategyStub{value: 30, isNotConverging: false} + + fakeClient, srv = setupEnvironment(vmop, vm, mig) + migrationService := service.NewMigrationService(fakeClient, featuregates.Default()) + base := genericservice.NewBaseVMOPService(fakeClient, recorderMock) + h := NewLifecycleHandler(fakeClient, migrationService, base, recorderMock) + h.progressStrategy = stub + + _, err := h.Handle(ctx, srv.Changed()) + Expect(err).NotTo(HaveOccurred()) + + completed, found := conditions.GetCondition(vmopcondition.TypeCompleted, srv.Changed().Status.Conditions) + Expect(found).To(BeTrue()) + Expect(completed.Reason).To(Equal(vmopcondition.ReasonSyncing.String())) + }) + + It("should prefer Aborted over NotConverging for terminal reason", func() { + h := LifecycleHandler{} + mig := &virtv1.VirtualMachineInstanceMigration{ + Status: virtv1.VirtualMachineInstanceMigrationStatus{ + MigrationState: &virtv1.VirtualMachineInstanceMigrationState{ + AbortRequested: true, + FailureReason: "no progress during convergence", + }, + }, + } + Expect(h.getFailedReason(mig)).To(Equal(vmopcondition.ReasonAborted)) + }) + + It("should set completed condition reason on success", func() { + vm := newVM(v1alpha2.PreferSafeMigrationPolicy) + vmop := newVMOPMigrate() + vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress + + mig := newSimpleMigration(fmt.Sprintf("vmop-%s", vmop.Name), name) + mig.Status.Phase = virtv1.MigrationSucceeded + + fakeClient, srv = setupEnvironment(vmop, vm, mig) + migrationService := service.NewMigrationService(fakeClient, featuregates.Default()) + base := genericservice.NewBaseVMOPService(fakeClient, recorderMock) + h := NewLifecycleHandler(fakeClient, migrationService, base, recorderMock) + + _, err := 
h.Handle(ctx, srv.Changed()) + Expect(err).NotTo(HaveOccurred()) + + completed, found := conditions.GetCondition(vmopcondition.TypeCompleted, srv.Changed().Status.Conditions) + Expect(found).To(BeTrue()) + Expect(completed.Reason).To(Equal(vmopcondition.ReasonMigrationCompleted.String())) + }) + + It("should use OperationFailed reason when migration is nil (mig==nil path)", func() { + vm := newVM(v1alpha2.PreferSafeMigrationPolicy) + vmop := newVMOPMigrate() + vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress + vmop.Status.Conditions = []metav1.Condition{ + { + Type: vmopcondition.TypeSignalSent.String(), + Status: metav1.ConditionTrue, + Reason: vmopcondition.ReasonSignalSentSuccess.String(), + }, + } + + fakeClient, srv = setupEnvironment(vmop, vm) + migrationService := service.NewMigrationService(fakeClient, featuregates.Default()) + base := genericservice.NewBaseVMOPService(fakeClient, recorderMock) + h := NewLifecycleHandler(fakeClient, migrationService, base, recorderMock) + + _, err := h.Handle(ctx, srv.Changed()) + Expect(err).NotTo(HaveOccurred()) + Expect(srv.Changed().Status.Phase).To(Equal(v1alpha2.VMOPPhaseFailed)) + + completed, found := conditions.GetCondition(vmopcondition.TypeCompleted, srv.Changed().Status.Conditions) + Expect(found).To(BeTrue()) + Expect(completed.Reason).To(Equal(vmopcondition.ReasonOperationFailed.String())) + }) + + It("should set target preparing progress (3) for scheduled migration", func() { + vm := newVM(v1alpha2.PreferSafeMigrationPolicy) + vmop := newVMOPMigrate() + vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress + + mig := newSimpleMigration(fmt.Sprintf("vmop-%s", vmop.Name), name) + mig.Status.Phase = virtv1.MigrationScheduled + + fakeClient, srv = setupEnvironment(vmop, vm, mig) + migrationService := service.NewMigrationService(fakeClient, featuregates.Default()) + base := genericservice.NewBaseVMOPService(fakeClient, recorderMock) + h := NewLifecycleHandler(fakeClient, migrationService, base, recorderMock) + + _, err 
:= h.Handle(ctx, srv.Changed()) + Expect(err).NotTo(HaveOccurred()) + Expect(srv.Changed().Status.Progress).NotTo(BeNil()) + Expect(*srv.Changed().Status.Progress).To(Equal(int32(3))) + + completed, found := conditions.GetCondition(vmopcondition.TypeCompleted, srv.Changed().Status.Conditions) + Expect(found).To(BeTrue()) + Expect(completed.Reason).To(Equal(vmopcondition.ReasonTargetPreparing.String())) + }) + + It("should set target resumed progress (92) when domain ready timestamp is set", func() { + vm := newVM(v1alpha2.PreferSafeMigrationPolicy) + vmop := newVMOPMigrate() + vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress + + mig := newSimpleMigration(fmt.Sprintf("vmop-%s", vmop.Name), name) + mig.Status.Phase = virtv1.MigrationRunning + mig.Status.MigrationState = &virtv1.VirtualMachineInstanceMigrationState{ + TargetNodeDomainReadyTimestamp: &metav1.Time{Time: time.Now()}, + } + + fakeClient, srv = setupEnvironment(vmop, vm, mig) + migrationService := service.NewMigrationService(fakeClient, featuregates.Default()) + base := genericservice.NewBaseVMOPService(fakeClient, recorderMock) + h := NewLifecycleHandler(fakeClient, migrationService, base, recorderMock) + + _, err := h.Handle(ctx, srv.Changed()) + Expect(err).NotTo(HaveOccurred()) + Expect(srv.Changed().Status.Progress).NotTo(BeNil()) + Expect(*srv.Changed().Status.Progress).To(Equal(int32(92))) + + completed, found := conditions.GetCondition(vmopcondition.TypeCompleted, srv.Changed().Status.Conditions) + Expect(found).To(BeTrue()) + Expect(completed.Reason).To(Equal(vmopcondition.ReasonTargetResumed.String())) + }) + + It("should set source suspended progress (91) when migration state completed flag is set", func() { + vm := newVM(v1alpha2.PreferSafeMigrationPolicy) + vmop := newVMOPMigrate() + vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress + + mig := newSimpleMigration(fmt.Sprintf("vmop-%s", vmop.Name), name) + mig.Status.Phase = virtv1.MigrationRunning + mig.Status.MigrationState = 
&virtv1.VirtualMachineInstanceMigrationState{ + Completed: true, + TargetNodeDomainReadyTimestamp: &metav1.Time{Time: time.Now()}, + } + + fakeClient, srv = setupEnvironment(vmop, vm, mig) + migrationService := service.NewMigrationService(fakeClient, featuregates.Default()) + base := genericservice.NewBaseVMOPService(fakeClient, recorderMock) + h := NewLifecycleHandler(fakeClient, migrationService, base, recorderMock) + + _, err := h.Handle(ctx, srv.Changed()) + Expect(err).NotTo(HaveOccurred()) + Expect(srv.Changed().Status.Progress).NotTo(BeNil()) + Expect(*srv.Changed().Status.Progress).To(Equal(int32(91))) + + completed, found := conditions.GetCondition(vmopcondition.TypeCompleted, srv.Changed().Status.Conditions) + Expect(found).To(BeTrue()) + Expect(completed.Reason).To(Equal(vmopcondition.ReasonSourceSuspended.String())) + }) + + It("should preserve NotConverging reason when migration fails with generic reason", func() { + vm := newVM(v1alpha2.PreferSafeMigrationPolicy) + vmop := newVMOPMigrate() + vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress + vmop.Status.Progress = ptr.To[int32](60) + vmop.Status.Conditions = []metav1.Condition{ + { + Type: vmopcondition.TypeSignalSent.String(), + Status: metav1.ConditionTrue, + Reason: vmopcondition.ReasonSignalSentSuccess.String(), + }, + { + Type: vmopcondition.TypeCompleted.String(), + Status: metav1.ConditionFalse, + Reason: vmopcondition.ReasonNotConverging.String(), + }, + } + + mig := newSimpleMigration(fmt.Sprintf("vmop-%s", vmop.Name), name) + mig.Status.Phase = virtv1.MigrationFailed + + fakeClient, srv = setupEnvironment(vmop, vm, mig) + migrationService := service.NewMigrationService(fakeClient, featuregates.Default()) + base := genericservice.NewBaseVMOPService(fakeClient, recorderMock) + h := NewLifecycleHandler(fakeClient, migrationService, base, recorderMock) + + _, err := h.Handle(ctx, srv.Changed()) + Expect(err).NotTo(HaveOccurred()) + 
Expect(srv.Changed().Status.Phase).To(Equal(v1alpha2.VMOPPhaseFailed)) + + completed, found := conditions.GetCondition(vmopcondition.TypeCompleted, srv.Changed().Status.Conditions) + Expect(found).To(BeTrue()) + Expect(completed.Reason).To(Equal(vmopcondition.ReasonNotConverging.String())) + }) + + It("should NOT preserve NotConverging when migration fails with specific reason (Aborted)", func() { + vm := newVM(v1alpha2.PreferSafeMigrationPolicy) + vmop := newVMOPMigrate() + vmop.Status.Phase = v1alpha2.VMOPPhaseInProgress + vmop.Status.Progress = ptr.To[int32](60) + vmop.Status.Conditions = []metav1.Condition{ + { + Type: vmopcondition.TypeSignalSent.String(), + Status: metav1.ConditionTrue, + Reason: vmopcondition.ReasonSignalSentSuccess.String(), + }, + { + Type: vmopcondition.TypeCompleted.String(), + Status: metav1.ConditionFalse, + Reason: vmopcondition.ReasonNotConverging.String(), + }, + } + + mig := newSimpleMigration(fmt.Sprintf("vmop-%s", vmop.Name), name) + mig.Status.Phase = virtv1.MigrationFailed + mig.Status.MigrationState = &virtv1.VirtualMachineInstanceMigrationState{ + AbortRequested: true, + } + + fakeClient, srv = setupEnvironment(vmop, vm, mig) + migrationService := service.NewMigrationService(fakeClient, featuregates.Default()) + base := genericservice.NewBaseVMOPService(fakeClient, recorderMock) + h := NewLifecycleHandler(fakeClient, migrationService, base, recorderMock) + + _, err := h.Handle(ctx, srv.Changed()) + Expect(err).NotTo(HaveOccurred()) + Expect(srv.Changed().Status.Phase).To(Equal(v1alpha2.VMOPPhaseFailed)) + + completed, found := conditions.GetCondition(vmopcondition.TypeCompleted, srv.Changed().Status.Conditions) + Expect(found).To(BeTrue()) + Expect(completed.Reason).To(Equal(vmopcondition.ReasonAborted.String())) + }) + + It("should return TargetDiskError when target pod has disk attach error", func() { + mig := newSimpleMigration("vmop-test", name) + mig.UID = "migration-uid" + mig.Status.Phase = 
virtv1.MigrationPreparingTarget + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: "target-pod", + Labels: map[string]string{ + virtv1.AppLabel: "virt-launcher", + virtv1.MigrationJobLabel: "migration-uid", + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodPending, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "compute", + State: corev1.ContainerState{Waiting: &corev1.ContainerStateWaiting{Reason: "ContainerCreating"}}, + }, + }, + }, + } + event := &corev1.Event{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: "disk-event"}, + InvolvedObject: corev1.ObjectReference{Name: "target-pod", Kind: "Pod", Namespace: namespace}, + Type: corev1.EventTypeWarning, + Reason: "FailedAttachVolume", + Message: "failed to attach disk", + } + + fakeClient, err := testutil.NewFakeClientWithObjects(mig, pod, event) + Expect(err).NotTo(HaveOccurred()) + + h := LifecycleHandler{client: fakeClient} + reason, _, err := h.getInProgressReasonAndMessage(ctx, mig) + Expect(err).NotTo(HaveOccurred()) + Expect(reason).To(Equal(vmopcondition.ReasonTargetDiskError)) + }) }) }) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go index 384dd41e9a..f590deb9c7 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go @@ -26,12 +26,11 @@ import ( const unknownMetric = -1.0 -func BuildRecord(vmop *v1alpha2.VirtualMachineOperation, mig *virtv1.VirtualMachineInstanceMigration, autoConverge bool, now time.Time) Record { +func BuildRecord(vmop *v1alpha2.VirtualMachineOperation, mig *virtv1.VirtualMachineInstanceMigration, now time.Time) Record { record := Record{ Now: now, StartedAt: now, PreviousProgress: previousProgress(vmop), - AutoConverge: autoConverge, 
DataTotalMiB: unknownMetric, DataProcessedMiB: unknownMetric, DataRemainingMiB: unknownMetric, @@ -58,6 +57,9 @@ func BuildRecord(vmop *v1alpha2.VirtualMachineOperation, mig *virtv1.VirtualMach record.DataTotalMiB = mapBytesToMiB(state.DataTotalBytes) record.DataProcessedMiB = mapBytesToMiB(state.DataProcessedBytes) record.DataRemainingMiB = mapBytesToMiB(state.DataRemainingBytes) + if state.MigrationConfiguration != nil && state.MigrationConfiguration.AllowAutoConverge != nil { + record.AutoConverge = *state.MigrationConfiguration.AllowAutoConverge + } } return record diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go index c37f577010..055af6a88f 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go @@ -31,7 +31,7 @@ import ( func TestBuildRecord_NilVMOPAndMigration(t *testing.T) { now := time.Unix(1710000000, 0) - record := BuildRecord(nil, nil, false, now) + record := BuildRecord(nil, nil, now) if !record.StartedAt.Equal(now) { t.Fatalf("expected StartedAt=%v, got %v", now, record.StartedAt) @@ -54,7 +54,7 @@ func TestBuildRecord_UsesVMOPCreationTimestampAndPreviousProgress(t *testing.T) Status: v1alpha2.VirtualMachineOperationStatus{Progress: ptr.To[int32](42)}, } - record := BuildRecord(vmop, nil, false, now) + record := BuildRecord(vmop, nil, now) if record.OperationUID != vmop.UID { t.Fatalf("expected OperationUID=%s, got %s", vmop.UID, record.OperationUID) @@ -90,7 +90,7 @@ func TestBuildRecord_UsesMigrationState(t *testing.T) { }, } - record := BuildRecord(nil, mig, false, now) + record := BuildRecord(nil, mig, now) if record.Phase != virtv1.MigrationRunning { t.Fatalf("expected Phase=%s, got %s", virtv1.MigrationRunning, record.Phase) @@ -238,3 +238,56 @@ 
func TestMapThrottle(t *testing.T) { }) } } + +func TestBuildRecord_AutoConvergeFromMigrationConfiguration(t *testing.T) { + now := time.Unix(1710000000, 0) + allowAutoConverge := true + mig := &virtv1.VirtualMachineInstanceMigration{ + Status: virtv1.VirtualMachineInstanceMigrationStatus{ + MigrationState: &virtv1.VirtualMachineInstanceMigrationState{ + MigrationConfiguration: &virtv1.MigrationConfiguration{ + AllowAutoConverge: &allowAutoConverge, + }, + }, + }, + } + + record := BuildRecord(nil, mig, now) + if !record.AutoConverge { + t.Fatal("expected AutoConverge=true from MigrationConfiguration.AllowAutoConverge") + } +} + +func TestBuildRecord_AutoConverge_False_WhenNotSet(t *testing.T) { + now := time.Unix(1710000000, 0) + + recordNoMig := BuildRecord(nil, nil, now) + if recordNoMig.AutoConverge { + t.Fatal("expected AutoConverge=false when mig is nil") + } + + migNoConfig := &virtv1.VirtualMachineInstanceMigration{ + Status: virtv1.VirtualMachineInstanceMigrationStatus{ + MigrationState: &virtv1.VirtualMachineInstanceMigrationState{}, + }, + } + recordNoConfig := BuildRecord(nil, migNoConfig, now) + if recordNoConfig.AutoConverge { + t.Fatal("expected AutoConverge=false when MigrationConfiguration is nil") + } + + allowAutoConverge := false + migFalse := &virtv1.VirtualMachineInstanceMigration{ + Status: virtv1.VirtualMachineInstanceMigrationStatus{ + MigrationState: &virtv1.VirtualMachineInstanceMigrationState{ + MigrationConfiguration: &virtv1.MigrationConfiguration{ + AllowAutoConverge: &allowAutoConverge, + }, + }, + }, + } + recordFalse := BuildRecord(nil, migFalse, now) + if recordFalse.AutoConverge { + t.Fatal("expected AutoConverge=false when AllowAutoConverge=false") + } +} diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go index 660da41182..d1296af6cf 100644 --- 
a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/progress_test.go @@ -469,3 +469,135 @@ func TestProgress_AdaptiveStallWindow(t *testing.T) { t.Fatalf("expected near-end stall window=14, got=%v", w) } } + +func makeIterativeState(p *Progress, uid types.UID, now time.Time, minRemaining float64, minRemainingAt time.Time) { + state := State{ + Progress: SyncRangeMin, + Iterative: true, + IterativeSince: now.Add(-30 * time.Second), + InitialRemaining: 500, + SmoothedRemaining: minRemaining + 10, + Threshold: 10, + MinRemaining: minRemaining, + MinRemainingAt: minRemainingAt, + } + p.store.Store(uid, state) +} + +func TestIsNotConverging_NoAutoConverge_Stall(t *testing.T) { + p := NewProgress() + uid := types.UID("vmop-nc") + now := time.Now() + stallStart := now.Add(-15 * time.Second) + + makeIterativeState(p, uid, now, 100.0, stallStart) + + record := Record{ + OperationUID: uid, + Now: now, + HasIteration: true, + Iteration: 3, + AutoConverge: false, + DataRemainingMiB: 100, + } + + if !p.IsNotConverging(record) { + t.Fatal("expected IsNotConverging=true when AutoConverge=false, iterative, stall>10s") + } +} + +func TestIsNotConverging_AutoConverge_ThrottleNotMax(t *testing.T) { + p := NewProgress() + uid := types.UID("vmop-nc2") + now := time.Now() + stallStart := now.Add(-15 * time.Second) + + makeIterativeState(p, uid, now, 100.0, stallStart) + + record := Record{ + OperationUID: uid, + Now: now, + HasIteration: true, + Iteration: 3, + AutoConverge: true, + HasThrottle: true, + Throttle: 0.5, + DataRemainingMiB: 100, + } + + if p.IsNotConverging(record) { + t.Fatal("expected IsNotConverging=false when AutoConverge=true and throttle not at max") + } +} + +func TestIsNotConverging_AutoConverge_MaxThrottle_Stall(t *testing.T) { + p := NewProgress() + uid := types.UID("vmop-nc3") + now := time.Now() + stallStart := now.Add(-15 * 
time.Second) + + makeIterativeState(p, uid, now, 100.0, stallStart) + + record := Record{ + OperationUID: uid, + Now: now, + HasIteration: true, + Iteration: 3, + AutoConverge: true, + HasThrottle: true, + Throttle: 0.99, + DataRemainingMiB: 100, + } + + if !p.IsNotConverging(record) { + t.Fatal("expected IsNotConverging=true when AutoConverge=true, throttle=max, stall>10s") + } +} + +func TestIsNotConverging_RemainingDecreased(t *testing.T) { + p := NewProgress() + uid := types.UID("vmop-nc4") + now := time.Now() + + makeIterativeState(p, uid, now, 50.0, now) + + record := Record{ + OperationUID: uid, + Now: now, + HasIteration: true, + Iteration: 3, + AutoConverge: false, + DataRemainingMiB: 50, + } + + if p.IsNotConverging(record) { + t.Fatal("expected IsNotConverging=false when minRemainingAt is just now (stall < 10s)") + } +} + +func TestIsNotConverging_NotIterative(t *testing.T) { + p := NewProgress() + uid := types.UID("vmop-nc5") + now := time.Now() + stallStart := now.Add(-15 * time.Second) + + state := State{ + Progress: SyncRangeMin, + Iterative: false, + MinRemaining: 100, + MinRemainingAt: stallStart, + } + p.store.Store(uid, state) + + record := Record{ + OperationUID: uid, + Now: now, + HasIteration: false, + AutoConverge: false, + DataRemainingMiB: 100, + } + + if p.IsNotConverging(record) { + t.Fatal("expected IsNotConverging=false when not iterative") + } +} From be7c47eca33cae2f26c421c479af81d984461f09 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Fri, 3 Apr 2026 16:16:55 +0200 Subject: [PATCH 22/27] fix(api, vmop): fix getTargetPodDiskError to check Pending pod directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kubelet emits FailedAttachVolume/FailedMount events while pod is in Pending phase — before ContainerCreating state is ever reached. The previous ContainerCreating guard prevented detection entirely. Now check events for any Pending pod that is not being deleted. 
Signed-off-by: Daniil Antoshin --- .../migration/internal/handler/lifecycle.go | 39 +++++++------------ 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go index 0549eecb4f..3b4f678a96 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go @@ -649,37 +649,26 @@ func (h LifecycleHandler) calculateMigrationProgress( } func (h LifecycleHandler) getTargetPodDiskError(ctx context.Context, pod *corev1.Pod) (string, bool) { - if pod == nil { + if pod == nil || pod.Status.Phase != corev1.PodPending || pod.DeletionTimestamp != nil { return "", false } - for _, cs := range pod.Status.InitContainerStatuses { - if cs.State.Waiting != nil && cs.State.Waiting.Reason == "ContainerCreating" { - break - } + eventList := &corev1.EventList{} + err := h.client.List(ctx, eventList, &client.ListOptions{ + Namespace: pod.Namespace, + FieldSelector: fields.SelectorFromSet(fields.Set{ + "involvedObject.name": pod.Name, + "involvedObject.kind": "Pod", + }), + }) + if err != nil { + return "", false } - for _, cs := range pod.Status.ContainerStatuses { - if cs.State.Waiting != nil && cs.State.Waiting.Reason == "ContainerCreating" { - eventList := &corev1.EventList{} - err := h.client.List(ctx, eventList, &client.ListOptions{ - Namespace: pod.Namespace, - FieldSelector: fields.SelectorFromSet(fields.Set{ - "involvedObject.name": pod.Name, - "involvedObject.kind": "Pod", - }), - }) - if err != nil { - return "", false - } - for _, e := range eventList.Items { - if e.Type == corev1.EventTypeWarning && (e.Reason == reasonFailedAttachVolume || e.Reason == reasonFailedMount) { - return fmt.Sprintf("%s: %s", e.Reason, e.Message), true - } - } - return "", false + 
for _, e := range eventList.Items { + if e.Type == corev1.EventTypeWarning && (e.Reason == reasonFailedAttachVolume || e.Reason == reasonFailedMount) { + return fmt.Sprintf("%s: %s", e.Reason, e.Message), true } } - return "", false } From 5619cf38754e316ba8029ebe51ce18af0d336e62 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Fri, 3 Apr 2026 17:17:54 +0200 Subject: [PATCH 23/27] Revert "chore(core): add 3p-kubevirt branch info" This reverts commit 56de1b86f7941385cf5a8606bb98a39216174512. Signed-off-by: Daniil Antoshin --- images/virt-artifact/werf.inc.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/images/virt-artifact/werf.inc.yaml b/images/virt-artifact/werf.inc.yaml index 9e0d340bfb..f30560fba6 100644 --- a/images/virt-artifact/werf.inc.yaml +++ b/images/virt-artifact/werf.inc.yaml @@ -2,7 +2,7 @@ # Source https://github.com/kubevirt/kubevirt/blob/v1.3.1/hack/dockerized#L15 {{- $gitRepoName := "3p-kubevirt" }} {{- $gitRepoUrl := (printf "%s/%s" "deckhouse" $gitRepoName) }} -{{- $tag := "feat/vm/migration-progress" }} +{{- $tag := get $.Core $gitRepoName }} {{- $version := (split "-" $tag)._0 }} --- @@ -13,10 +13,8 @@ secrets: - id: SOURCE_REPO value: {{ $.SOURCE_REPO }} shell: - installCacheVersion: "{{ now | date "Mon Jan 2 15:04:05 MST 2006" }}" install: - | - echo "$date" echo "Git clone {{ $gitRepoName }} repository..." 
git clone --depth=1 $(cat /run/secrets/SOURCE_REPO)/{{ $gitRepoUrl }} --branch {{ $tag }} /src/kubevirt From aa916e79044ebc90872438342895c06f02696e68 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Fri, 3 Apr 2026 17:51:14 +0200 Subject: [PATCH 24/27] feat(vmop): support nested migration transfer status Signed-off-by: Daniil Antoshin --- images/virtualization-artifact/go.mod | 2 +- .../migration/internal/progress/mapper.go | 35 +++++++++++++++---- .../internal/progress/mapper_test.go | 28 +++++++++------ 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/images/virtualization-artifact/go.mod b/images/virtualization-artifact/go.mod index f3b5835c2b..6f50f27f04 100644 --- a/images/virtualization-artifact/go.mod +++ b/images/virtualization-artifact/go.mod @@ -168,4 +168,4 @@ replace ( ) // Kubevirt API replaces -replace kubevirt.io/api => github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v0.0.0-20260403095053-aefa74c02fee +replace kubevirt.io/api => ../../../3p-kubevirt/staging/src/kubevirt.io/api diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go index f590deb9c7..77446e4b79 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper.go @@ -54,9 +54,9 @@ func BuildRecord(vmop *v1alpha2.VirtualMachineOperation, mig *virtv1.VirtualMach record.Iteration, record.HasIteration = mapIteration(state) record.AutoConvergeThrottle, record.HasThrottle = mapThrottle(state) record.Throttle = normalizeThrottle(record.AutoConvergeThrottle, record.HasThrottle) - record.DataTotalMiB = mapBytesToMiB(state.DataTotalBytes) - record.DataProcessedMiB = mapBytesToMiB(state.DataProcessedBytes) - record.DataRemainingMiB = mapBytesToMiB(state.DataRemainingBytes) + record.DataTotalMiB = 
mapDataTotalMiB(state) + record.DataProcessedMiB = mapDataProcessedMiB(state) + record.DataRemainingMiB = mapDataRemainingMiB(state) if state.MigrationConfiguration != nil && state.MigrationConfiguration.AllowAutoConverge != nil { record.AutoConverge = *state.MigrationConfiguration.AllowAutoConverge } @@ -80,17 +80,38 @@ func previousProgress(vmop *v1alpha2.VirtualMachineOperation) int32 { } func mapIteration(state *virtv1.VirtualMachineInstanceMigrationState) (uint32, bool) { - if state == nil || state.Iteration == nil { + if state == nil || state.TransferStatus == nil || state.TransferStatus.Iteration == nil { return 0, false } - return *state.Iteration, true + return *state.TransferStatus.Iteration, true } func mapThrottle(state *virtv1.VirtualMachineInstanceMigrationState) (uint32, bool) { - if state == nil || state.AutoConvergeThrottle == nil { + if state == nil || state.TransferStatus == nil || state.TransferStatus.AutoConvergeThrottle == nil { return 0, false } - return *state.AutoConvergeThrottle, true + return *state.TransferStatus.AutoConvergeThrottle, true +} + +func mapDataTotalMiB(state *virtv1.VirtualMachineInstanceMigrationState) float64 { + if state == nil || state.TransferStatus == nil { + return unknownMetric + } + return mapBytesToMiB(state.TransferStatus.DataTotalBytes) +} + +func mapDataProcessedMiB(state *virtv1.VirtualMachineInstanceMigrationState) float64 { + if state == nil || state.TransferStatus == nil { + return unknownMetric + } + return mapBytesToMiB(state.TransferStatus.DataProcessedBytes) +} + +func mapDataRemainingMiB(state *virtv1.VirtualMachineInstanceMigrationState) float64 { + if state == nil || state.TransferStatus == nil { + return unknownMetric + } + return mapBytesToMiB(state.TransferStatus.DataRemainingBytes) } func normalizeThrottle(raw uint32, ok bool) float64 { diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go 
b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go index 055af6a88f..9395a1551a 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/progress/mapper_test.go @@ -79,13 +79,15 @@ func TestBuildRecord_UsesMigrationState(t *testing.T) { Status: virtv1.VirtualMachineInstanceMigrationStatus{ Phase: virtv1.MigrationRunning, MigrationState: &virtv1.VirtualMachineInstanceMigrationState{ - StartTimestamp: &start, - Mode: virtv1.MigrationPreCopy, - Iteration: &iteration, - AutoConvergeThrottle: &autoConvergeThrottle, - DataTotalBytes: &totalBytes, - DataProcessedBytes: &processedBytes, - DataRemainingBytes: &remainingBytes, + StartTimestamp: &start, + Mode: virtv1.MigrationPreCopy, + TransferStatus: &virtv1.VirtualMachineInstanceMigrationTransferStatus{ + Iteration: &iteration, + AutoConvergeThrottle: &autoConvergeThrottle, + DataTotalBytes: &totalBytes, + DataProcessedBytes: &processedBytes, + DataRemainingBytes: &remainingBytes, + }, }, }, } @@ -167,8 +169,10 @@ func TestMapIteration(t *testing.T) { wantSet: false, }, { - name: "explicit iteration", - state: &virtv1.VirtualMachineInstanceMigrationState{Iteration: ptr.To[uint32](7)}, + name: "explicit iteration", + state: &virtv1.VirtualMachineInstanceMigrationState{TransferStatus: &virtv1.VirtualMachineInstanceMigrationTransferStatus{ + Iteration: ptr.To[uint32](7), + }}, want: 7, wantSet: true, }, @@ -218,8 +222,10 @@ func TestMapThrottle(t *testing.T) { wantValue: 0, }, { - name: "explicit throttle", - state: &virtv1.VirtualMachineInstanceMigrationState{AutoConvergeThrottle: ptr.To[uint32](70)}, + name: "explicit throttle", + state: &virtv1.VirtualMachineInstanceMigrationState{TransferStatus: &virtv1.VirtualMachineInstanceMigrationTransferStatus{ + AutoConvergeThrottle: ptr.To[uint32](70), + }}, wantRaw: 70, wantSet: true, wantValue: 0.7, From 
ab23371abddf61a9d65d59d7215fbff379257b3b Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Fri, 3 Apr 2026 17:53:50 +0200 Subject: [PATCH 25/27] chore(vmop): update kubevirt api replace revision Signed-off-by: Daniil Antoshin --- images/virtualization-artifact/go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/images/virtualization-artifact/go.mod b/images/virtualization-artifact/go.mod index 6f50f27f04..0b1b9a3238 100644 --- a/images/virtualization-artifact/go.mod +++ b/images/virtualization-artifact/go.mod @@ -168,4 +168,4 @@ replace ( ) // Kubevirt API replaces -replace kubevirt.io/api => ../../../3p-kubevirt/staging/src/kubevirt.io/api +replace kubevirt.io/api => github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v0.0.0-20260403154920-301347b413ce From 0d83b468f8d70c5eddd1d6fd7d441ea9952f7617 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Fri, 3 Apr 2026 18:04:37 +0200 Subject: [PATCH 26/27] fix(vmop): check target disk errors only in container creating Signed-off-by: Daniil Antoshin --- images/virtualization-artifact/go.sum | 2 + .../migration/internal/handler/lifecycle.go | 14 ++++++- .../internal/handler/lifecycle_test.go | 41 +++++++++++++++++++ 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/images/virtualization-artifact/go.sum b/images/virtualization-artifact/go.sum index 03d6bf401a..e85f01e7e7 100644 --- a/images/virtualization-artifact/go.sum +++ b/images/virtualization-artifact/go.sum @@ -51,6 +51,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v0.0.0-20260403095053-aefa74c02fee h1:FL3Sn9OL9HZZX01vWiO6t6ps8nkxH+AOilBp+Rdp6iU= github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v0.0.0-20260403095053-aefa74c02fee/go.mod h1:wGZLfRa/b4w/V/hakmfcK0CmrAZGfpj+jN7BMt0s19E= 
+github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v0.0.0-20260403154920-301347b413ce h1:b6I/SUcA30j2wcOBBERbN20cKARaDAHNtSzP4XB6kgg= +github.com/deckhouse/3p-kubevirt/staging/src/kubevirt.io/api v0.0.0-20260403154920-301347b413ce/go.mod h1:wGZLfRa/b4w/V/hakmfcK0CmrAZGfpj+jN7BMt0s19E= github.com/deckhouse/deckhouse/pkg/log v0.0.0-20250226105106-176cd3afcdd5 h1:PsN1E0oxC/+4zdA977txrqUCuObFL3HAuu5Xnud8m8c= github.com/deckhouse/deckhouse/pkg/log v0.0.0-20250226105106-176cd3afcdd5/go.mod h1:Mk5HRzkc5pIcDIZ2JJ6DPuuqnwhXVkb3you8M8Mg+4w= github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go index 3b4f678a96..2618bcf3b8 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go @@ -649,7 +649,7 @@ func (h LifecycleHandler) calculateMigrationProgress( } func (h LifecycleHandler) getTargetPodDiskError(ctx context.Context, pod *corev1.Pod) (string, bool) { - if pod == nil || pod.Status.Phase != corev1.PodPending || pod.DeletionTimestamp != nil { + if pod == nil || !isContainerCreating(pod) || pod.DeletionTimestamp != nil { return "", false } @@ -703,6 +703,18 @@ func (h LifecycleHandler) getTargetPod(ctx context.Context, mig *virtv1.VirtualM return nil, nil } +func isContainerCreating(pod *corev1.Pod) bool { + if pod == nil || pod.Status.Phase != corev1.PodPending { + return false + } + for _, cs := range pod.Status.ContainerStatuses { + if cs.State.Waiting != nil && cs.State.Waiting.Reason == "ContainerCreating" { + return true + } + } + return false +} + func isPodPendingUnschedulable(pod *corev1.Pod) bool { if pod == nil { return false diff --git 
a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go index 6f1599a7fb..d89e17b529 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go @@ -830,5 +830,46 @@ var _ = Describe("LifecycleHandler", func() { Expect(err).NotTo(HaveOccurred()) Expect(reason).To(Equal(vmopcondition.ReasonTargetDiskError)) }) + + It("should ignore target pod disk attach error when pod is not in ContainerCreating", func() { + mig := newSimpleMigration("vmop-test", name) + mig.UID = "migration-uid" + mig.Status.Phase = virtv1.MigrationPreparingTarget + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: "target-pod", + Labels: map[string]string{ + virtv1.AppLabel: "virt-launcher", + virtv1.MigrationJobLabel: "migration-uid", + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodPending, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "compute", + State: corev1.ContainerState{Waiting: &corev1.ContainerStateWaiting{Reason: "ImagePullBackOff"}}, + }, + }, + }, + } + event := &corev1.Event{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: "disk-event"}, + InvolvedObject: corev1.ObjectReference{Name: "target-pod", Kind: "Pod", Namespace: namespace}, + Type: corev1.EventTypeWarning, + Reason: "FailedAttachVolume", + Message: "failed to attach disk", + } + + fakeClient, err := testutil.NewFakeClientWithObjects(mig, pod, event) + Expect(err).NotTo(HaveOccurred()) + + h := LifecycleHandler{client: fakeClient} + reason, _, err := h.getInProgressReasonAndMessage(ctx, mig) + Expect(err).NotTo(HaveOccurred()) + Expect(reason).To(Equal(vmopcondition.ReasonTargetPreparing)) + }) }) }) From 602ee4647f5b561f97cb3e71cf449e2dbf3e4e40 Mon Sep 17 00:00:00 2001 
From: Daniil Antoshin Date: Fri, 3 Apr 2026 18:39:25 +0200 Subject: [PATCH 27/27] fix(core, kubevirt): move migration transfer fields under transferStatus Signed-off-by: Daniil Antoshin --- build/components/versions.yml | 2 +- crds/embedded/virtualmachineinstances.yaml | 55 ++++++++++++---------- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/build/components/versions.yml b/build/components/versions.yml index f72a94e45d..f47ec65762 100644 --- a/build/components/versions.yml +++ b/build/components/versions.yml @@ -3,7 +3,7 @@ firmware: libvirt: v10.9.0 edk2: stable202411 core: - 3p-kubevirt: v1.6.2-v12n.21 + 3p-kubevirt: feat/vm/migration-progress 3p-containerized-data-importer: v1.60.3-v12n.17 distribution: 2.8.3 package: diff --git a/crds/embedded/virtualmachineinstances.yaml b/crds/embedded/virtualmachineinstances.yaml index 8868aa1a14..f68101a3b8 100644 --- a/crds/embedded/virtualmachineinstances.yaml +++ b/crds/embedded/virtualmachineinstances.yaml @@ -3989,31 +3989,6 @@ spec: completed: description: Indicates the migration completed type: boolean - dataProcessedBytes: - description: DataProcessedBytes is the amount of migration data already - processed by the source runtime. - format: int64 - type: integer - dataRemainingBytes: - description: DataRemainingBytes is the amount of migration data still - remaining on the source runtime. - format: int64 - type: integer - dataTotalBytes: - description: DataTotalBytes is the total amount of migration data reported - by the source runtime. - format: int64 - type: integer - iteration: - description: Iteration is the current migration iteration reported by - the source runtime. - format: int32 - type: integer - autoConvergeThrottle: - description: AutoConvergeThrottle is the current auto-converge throttle - reported by the source runtime. 
- format: int32 - type: integer endTimestamp: description: The time the migration action ended format: date-time @@ -4125,6 +4100,36 @@ spec: description: Lets us know if the vmi is currently running pre or post copy migration type: string + transferStatus: + description: TransferStatus contains migration transfer details + reported by the source runtime. + properties: + autoConvergeThrottle: + description: AutoConvergeThrottle is the current auto-converge throttle + reported by the source runtime. + format: int32 + type: integer + dataProcessedBytes: + description: DataProcessedBytes is the amount of migration data already + processed by the source runtime. + format: int64 + type: integer + dataRemainingBytes: + description: DataRemainingBytes is the amount of migration data still + remaining on the source runtime. + format: int64 + type: integer + dataTotalBytes: + description: DataTotalBytes is the total amount of migration data reported + by the source runtime. + format: int64 + type: integer + iteration: + description: Iteration is the current migration iteration reported by + the source runtime. + format: int32 + type: integer + type: object sourceNode: description: The source node that the VMI originated on type: string