diff --git a/build/components/versions.yml b/build/components/versions.yml index c35bd7f2e6..26c19cfe18 100644 --- a/build/components/versions.yml +++ b/build/components/versions.yml @@ -3,7 +3,7 @@ firmware: libvirt: v10.9.0 edk2: stable202411 core: - 3p-kubevirt: dvp/set-memory-limits-while-hotplugging + 3p-kubevirt: dvp/hotplug-cpu-prefer-cores-over-sockets 3p-containerized-data-importer: v1.60.3-v12n.17 distribution: 2.8.3 package: diff --git a/images/virtualization-artifact/pkg/controller/kvbuilder/kvvm.go b/images/virtualization-artifact/pkg/controller/kvbuilder/kvvm.go index 2321386fc1..a38897542a 100644 --- a/images/virtualization-artifact/pkg/controller/kvbuilder/kvvm.go +++ b/images/virtualization-artifact/pkg/controller/kvbuilder/kvvm.go @@ -20,6 +20,8 @@ import ( "fmt" "maps" "os" + "strconv" + "strings" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -52,6 +54,16 @@ const ( EnableMemoryHotplugThreshold = 1 * 1024 * 1024 * 1024 // 1 Gi (no hotplug for VMs with less than 1Gi) ) +const ( + // VCPUTopologyDynamicCoresAnnotation annotation indicates "distributed by sockets" or "dynamic cores number" VCPU topology. + VCPUTopologyDynamicCoresAnnotation = "internal.virtualization.deckhouse.io/vcpu-topology-dynamic-cores" + + CPUResourcesRequestsFractionAnnotation = "internal.virtualization.deckhouse.io/cpu-resources-requests-fraction" + + // CPUMaxCoresPerSocket is a maximum number of cores per socket. + CPUMaxCoresPerSocket = 16 +) + type KVVMOptions struct { EnableParavirtualization bool OsType v1alpha2.OsType @@ -247,6 +259,17 @@ func (b *KVVM) SetTopologySpreadConstraint(topology []corev1.TopologySpreadConst } func (b *KVVM) SetCPU(cores int, coreFraction string) error { + // Support for VMs started with cpu configuration in requests-limits. + // TODO delete this in the future (around 3-4 more versions after enabling cpu hotplug by default). + if b.ResourceExists && isVMRunningWithCPUResources(b.Resource) { + return b.setCPUNonHotpluggable(cores, coreFraction) + } + return b.setCPUHotpluggable(cores, coreFraction) +} + +// setCPUNonHotpluggable translates cpu configuration to requests and limit in KVVM. +// Note: this is a first implementation, cpu hotplug is not compatible with this strategy. +func (b *KVVM) setCPUNonHotpluggable(cores int, coreFraction string) error { domainSpec := &b.Resource.Spec.Template.Spec.Domain if domainSpec.CPU == nil { domainSpec.CPU = &virtv1.CPU{} @@ -255,6 +278,7 @@ func (b *KVVM) SetCPU(cores int, coreFraction string) error { if err != nil { return err } + cpuLimit := GetCPULimit(cores) if domainSpec.Resources.Requests == nil { domainSpec.Resources.Requests = make(map[corev1.ResourceName]resource.Quantity) @@ -273,6 +297,38 @@ func (b *KVVM) SetCPU(cores int, coreFraction string) error { return nil } +// setCPUHotpluggable translates cpu configuration to settings in domain.cpu field. +// This field is compatible with memory hotplug. +// Also, remove requests-limits for memory if any. +// Note: we swap cores and sockets to bypass vm-validation webhook. +func (b *KVVM) setCPUHotpluggable(cores int, coreFraction string) error { + domainSpec := &b.Resource.Spec.Template.Spec.Domain + if domainSpec.CPU == nil { + domainSpec.CPU = &virtv1.CPU{} + } + + fraction, err := GetCPUFraction(coreFraction) + if err != nil { + return err + } + b.SetKVVMIAnnotation(CPUResourcesRequestsFractionAnnotation, strconv.Itoa(fraction)) + + socketsNeeded, coresPerSocketNeeded := vm.CalculateCoresAndSockets(cores) + // Use "dynamic cores" hotplug strategy. + // Workaround: swap cores and sockets in domainSpec to bypass vm-validator webhook. + b.SetKVVMIAnnotation(VCPUTopologyDynamicCoresAnnotation, "") + domainSpec.CPU.Cores = uint32(socketsNeeded) + domainSpec.CPU.Sockets = uint32(coresPerSocketNeeded) + domainSpec.CPU.MaxSockets = CPUMaxCoresPerSocket + + // Remove CPU limits and requests if set by previous implementation. + res := &b.Resource.Spec.Template.Spec.Domain.Resources + delete(res.Requests, corev1.ResourceCPU) + delete(res.Limits, corev1.ResourceCPU) + + return nil +} + // SetMemory sets memory in kvvm. // There are 2 possibilities to set memory: // 1. Use domain.memory.guest field: it enabled memory hotplugging, but not set resources.limits. @@ -338,6 +394,22 @@ func (b *KVVM) setMemoryHotpluggable(memorySize resource.Quantity) { delete(res.Limits, corev1.ResourceMemory) } +func isVMRunningWithCPUResources(kvvm *virtv1.VirtualMachine) bool { + if kvvm == nil { + return false + } + + if kvvm.Status.PrintableStatus != virtv1.VirtualMachineStatusRunning { + return false + } + + res := kvvm.Spec.Template.Spec.Domain.Resources + _, hasCPURequests := res.Requests[corev1.ResourceCPU] + _, hasCPULimits := res.Limits[corev1.ResourceCPU] + + return hasCPURequests && hasCPULimits +} + func isVMRunningWithMemoryResources(kvvm *virtv1.VirtualMachine) bool { if kvvm == nil { return false @@ -354,6 +426,38 @@ func isVMRunningWithMemoryResources(kvvm *virtv1.VirtualMachine) bool { return hasMemoryRequests && hasMemoryLimits } +func GetCPUFraction(cpuFraction string) (int, error) { + if cpuFraction == "" { + return 100, nil + } + fraction := intstr.FromString(cpuFraction) + value, _, err := getIntOrPercentValueSafely(&fraction) + if err != nil { + return 0, fmt.Errorf("invalid value for cpu fraction: %w", err) + } + return value, nil +} + +func getIntOrPercentValueSafely(intOrStr *intstr.IntOrString) (int, bool, error) { + switch intOrStr.Type { + case intstr.Int: + return intOrStr.IntValue(), false, nil + case intstr.String: + s := intOrStr.StrVal + if !strings.HasSuffix(s, "%") { + return 0, false, fmt.Errorf("invalid type: string is not a percentage") + } + s = strings.TrimSuffix(intOrStr.StrVal, "%") + + v, err := strconv.Atoi(s) + if err != nil { + return 0, false, fmt.Errorf("invalid value %q: %w", intOrStr.StrVal, err) + } + return v, true, nil + } + return 0, false, fmt.Errorf("invalid type: neither int nor percentage") +} + func GetCPURequest(cores int, coreFraction string) (*resource.Quantity, error) { if coreFraction == "" { return GetCPULimit(cores), nil diff --git a/images/virtualization-artifact/pkg/controller/vm/internal/statistic.go b/images/virtualization-artifact/pkg/controller/vm/internal/statistic.go index 0b0f504e15..41471b19cb 100644 --- a/images/virtualization-artifact/pkg/controller/vm/internal/statistic.go +++ b/images/virtualization-artifact/pkg/controller/vm/internal/statistic.go @@ -18,6 +18,7 @@ package internal import ( "context" + "fmt" "math" "strconv" "time" @@ -30,6 +31,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" "github.com/deckhouse/virtualization-controller/pkg/common/vm" + "github.com/deckhouse/virtualization-controller/pkg/controller/kvbuilder" "github.com/deckhouse/virtualization-controller/pkg/controller/vm/internal/state" "github.com/deckhouse/virtualization/api/core/v1alpha2" ) @@ -68,7 +70,9 @@ func (h *StatisticHandler) Handle(ctx context.Context, s state.VirtualMachineSta } h.syncPods(changed, pod, pods) - h.syncResources(changed, kvvmi, pod) + if err := h.syncResources(changed, kvvmi, pod); err != nil { + return reconcile.Result{}, err + } return reconcile.Result{}, nil } @@ -80,48 +84,52 @@ func (h *StatisticHandler) Name() string { func (h *StatisticHandler) syncResources(changed *v1alpha2.VirtualMachine, kvvmi *virtv1.VirtualMachineInstance, pod *corev1.Pod, -) { +) error { if changed == nil { - return + return nil } var resources v1alpha2.ResourcesStatus switch pod { case nil: var ( - cpuKVVMIRequest resource.Quantity + cpuKVVMIRequest *resource.Quantity memorySize resource.Quantity - cores int topology v1alpha2.Topology coreFraction string ) if kvvmi == nil { memorySize = changed.Spec.Memory.Size - cores = changed.Spec.CPU.Cores - coreFraction = changed.Spec.CPU.CoreFraction - sockets, coresPerSocket := vm.CalculateCoresAndSockets(cores) + sockets, coresPerSocket := vm.CalculateCoresAndSockets(changed.Spec.CPU.Cores) topology = v1alpha2.Topology{CoresPerSocket: coresPerSocket, Sockets: sockets} + coreFraction = changed.Spec.CPU.CoreFraction } else { - cpuKVVMIRequest = kvvmi.Spec.Domain.Resources.Requests[corev1.ResourceCPU] + var err error + cpuKVVMIRequest, err = h.getCoresRequestedByKVVMI(kvvmi) + if err != nil { + return err + } memorySize = kvvmi.Spec.Domain.Resources.Requests[corev1.ResourceMemory] - cores = h.getCoresByKVVMI(kvvmi) coreFraction = h.getCoreFractionByKVVMI(kvvmi) topology = h.getCurrentTopologyByKVVMI(kvvmi) } resources = v1alpha2.ResourcesStatus{ CPU: v1alpha2.CPUStatus{ - Cores: cores, - CoreFraction: coreFraction, - RequestedCores: cpuKVVMIRequest, - Topology: topology, + Cores: topology.CoresPerSocket * topology.Sockets, + CoreFraction: coreFraction, + Topology: topology, }, Memory: v1alpha2.MemoryStatus{ Size: memorySize, }, } + if cpuKVVMIRequest != nil { + resources.CPU.RequestedCores = *cpuKVVMIRequest + } + default: if kvvmi == nil { - return + return nil } var ctr corev1.Container for _, container := range pod.Spec.Containers { @@ -130,15 +138,18 @@ func (h *StatisticHandler) syncResources(changed *v1alpha2.VirtualMachine, } } - cpuKVVMIRequest := kvvmi.Spec.Domain.Resources.Requests[corev1.ResourceCPU] + coreFraction := h.getCoreFractionByKVVMI(kvvmi) + topology := h.getCurrentTopologyByKVVMI(kvvmi) + cores := topology.CoresPerSocket * topology.Sockets + + cpuFractionRequests, err := h.getCoresRequestedByKVVMI(kvvmi) + if err != nil { + return fmt.Errorf("get core fraction by kvvmi: %w", err) + } cpuPODRequest := ctr.Resources.Requests[corev1.ResourceCPU] cpuOverhead := cpuPODRequest.DeepCopy() - cpuOverhead.Sub(cpuKVVMIRequest) - - cores := h.getCoresByKVVMI(kvvmi) - coreFraction := h.getCoreFractionByKVVMI(kvvmi) - topology := h.getCurrentTopologyByKVVMI(kvvmi) + cpuOverhead.Sub(*cpuFractionRequests) memoryKVVMIRequest := kvvmi.Spec.Domain.Resources.Requests[corev1.ResourceMemory] memoryPodRequest := ctr.Resources.Requests[corev1.ResourceMemory] @@ -152,7 +163,7 @@ func (h *StatisticHandler) syncResources(changed *v1alpha2.VirtualMachine, CPU: v1alpha2.CPUStatus{ Cores: cores, CoreFraction: coreFraction, - RequestedCores: cpuKVVMIRequest, + RequestedCores: *cpuFractionRequests, RuntimeOverhead: cpuOverhead, Topology: topology, }, @@ -163,44 +174,108 @@ func (h *StatisticHandler) syncResources(changed *v1alpha2.VirtualMachine, } } changed.Status.Resources = resources + return nil } +// getCoresByKVVMI +// TODO refactor: no need to get cores from limits after enabling CPU hotplug, kvvmi.Spec.Domain.CPU should be enough. func (h *StatisticHandler) getCoresByKVVMI(kvvmi *virtv1.VirtualMachineInstance) int { if kvvmi == nil { return -1 } - cpuKVVMILimit := kvvmi.Spec.Domain.Resources.Limits[corev1.ResourceCPU] - return int(cpuKVVMILimit.Value()) + + cpuKVVMILimit, hasLimits := kvvmi.Spec.Domain.Resources.Limits[corev1.ResourceCPU] + if hasLimits { + return int(cpuKVVMILimit.Value()) + } + + return 1 } func (h *StatisticHandler) getCoreFractionByKVVMI(kvvmi *virtv1.VirtualMachineInstance) string { if kvvmi == nil { return "" } + // Fraction is stored in annotation after enabling CPU hotplug. + cpuFractionStr, hasAnno := kvvmi.Annotations[kvbuilder.CPUResourcesRequestsFractionAnnotation] + if hasAnno { + return cpuFractionStr + "%" + } + // Also support previous implementation: calculate from requests and limits values. cpuKVVMIRequest := kvvmi.Spec.Domain.Resources.Requests[corev1.ResourceCPU] return strconv.Itoa(int(cpuKVVMIRequest.MilliValue())*100/(h.getCoresByKVVMI(kvvmi)*1000)) + "%" } +func (h *StatisticHandler) getCoresRequestedByKVVMI(kvvmi *virtv1.VirtualMachineInstance) (*resource.Quantity, error) { + if kvvmi == nil { + return nil, nil + } + // Fraction is stored in annotation after enabling CPU hotplug. + cpuFractionStr, hasAnno := kvvmi.Annotations[kvbuilder.CPUResourcesRequestsFractionAnnotation] + if hasAnno { + if kvvmi.Spec.Domain.CPU == nil { + return nil, fmt.Errorf("enabled dynamic cores with annotation %s, but missing spec.domain.cpu", kvbuilder.CPUResourcesRequestsFractionAnnotation) + } + cores := kvvmi.Spec.Domain.CPU.Cores * kvvmi.Spec.Domain.CPU.Sockets + + cpuFraction, err := strconv.Atoi(cpuFractionStr) + if err != nil { + return nil, err + } + + if cpuFraction <= 0 || cpuFraction > 100 { + cpuFraction = 100 + } + if cpuFraction == 100 { + return resource.NewQuantity(int64(cores), resource.DecimalSI), nil + } + + // Use multiplier to calculate fraction of millis. + requested := cores * 1000 + // Round up, to always return integer number of millis. + value := int64(math.Ceil(float64(cpuFraction) * (float64(requested)) / 100)) + return resource.NewMilliQuantity(value, resource.DecimalSI), nil + } + + // Also support previous implementation: return cpu requests if set. + if reqCPU, hasCPURequests := kvvmi.Spec.Domain.Resources.Requests[corev1.ResourceCPU]; hasCPURequests { + return &reqCPU, nil + } + + return nil, nil +} + func (h *StatisticHandler) getCurrentTopologyByKVVMI(kvvmi *virtv1.VirtualMachineInstance) v1alpha2.Topology { if kvvmi == nil { return v1alpha2.Topology{} } + cores := -1 + sockets := -1 + if kvvmi.Status.CurrentCPUTopology != nil { - return v1alpha2.Topology{ - CoresPerSocket: int(kvvmi.Status.CurrentCPUTopology.Cores), - Sockets: int(kvvmi.Status.CurrentCPUTopology.Sockets), - } + cores = int(kvvmi.Status.CurrentCPUTopology.Cores) + sockets = int(kvvmi.Status.CurrentCPUTopology.Sockets) } if kvvmi.Spec.Domain.CPU != nil { + cores = int(kvvmi.Spec.Domain.CPU.Cores) + sockets = int(kvvmi.Spec.Domain.CPU.Sockets) + } + + if _, isDynamicCores := kvvmi.Annotations[kvbuilder.VCPUTopologyDynamicCoresAnnotation]; isDynamicCores { + // Swap cores and sockets. + cores, sockets = sockets, cores + } + + if cores > 0 && sockets > 0 { return v1alpha2.Topology{ - CoresPerSocket: int(kvvmi.Spec.Domain.CPU.Cores), - Sockets: int(kvvmi.Spec.Domain.CPU.Sockets), + CoresPerSocket: cores, + Sockets: sockets, } } - cores := h.getCoresByKVVMI(kvvmi) + cores = h.getCoresByKVVMI(kvvmi) sockets, coresPerSocket := vm.CalculateCoresAndSockets(cores) return v1alpha2.Topology{CoresPerSocket: coresPerSocket, Sockets: sockets} } diff --git a/images/virtualization-artifact/pkg/controller/vm/internal/statistic_test.go b/images/virtualization-artifact/pkg/controller/vm/internal/statistic_test.go index 06a0cb659e..2b460b9453 100644 --- a/images/virtualization-artifact/pkg/controller/vm/internal/statistic_test.go +++ b/images/virtualization-artifact/pkg/controller/vm/internal/statistic_test.go @@ -30,6 +30,7 @@ import ( vmbuilder "github.com/deckhouse/virtualization-controller/pkg/builder/vm" "github.com/deckhouse/virtualization-controller/pkg/common/testutil" + "github.com/deckhouse/virtualization-controller/pkg/controller/kvbuilder" "github.com/deckhouse/virtualization-controller/pkg/controller/reconciler" "github.com/deckhouse/virtualization-controller/pkg/controller/vm/internal/state" "github.com/deckhouse/virtualization/api/core/v1alpha2" @@ -82,6 +83,48 @@ var _ = Describe("TestStatisticHandler", func() { return kvvmi } + // Generate KVVMI with "dynamic cores" specifics: cores and sockets are intentionally + // swapped to bypass kvvm validations. + newKVVMIHotplug := func(cores, sockets, maxCores int, cpuFraction, memory, maxMemory string) *virtv1.VirtualMachineInstance { + kvvmi := newEmptyKVVMI(vmName, vmNamespace) + memoryGuest := resource.MustParse(memory) + memoryMaxGuest := resource.MustParse(maxMemory) + + kvvmi.SetAnnotations(map[string]string{ + kvbuilder.CPUResourcesRequestsFractionAnnotation: cpuFraction, + kvbuilder.VCPUTopologyDynamicCoresAnnotation: "", + }) + kvvmi.Spec = virtv1.VirtualMachineInstanceSpec{ + Domain: virtv1.DomainSpec{ + CPU: &virtv1.CPU{ + Cores: uint32(sockets), + Sockets: uint32(cores), + MaxSockets: uint32(maxCores), + Threads: 1, + }, + Memory: &virtv1.Memory{ + Guest: &memoryGuest, + MaxGuest: &memoryMaxGuest, + }, + Resources: virtv1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceMemory: memoryGuest, + }, + }, + }, + } + kvvmi.Status = virtv1.VirtualMachineInstanceStatus{ + ActivePods: map[types.UID]string{podUID: podName}, + NodeName: nodeName, + Phase: virtv1.Running, + CurrentCPUTopology: &virtv1.CPUTopology{ + Cores: uint32(sockets), + Sockets: uint32(cores), + }, + } + return kvvmi + } + newPod := func(requestCPU, limitCPU, requestMemory, limitMemory string) *corev1.Pod { pod := newEmptyPOD(podName, vmNamespace, vmName) pod.UID = podUID @@ -209,6 +252,57 @@ var _ = Describe("TestStatisticHandler", func() { TopologyCoresPerSocket: 2, TopologySockets: 1, + MemorySize: 2147483648, + MemoryRuntimeOverhead: 0, + }, + ), + Entry("Hotplug enabled: 8 cores, 100% fraction, 2 Gi", + newVM(8, ptr.To("100%"), "2Gi"), + newKVVMIHotplug(8, 1, 16, "100", "2Gi", "256Gi"), + newPod("8", "8", "2Gi", "2Gi"), + expectedValues{ + CPUCores: 8, + CPUCoreFraction: "100%", + CPURequestedCores: 8000, + CPURuntimeOverhead: 0, + + TopologyCoresPerSocket: 8, + TopologySockets: 1, + + MemorySize: 2147483648, + MemoryRuntimeOverhead: 0, + }, + ), + Entry("Hotplug enabled: 8 cores, 25% fraction, 2 Gi", + newVM(8, ptr.To("25%"), "2Gi"), + newKVVMIHotplug(8, 1, 16, "25", "2Gi", "256Gi"), + newPod("2", "8", "2Gi", "2Gi"), + expectedValues{ + CPUCores: 8, + CPUCoreFraction: "25%", + CPURequestedCores: 2000, + CPURuntimeOverhead: 0, + + TopologyCoresPerSocket: 8, + TopologySockets: 1, + + MemorySize: 2147483648, + MemoryRuntimeOverhead: 0, + }, + ), + Entry("Hotplug enabled: 1 core, 25% fraction, 2 Gi", + newVM(1, ptr.To("25%"), "2Gi"), + newKVVMIHotplug(1, 1, 16, "25", "2Gi", "256Gi"), + newPod("250m", "1", "2Gi", "2Gi"), + expectedValues{ + CPUCores: 1, + CPUCoreFraction: "25%", + CPURequestedCores: 250, + CPURuntimeOverhead: 0, + + TopologyCoresPerSocket: 1, + TopologySockets: 1, + MemorySize: 2147483648, MemoryRuntimeOverhead: 0, }, diff --git a/images/virtualization-artifact/pkg/controller/vm/internal/sync_kvvm.go b/images/virtualization-artifact/pkg/controller/vm/internal/sync_kvvm.go index f80cf5cae7..4129a133d5 100644 --- a/images/virtualization-artifact/pkg/controller/vm/internal/sync_kvvm.go +++ b/images/virtualization-artifact/pkg/controller/vm/internal/sync_kvvm.go @@ -264,7 +264,7 @@ func (h *SyncKvvmHandler) Name() string { } func (h *SyncKvvmHandler) isWaiting(vm *v1alpha2.VirtualMachine) bool { - return !checkVirtualMachineConfiguration(vm) + return !virtualMachineDependenciesAreReady(vm) } func (h *SyncKvvmHandler) syncKVVM(ctx context.Context, s state.VirtualMachineState, allChanges vmchange.SpecChanges) (bool, error) { @@ -384,24 +384,31 @@ func (h *SyncKvvmHandler) updateKVVM(ctx context.Context, s state.VirtualMachine } if isChanged { - memory := newKVVM.Spec.Template.Spec.Domain.Memory - if memory != nil && memory.MaxGuest != nil && memory.MaxGuest.IsZero() { - // Zero maxGuest is a special value to patch KVVM to unset maxGuest. - // Set it to nil for next update call. - memory.MaxGuest = nil - - // 2 operations: remove memory.maxGuest; set memory.guest. - // Remove is not enough, remove and set are needed both to pass the kubevirt vm-validator webhook. - patchBytes, err := patch.NewJSONPatch( - patch.WithRemove("/spec/template/spec/domain/memory/maxGuest"), - patch.WithReplace("/spec/template/spec/domain/memory/guest", memory.Guest.String()), - ).Bytes() - if err != nil { - return fmt.Errorf("prepare json patch to unset memory.maxGuest: %w", err) + // Update can't handle removing of some fields, so patch them before update. + { + jsonPatch := patch.JSONPatch{} + memory := newKVVM.Spec.Template.Spec.Domain.Memory + if memory != nil && memory.MaxGuest != nil && memory.MaxGuest.IsZero() { + // maxGuest=0 is an invalid value for the vm-validator webhook, we use 0 as + // an internal special value to patch KVVM before update. + // Set it to nil in the spec, so Update call will not fail. + memory.MaxGuest = nil + + // Removing memory.maxGuest is not enough, replace memory.guest is needed to pass the vm-validator webhook. + jsonPatch.Append( + patch.WithRemove("/spec/template/spec/domain/memory/maxGuest"), + patch.WithReplace("/spec/template/spec/domain/memory/guest", memory.Guest.String()), + ) } - if err = h.client.Patch(ctx, newKVVM, client.RawPatch(types.JSONPatchType, patchBytes)); err != nil { - return fmt.Errorf("patch internal virtual machine to unset memory.maxGuest: %w", err) + if jsonPatch.Len() > 0 { + patchBytes, err := jsonPatch.Bytes() + if err != nil { + return fmt.Errorf("prepare json patch for internal virtual machine: %w", err) + } + if err = h.client.Patch(ctx, newKVVM, client.RawPatch(types.JSONPatchType, patchBytes)); err != nil { + return fmt.Errorf("patch internal virtual machine before update: %w", err) + } } } diff --git a/images/virtualization-artifact/pkg/controller/vm/internal/sync_metadata.go b/images/virtualization-artifact/pkg/controller/vm/internal/sync_metadata.go index df72bb67d9..40e021a30e 100644 --- a/images/virtualization-artifact/pkg/controller/vm/internal/sync_metadata.go +++ b/images/virtualization-artifact/pkg/controller/vm/internal/sync_metadata.go @@ -34,6 +34,7 @@ import ( "github.com/deckhouse/virtualization-controller/pkg/common/merger" "github.com/deckhouse/virtualization-controller/pkg/common/patch" commonvm "github.com/deckhouse/virtualization-controller/pkg/common/vm" + "github.com/deckhouse/virtualization-controller/pkg/controller/kvbuilder" "github.com/deckhouse/virtualization-controller/pkg/controller/netmanager" "github.com/deckhouse/virtualization-controller/pkg/controller/vm/internal/state" "github.com/deckhouse/virtualization/api/core/v1alpha2" @@ -188,10 +189,20 @@ func (h *SyncMetadataHandler) patchLabelsAndAnnotations(ctx context.Context, obj return h.client.Patch(ctx, obj, client.RawPatch(types.JSONPatchType, bytes)) } +var annotationsToKeep = []string{ + annotations.AnnNetworksSpec, + virtv1.AllowPodBridgeNetworkLiveMigrationAnnotation, + netmanager.AnnoIPAddressCNIRequest, + virtv1.USBMigrationStrategyAnn, + kvbuilder.CPUResourcesRequestsFractionAnnotation, + kvbuilder.VCPUTopologyDynamicCoresAnnotation, +} + // updateKVVMSpecTemplateMetadataAnnotations ensures that the special network annotation is present if it exists. // It also removes well-known annotations that are dangerous to propagate. func (h *SyncMetadataHandler) updateKVVMSpecTemplateMetadataAnnotations(currAnno, newAnno map[string]string) map[string]string { res := make(map[string]string, len(newAnno)) + for k, v := range newAnno { if k == annotations.AnnVMLastAppliedSpec || k == annotations.AnnVMClassLastAppliedSpec { continue @@ -200,20 +211,11 @@ func (h *SyncMetadataHandler) updateKVVMSpecTemplateMetadataAnnotations(currAnno res[k] = v } - if v, ok := currAnno[annotations.AnnNetworksSpec]; ok { - res[annotations.AnnNetworksSpec] = v - } - - if v, ok := currAnno[virtv1.AllowPodBridgeNetworkLiveMigrationAnnotation]; ok { - res[virtv1.AllowPodBridgeNetworkLiveMigrationAnnotation] = v - } - - if v, ok := currAnno[netmanager.AnnoIPAddressCNIRequest]; ok { - res[netmanager.AnnoIPAddressCNIRequest] = v - } - - if v, ok := currAnno[virtv1.USBMigrationStrategyAnn]; ok { - res[virtv1.USBMigrationStrategyAnn] = v + // Restore annotations set by kvbuilder. + for _, keepAnno := range annotationsToKeep { + if v, ok := currAnno[keepAnno]; ok { + res[keepAnno] = v + } } return commonvm.RemoveNonPropagatableAnnotations(res) diff --git a/images/virtualization-artifact/pkg/controller/vm/internal/sync_power_state.go b/images/virtualization-artifact/pkg/controller/vm/internal/sync_power_state.go index f3939f0ff0..fbb1143601 100644 --- a/images/virtualization-artifact/pkg/controller/vm/internal/sync_power_state.go +++ b/images/virtualization-artifact/pkg/controller/vm/internal/sync_power_state.go @@ -127,7 +127,7 @@ func (h *SyncPowerStateHandler) syncPowerState( }) changed := s.VirtualMachine().Changed() - isConfigurationApplied := checkVirtualMachineConfiguration(changed) + isConfigurationApplied := virtualMachineDependenciesAreReady(changed) maintenance, _ := conditions.GetCondition(vmcondition.TypeMaintenance, changed.Status.Conditions) var vmAction VMAction diff --git a/images/virtualization-artifact/pkg/controller/vm/internal/util.go b/images/virtualization-artifact/pkg/controller/vm/internal/util.go index c47ee80776..9bd9ef9639 100644 --- a/images/virtualization-artifact/pkg/controller/vm/internal/util.go +++ b/images/virtualization-artifact/pkg/controller/vm/internal/util.go @@ -95,14 +95,15 @@ type PhaseGetter func(vm *v1alpha2.VirtualMachine, kvvm *virtv1.VirtualMachine) var mapPhases = map[virtv1.VirtualMachinePrintableStatus]PhaseGetter{ // VirtualMachineStatusStopped indicates that the virtual machine is currently stopped and isn't expected to start. virtv1.VirtualMachineStatusStopped: func(vm *v1alpha2.VirtualMachine, kvvm *virtv1.VirtualMachine) v1alpha2.MachinePhase { - if vm != nil && kvvm != nil { - if !checkVirtualMachineConfiguration(vm) && - kvvm != nil && kvvm.Annotations[annotations.AnnVMStartRequested] == "true" { - return v1alpha2.MachinePending - } + if vm == nil { + return v1alpha2.MachineStopped + } + + if !virtualMachineDependenciesAreReady(vm) && kvvm.Annotations[annotations.AnnVMStartRequested] == "true" { + return v1alpha2.MachinePending } - if vm != nil && vm.Status.Phase == v1alpha2.MachinePending && + if vm.Status.Phase == v1alpha2.MachinePending && (vm.Spec.RunPolicy == v1alpha2.AlwaysOnPolicy || vm.Spec.RunPolicy == v1alpha2.AlwaysOnUnlessStoppedManually) { return v1alpha2.MachinePending } @@ -184,6 +185,11 @@ var mapPhases = map[virtv1.VirtualMachinePrintableStatus]PhaseGetter{ virtv1.VirtualMachineStatusWaitingForVolumeBinding: func(_ *v1alpha2.VirtualMachine, _ *virtv1.VirtualMachine) v1alpha2.MachinePhase { return v1alpha2.MachinePending }, + // VirtualMachineStatusWaitingForReceiver indicates that this virtual machine is a receiver VM and + // migration should start next. + virtv1.VirtualMachineStatusWaitingForReceiver: func(_ *v1alpha2.VirtualMachine, _ *virtv1.VirtualMachine) v1alpha2.MachinePhase { + return v1alpha2.MachineMigrating + }, kvvmEmptyPhase: func(_ *v1alpha2.VirtualMachine, _ *virtv1.VirtualMachine) v1alpha2.MachinePhase { return v1alpha2.MachinePending @@ -251,7 +257,8 @@ func podFinal(pod corev1.Pod) bool { return pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed } -func checkVirtualMachineConfiguration(vm *v1alpha2.VirtualMachine) bool { +// virtualMachineDependenciesAreReady returns whether VM +func virtualMachineDependenciesAreReady(vm *v1alpha2.VirtualMachine) bool { for _, c := range vm.Status.Conditions { switch vmcondition.Type(c.Type) { case vmcondition.TypeBlockDevicesReady: diff --git a/images/virtualization-artifact/pkg/controller/vm/internal/validators/cpu_count_validator.go b/images/virtualization-artifact/pkg/controller/vm/internal/validators/cpu_count_validator.go index a2949bbd93..17c2de3639 100644 --- a/images/virtualization-artifact/pkg/controller/vm/internal/validators/cpu_count_validator.go +++ b/images/virtualization-artifact/pkg/controller/vm/internal/validators/cpu_count_validator.go @@ -22,6 +22,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/webhook/admission" + commonvm "github.com/deckhouse/virtualization-controller/pkg/common/vm" "github.com/deckhouse/virtualization/api/core/v1alpha2" ) @@ -42,16 +43,11 @@ func (v *CPUCountValidator) ValidateUpdate(_ context.Context, _, newVM *v1alpha2 func (v *CPUCountValidator) Validate(vm *v1alpha2.VirtualMachine) (admission.Warnings, error) { cores := vm.Spec.CPU.Cores - switch { - case cores <= 16: + sockets, coresPerSocket := commonvm.CalculateCoresAndSockets(cores) + + if cores == sockets*coresPerSocket { return nil, nil - case cores > 16 && cores <= 32 && cores%2 != 0: - return nil, fmt.Errorf("the requested number of cores must be a multiple of 2") - case cores > 32 && cores <= 64 && cores%4 != 0: - return nil, fmt.Errorf("the requested number of cores must be a multiple of 4") - case cores > 64 && cores%8 != 0: - return nil, fmt.Errorf("the requested number of cores must be a multiple of 8") } - return nil, nil + return nil, fmt.Errorf("the requested number of cores must be a multiple of %d", sockets) } diff --git a/images/virtualization-artifact/pkg/controller/vmchange/comparator_cpu.go b/images/virtualization-artifact/pkg/controller/vmchange/comparator_cpu.go new file mode 100644 index 0000000000..945f14a48e --- /dev/null +++ b/images/virtualization-artifact/pkg/controller/vmchange/comparator_cpu.go @@ -0,0 +1,80 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package vmchange + +import ( + "k8s.io/component-base/featuregate" + + "github.com/deckhouse/virtualization-controller/pkg/common/vm" + "github.com/deckhouse/virtualization-controller/pkg/featuregates" + "github.com/deckhouse/virtualization/api/core/v1alpha2" +) + +type comparatorCPU struct { + featureGate featuregate.FeatureGate +} + +func NewComparatorCPU(featureGate featuregate.FeatureGate) VMSpecFieldComparator { + return &comparatorCPU{ + featureGate: featureGate, + } +} + +// Compare returns changes in the cpu section. +// // It supports CPU hotplug mechanism for cores changes. +// // It requires reboot if cpu fraction is changed or if COU hotplug is disabled. +func (c *comparatorCPU) Compare(current, desired *v1alpha2.VirtualMachineSpec) []FieldChange { + // Cores can be changed "on the fly" using CPU Hotplug ... + coresChangedAction := ActionApplyImmediate + // ... but sockets count change requires a reboot. + currentSockets, _ := vm.CalculateCoresAndSockets(current.CPU.Cores) + desiredSockets, _ := vm.CalculateCoresAndSockets(desired.CPU.Cores) + if currentSockets != desiredSockets { + coresChangedAction = ActionRestart + } + + // Require reboot if CPU hotplug is not enabled. + if !c.featureGate.Enabled(featuregates.HotplugCPUWithLiveMigration) { + coresChangedAction = ActionRestart + } + + coresChanges := compareInts("cpu.cores", current.CPU.Cores, desired.CPU.Cores, 0, coresChangedAction) + fractionChanges := compareStrings("cpu.coreFraction", current.CPU.CoreFraction, desired.CPU.CoreFraction, DefaultCPUCoreFraction, ActionRestart) + + // Yield full replace if both fields changed. + if HasChanges(coresChanges) && HasChanges(fractionChanges) { + return []FieldChange{ + { + Operation: ChangeReplace, + Path: "cpu", + CurrentValue: current.CPU, + DesiredValue: desired.CPU, + ActionRequired: ActionRestart, + }, + } + } + + if HasChanges(coresChanges) { + return coresChanges + } + + if HasChanges(fractionChanges) { + return fractionChanges + } + + return nil +} diff --git a/images/virtualization-artifact/pkg/controller/vmchange/comparators.go b/images/virtualization-artifact/pkg/controller/vmchange/comparators.go index 529d6bed7e..7de8cbf481 100644 --- a/images/virtualization-artifact/pkg/controller/vmchange/comparators.go +++ b/images/virtualization-artifact/pkg/controller/vmchange/comparators.go @@ -122,35 +122,6 @@ func compareBootloader(current, desired *v1alpha2.VirtualMachineSpec) []FieldCha ) } -// compareCPU returns changes in the cpu section. -func compareCPU(current, desired *v1alpha2.VirtualMachineSpec) []FieldChange { - coresChanges := compareInts("cpu.cores", current.CPU.Cores, desired.CPU.Cores, 0, ActionRestart) - fractionChanges := compareStrings("cpu.coreFraction", current.CPU.CoreFraction, desired.CPU.CoreFraction, DefaultCPUCoreFraction, ActionRestart) - - // Yield full replace if both fields changed. - if HasChanges(coresChanges) && HasChanges(fractionChanges) { - return []FieldChange{ - { - Operation: ChangeReplace, - Path: "cpu", - CurrentValue: current.CPU, - DesiredValue: desired.CPU, - ActionRequired: ActionRestart, - }, - } - } - - if HasChanges(coresChanges) { - return coresChanges - } - - if HasChanges(fractionChanges) { - return fractionChanges - } - - return nil -} - func compareProvisioning(current, desired *v1alpha2.VirtualMachineSpec) []FieldChange { changes := compareEmpty( "provisioning", diff --git a/images/virtualization-artifact/pkg/controller/vmchange/compare.go b/images/virtualization-artifact/pkg/controller/vmchange/compare.go index aa1f44259b..d734167761 100644 --- a/images/virtualization-artifact/pkg/controller/vmchange/compare.go +++ b/images/virtualization-artifact/pkg/controller/vmchange/compare.go @@ -75,7 +75,7 @@ func (v *VMSpecComparator) comparators() []VMSpecFieldComparator { vmSpecFieldComparator(compareEnableParavirtualization), vmSpecFieldComparator(compareOSType), vmSpecFieldComparator(compareBootloader), - vmSpecFieldComparator(compareCPU), + NewComparatorCPU(v.featureGate), NewComparatorMemory(v.featureGate), vmSpecFieldComparator(compareBlockDevices), vmSpecFieldComparator(compareProvisioning), diff --git a/images/virtualization-artifact/pkg/controller/workload-updater/internal/handler/hotplug.go b/images/virtualization-artifact/pkg/controller/workload-updater/internal/handler/hotplug.go index c2bbdeb6f2..64845b963a 100644 --- a/images/virtualization-artifact/pkg/controller/workload-updater/internal/handler/hotplug.go +++ b/images/virtualization-artifact/pkg/controller/workload-updater/internal/handler/hotplug.go @@ -18,6 +18,7 @@ package handler import ( "context" + "fmt" corev1 "k8s.io/api/core/v1" virtv1 "kubevirt.io/api/core/v1" @@ -56,7 +57,12 @@ func (h *HotplugHandler) Handle(ctx context.Context, vm *v1alpha2.VirtualMachine } cond, _ := conditions.GetKVVMICondition(virtv1.VirtualMachineInstanceMemoryChange, kvvmi.Status.Conditions) - if cond.Status != corev1.ConditionTrue { + isMemoryHotplug := cond.Status == corev1.ConditionTrue + + cond, _ = conditions.GetKVVMICondition(virtv1.VirtualMachineInstanceVCPUChange, kvvmi.Status.Conditions) + isCPUHotplug := cond.Status == corev1.ConditionTrue + + if !isCPUHotplug && !isMemoryHotplug { return reconcile.Result{}, nil } @@ -76,5 +82,5 @@ func (h *HotplugHandler) Name() string { } func getHotplugResourcesSum(vm *v1alpha2.VirtualMachine) string { - return vm.Spec.Memory.Size.String() + return fmt.Sprintf("cpu.cores=%d,memory.size=%s", vm.Spec.CPU.Cores, vm.Spec.Memory.Size.String()) } diff --git a/images/virtualization-artifact/pkg/controller/workload-updater/internal/watcher/kvvmi.go b/images/virtualization-artifact/pkg/controller/workload-updater/internal/watcher/kvvmi.go index 59eec3b3cd..f58f1d79be 100644 --- a/images/virtualization-artifact/pkg/controller/workload-updater/internal/watcher/kvvmi.go +++ b/images/virtualization-artifact/pkg/controller/workload-updater/internal/watcher/kvvmi.go @@ -55,8 +55,15 @@ func (w *KVVMIWatcher) Watch(mgr manager.Manager, ctr controller.Controller) err DeleteFunc: func(e event.TypedDeleteEvent[*virtv1.VirtualMachineInstance]) bool { return false }, UpdateFunc: func(e event.TypedUpdateEvent[*virtv1.VirtualMachineInstance]) bool { nodePlacementCondition, _ := conditions.GetKVVMICondition(conditions.VirtualMachineInstanceNodePlacementNotMatched, e.ObjectNew.Status.Conditions) + if nodePlacementCondition.Status == corev1.ConditionTrue { + return true + } hotMemoryChangeCondition, _ := conditions.GetKVVMICondition(virtv1.VirtualMachineInstanceMemoryChange, e.ObjectNew.Status.Conditions) - return nodePlacementCondition.Status == corev1.ConditionTrue || hotMemoryChangeCondition.Status == corev1.ConditionTrue + if hotMemoryChangeCondition.Status == corev1.ConditionTrue { + return true + } + hotCPUChangeCondition, _ := conditions.GetKVVMICondition(virtv1.VirtualMachineInstanceVCPUChange, e.ObjectNew.Status.Conditions) + return hotCPUChangeCondition.Status == corev1.ConditionTrue }, }, ), diff --git a/images/virtualization-artifact/pkg/featuregates/featuregate.go b/images/virtualization-artifact/pkg/featuregates/featuregate.go index 8d567ae151..3358b0a85a 100644 --- a/images/virtualization-artifact/pkg/featuregates/featuregate.go +++ b/images/virtualization-artifact/pkg/featuregates/featuregate.go @@ -30,6 +30,7 @@ const ( VolumeMigration featuregate.Feature = "VolumeMigration" TargetMigration featuregate.Feature = "TargetMigration" USB featuregate.Feature = "USB" + HotplugCPUWithLiveMigration featuregate.Feature = "HotplugCPUWithLiveMigration" HotplugMemoryWithLiveMigration featuregate.Feature = "HotplugMemoryWithLiveMigration" ) @@ -58,6 +59,11 @@ var featureSpecs = map[featuregate.Feature]featuregate.FeatureSpec{ LockToDefault: true, PreRelease: featuregate.Alpha, }, + HotplugCPUWithLiveMigration: { + Default: false, + LockToDefault: version.GetEdition() == version.EditionCE, + PreRelease: featuregate.Alpha, + }, HotplugMemoryWithLiveMigration: { Default: false, LockToDefault: version.GetEdition() == version.EditionCE, diff --git a/tools/kubeconform/fixtures/module-values.yaml b/tools/kubeconform/fixtures/module-values.yaml index 3683e0bfae..a88e1a8a56 100644 --- a/tools/kubeconform/fixtures/module-values.yaml +++ b/tools/kubeconform/fixtures/module-values.yaml @@ -397,6 +397,7 @@ virtualization: - 10.0.10.0/24 - 10.0.20.0/24 - 10.0.30.0/24 + featureGates: [] moduleState: {} virtConfig: phase: Deployed