diff --git a/cluster-autoscaler/processors/status/scale_up_status_processor.go b/cluster-autoscaler/processors/status/scale_up_status_processor.go
index ff242baa0d2d..71256bff4b06 100644
--- a/cluster-autoscaler/processors/status/scale_up_status_processor.go
+++ b/cluster-autoscaler/processors/status/scale_up_status_processor.go
@@ -68,6 +68,8 @@ const (
 	ScaleUpInCooldown
 	// ScaleUpLimitedByMaxNodesTotal - the scale up wasn't attempted, because the cluster reached max nodes total
 	ScaleUpLimitedByMaxNodesTotal
+	// ScaleUpPartialCapacityAvailable - capacity is available for only some of the pods in a request
+	ScaleUpPartialCapacityAvailable
 )
 
 // WasSuccessful returns true if the scale-up was successful.
diff --git a/cluster-autoscaler/provisioningrequest/checkcapacity/provisioningclass.go b/cluster-autoscaler/provisioningrequest/checkcapacity/provisioningclass.go
index b6cd4f42fee2..342ece5c3db8 100644
--- a/cluster-autoscaler/provisioningrequest/checkcapacity/provisioningclass.go
+++ b/cluster-autoscaler/provisioningrequest/checkcapacity/provisioningclass.go
@@ -18,7 +18,9 @@ package checkcapacity
 
 import (
 	"fmt"
+	"regexp"
 	"sort"
+	"strconv"
 	"strings"
 	"sync"
 	"time"
@@ -50,8 +52,17 @@ const (
 	// Supported values are "true" and "false" - by default ProvisioningRequests are always retried.
 	// Currently supported only for checkcapacity class.
 	NoRetryParameterKey = "noRetry"
+
+	// PartialCapacityCheckKey is a key for ProvReq's Parameters that, when set to "true",
+	// surfaces how many pods of a ProvReq could be scheduled in the capacity simulation.
+	// Supported values are "true" and "false" - by default this is false, and
+	// checkCapacity only surfaces whether there was capacity for all of the ProvReq's pods.
+	PartialCapacityCheckKey = "partialCapacityCheck"
 )
 
+// podSetIndexPattern matches the pod names created by PodsForProvisioningRequest.
+var podSetIndexPattern = regexp.MustCompile(`-(\d+)-(\d+)$`)
+
 type checkCapacityProvClass struct {
 	autoscalingCtx *ca_context.AutoscalingContext
 	client         *provreqclient.ProvisioningRequestClient
@@ -174,19 +185,40 @@ func (o *checkCapacityProvClass) checkCapacityBatch(reqs []provreq.ProvisioningR
 func (o *checkCapacityProvClass) checkCapacity(unschedulablePods []*apiv1.Pod, provReq *provreqwrapper.ProvisioningRequest, combinedStatus *combinedStatusSet) error {
 	o.autoscalingCtx.ClusterSnapshot.Fork()
 
-	// Case 1: Capacity fits.
-	scheduled, _, err := o.schedulingSimulator.TrySchedulePods(o.autoscalingCtx.ClusterSnapshot, unschedulablePods, scheduling.ScheduleAnywhere, true)
-	if err == nil && len(scheduled) == len(unschedulablePods) {
+	sortedUnschedulablePods := sortPodsFromProvReq(unschedulablePods)
+
+	// Set the simulation's breakOnFailure: if true, the simulation loop breaks on the first failed scheduling attempt.
+	simBreakOnFailure := true
+	partialCapacityCheck, ok := provReq.Spec.Parameters[PartialCapacityCheckKey]
+	if ok && partialCapacityCheck == "true" {
+		simBreakOnFailure = false
+	}
+
+	scheduled, _, err := o.schedulingSimulator.TrySchedulePods(o.autoscalingCtx.ClusterSnapshot, sortedUnschedulablePods, scheduling.ScheduleAnywhere, simBreakOnFailure)
+	if err == nil {
 		commitError := o.autoscalingCtx.ClusterSnapshot.Commit()
 		if commitError != nil {
 			o.autoscalingCtx.ClusterSnapshot.Revert()
 			return commitError
 		}
-		combinedStatus.Add(&status.ScaleUpStatus{Result: status.ScaleUpSuccessful})
-		conditions.AddOrUpdateCondition(provReq, v1.Provisioned, metav1.ConditionTrue, conditions.CapacityIsFoundReason, conditions.CapacityIsFoundMsg, metav1.Now())
-		return nil
+
+		// Case 1: Capacity fits.
+		if len(scheduled) == len(sortedUnschedulablePods) {
+			combinedStatus.Add(&status.ScaleUpStatus{Result: status.ScaleUpSuccessful})
+			conditions.AddOrUpdateCondition(provReq, v1.Provisioned, metav1.ConditionTrue, conditions.CapacityIsFoundReason, conditions.CapacityIsFoundMsg, metav1.Now())
+			return nil
+		}
+
+		// Case 2: Capacity partially fits.
+		if partialCapacityCheck == "true" && len(scheduled) < len(sortedUnschedulablePods) {
+			combinedStatus.Add(&status.ScaleUpStatus{Result: status.ScaleUpPartialCapacityAvailable})
+			msg := fmt.Sprintf("%s Can schedule %d out of %d pods.", conditions.PartialCapacityIsFoundMsg, len(scheduled), len(sortedUnschedulablePods))
+			conditions.AddOrUpdateCondition(provReq, v1.Provisioned, metav1.ConditionTrue, conditions.PartialCapacityIsFoundReason, msg, metav1.Now())
+			return nil
+		}
 	}
-	// Case 2: Capacity doesn't fit.
+
+	// Case 3: Capacity doesn't fit.
 	o.autoscalingCtx.ClusterSnapshot.Revert()
 	combinedStatus.Add(&status.ScaleUpStatus{Result: status.ScaleUpNoOptionsAvailable})
 	if noRetry, ok := provReq.Spec.Parameters[NoRetryParameterKey]; ok && noRetry == "true" {
@@ -199,9 +231,54 @@ func (o *checkCapacityProvClass) checkCapacity(unschedulablePods []*apiv1.Pod, p
 		}
 		conditions.AddOrUpdateCondition(provReq, v1.Provisioned, metav1.ConditionFalse, conditions.CapacityIsNotFoundReason, "Capacity is not found, CA will try to find it later.", metav1.Now())
 	}
+
 	return err
 }
 
+// sortPodsFromProvReq sorts pods by name, since PodsForProvisioningRequest creates them
+// in the format {GenerateName}{i}-{j}, where i is the index of the PodSet in the ProvReq
+// and j is the index of the pod within the PodSet.
+// This assumes GenerateName has a trailing dash.
+func sortPodsFromProvReq(unschedulablePods []*apiv1.Pod) (sortedPods []*apiv1.Pod) {
+	sortedPods = make([]*apiv1.Pod, len(unschedulablePods))
+	copy(sortedPods, unschedulablePods)
+
+	sort.Slice(sortedPods, func(i, j int) bool {
+		podA := podSetIndexPattern.FindStringSubmatch(sortedPods[i].Name)
+		podB := podSetIndexPattern.FindStringSubmatch(sortedPods[j].Name)
+
+		// If both match the expected pattern, compare by indices.
+		if len(podA) == 3 && len(podB) == 3 {
+			podSetIndexI, _ := strconv.Atoi(podA[1])
+			podIndexI, _ := strconv.Atoi(podA[2])
+			podSetIndexJ, _ := strconv.Atoi(podB[1])
+			podIndexJ, _ := strconv.Atoi(podB[2])
+
+			// Compare by PodSet index first.
+			if podSetIndexI != podSetIndexJ {
+				return podSetIndexI < podSetIndexJ
+			}
+			// Then by pod index within the PodSet.
+			if podIndexI != podIndexJ {
+				return podIndexI < podIndexJ
+			}
+			// Use namespace then name as tiebreakers.
+			if sortedPods[i].Namespace != sortedPods[j].Namespace {
+				return sortedPods[i].Namespace < sortedPods[j].Namespace
+			}
+			return sortedPods[i].Name < sortedPods[j].Name
+		}
+
+		// Fall back to ordering by namespace then name if the pattern doesn't match.
+		if sortedPods[i].Namespace != sortedPods[j].Namespace {
+			return sortedPods[i].Namespace < sortedPods[j].Namespace
+		}
+		return sortedPods[i].Name < sortedPods[j].Name
+	})
+
+	return sortedPods
+}
+
 // updateRequests calls the client to update ProvisioningRequests, in parallel.
 func updateRequests(client *provreqclient.ProvisioningRequestClient, prWrappers []*provreqwrapper.ProvisioningRequest, combinedStatus *combinedStatusSet) {
 	wg := sync.WaitGroup{}
@@ -234,13 +311,15 @@ type combinedStatusSet struct {
 func (c *combinedStatusSet) Add(newStatus *status.ScaleUpStatus) {
 	// This represents the priority of the ScaleUpResult. The final result is the one with the highest priority in the set.
 	resultPriority := map[status.ScaleUpResult]int{
-		status.ScaleUpNotTried:           0,
-		status.ScaleUpNoOptionsAvailable: 1,
-		status.ScaleUpError:              2,
-		status.ScaleUpSuccessful:         3,
+		status.ScaleUpNotTried:                 0,
+		status.ScaleUpNoOptionsAvailable:       1,
+		status.ScaleUpError:                    2,
+		status.ScaleUpPartialCapacityAvailable: 3,
+		status.ScaleUpSuccessful:               4,
 	}
 
 	// If even one scaleUpSuccessful is present, the final result is ScaleUpSuccessful.
+	// If no ScaleUpSuccessful is present, but at least one ScaleUpPartialCapacityAvailable is, the final result is ScaleUpPartialCapacityAvailable.
 	// If no ScaleUpSuccessful is present, and even one ScaleUpError is present, the final result is ScaleUpError.
 	// If no ScaleUpSuccessful or ScaleUpError is present, and even one ScaleUpNoOptionsAvailable is present, the final result is ScaleUpNoOptionsAvailable.
 	// If no ScaleUpSuccessful, ScaleUpError or ScaleUpNoOptionsAvailable is present, the final result is ScaleUpNotTried.
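
For context, the ordering sortPodsFromProvReq aims for can be reproduced with nothing beyond the Go standard library. The snippet below is an illustrative, standalone sketch of the comparator over bare name strings (the sample names are made up and the code is not part of the patch); it assumes only the {GenerateName}{i}-{j} naming convention described above.

package main

import (
	"fmt"
	"regexp"
	"sort"
	"strconv"
)

// Same shape as podSetIndexPattern in the patch: a trailing "-<podSetIndex>-<podIndex>".
var indexPattern = regexp.MustCompile(`-(\d+)-(\d+)$`)

func main() {
	names := []string{"app-10-0", "app-2-10", "app-2-2", "other-pod", "app-10-5"}
	sort.Slice(names, func(i, j int) bool {
		a := indexPattern.FindStringSubmatch(names[i])
		b := indexPattern.FindStringSubmatch(names[j])
		if len(a) == 3 && len(b) == 3 {
			aSet, _ := strconv.Atoi(a[1]) // PodSet index
			aPod, _ := strconv.Atoi(a[2]) // pod index within the PodSet
			bSet, _ := strconv.Atoi(b[1])
			bPod, _ := strconv.Atoi(b[2])
			if aSet != bSet {
				return aSet < bSet
			}
			if aPod != bPod {
				return aPod < bPod
			}
		}
		// Names that don't match the pattern (and exact ties) fall back to plain string order.
		return names[i] < names[j]
	})
	fmt.Println(names) // [app-2-2 app-2-10 app-10-0 app-10-5 other-pod]
}

A plain string sort would place app-10-0 before app-2-2; comparing the trailing indices numerically keeps pods grouped by PodSet and in pod order, which gives the scheduling simulation a deterministic input order.
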
diff --git a/cluster-autoscaler/provisioningrequest/checkcapacity/provisioningclass_test.go b/cluster-autoscaler/provisioningrequest/checkcapacity/provisioningclass_test.go
index b3b769a429f9..a54a61645e87 100644
--- a/cluster-autoscaler/provisioningrequest/checkcapacity/provisioningclass_test.go
+++ b/cluster-autoscaler/provisioningrequest/checkcapacity/provisioningclass_test.go
@@ -21,6 +21,9 @@ import (
 	"testing"
 
 	"github.com/stretchr/testify/assert"
+	apiv1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/autoscaler/cluster-autoscaler/processors/status"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
 )
@@ -80,6 +83,27 @@ func TestCombinedStatusSet(t *testing.T) {
 			exportedResut: status.ScaleUpSuccessful,
 			exportedError: errors.NewAutoscalerError(errors.InternalError, "error 0"),
 		},
+		{
+			name:          "all partial capacity",
+			statuses:      generateStatuses(2, status.ScaleUpPartialCapacityAvailable),
+			exportedResut: status.ScaleUpPartialCapacityAvailable,
+		},
+		{
+			name:          "successful and partial capacity",
+			statuses:      append(generateStatuses(1, status.ScaleUpPartialCapacityAvailable), generateStatuses(1, status.ScaleUpSuccessful)...),
+			exportedResut: status.ScaleUpSuccessful,
+		},
+		{
+			name:          "partial capacity and no options available",
+			statuses:      append(generateStatuses(1, status.ScaleUpPartialCapacityAvailable), generateStatuses(1, status.ScaleUpNoOptionsAvailable)...),
+			exportedResut: status.ScaleUpPartialCapacityAvailable,
+		},
+		{
+			name:          "error and partial capacity",
+			statuses:      append(generateStatuses(1, status.ScaleUpError), generateStatuses(1, status.ScaleUpPartialCapacityAvailable)...),
+			exportedResut: status.ScaleUpPartialCapacityAvailable,
+			exportedError: errors.NewAutoscalerError(errors.InternalError, "error 0"),
+		},
 	}
 
 	for _, tc := range testCases {
@@ -127,3 +151,119 @@ func generateStatuses(n int, result status.ScaleUpResult) []*status.ScaleUpStatu
 	}
 	return statuses
 }
+
+func TestSortPodsFromProvReq(t *testing.T) {
+	testCases := []struct {
+		name               string
+		input              []*apiv1.Pod
+		expectedSortedPods []types.NamespacedName
+	}{
+		{
+			name: "single PodSet with multiple pods",
+			input: []*apiv1.Pod{
+				{ObjectMeta: metav1.ObjectMeta{Name: "workload-0-2", Namespace: "default"}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "workload-0-0", Namespace: "default"}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "workload-0-1", Namespace: "default"}},
+			},
+			expectedSortedPods: []types.NamespacedName{
+				{Namespace: "default", Name: "workload-0-0"},
+				{Namespace: "default", Name: "workload-0-1"},
+				{Namespace: "default", Name: "workload-0-2"},
+			},
+		},
+		{
+			name: "multiple PodSets",
+			input: []*apiv1.Pod{
+				{ObjectMeta: metav1.ObjectMeta{Name: "workload-1-0", Namespace: "default"}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "workload-0-1", Namespace: "default"}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "workload-2-0", Namespace: "default"}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "workload-0-0", Namespace: "default"}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "workload-1-1", Namespace: "default"}},
+			},
+			expectedSortedPods: []types.NamespacedName{
+				{Namespace: "default", Name: "workload-0-0"},
+				{Namespace: "default", Name: "workload-0-1"},
+				{Namespace: "default", Name: "workload-1-0"},
+				{Namespace: "default", Name: "workload-1-1"},
+				{Namespace: "default", Name: "workload-2-0"},
+			},
+		},
+		{
+			name: "mixed with non-matching pattern - fallback to lexicographic",
+			input: []*apiv1.Pod{
+				{ObjectMeta: metav1.ObjectMeta{Name: "workload-0-1", Namespace: "default"}},
Namespace: "default"}}, + {ObjectMeta: metav1.ObjectMeta{Name: "other-pod", Namespace: "default"}}, + {ObjectMeta: metav1.ObjectMeta{Name: "workload-0-0", Namespace: "default"}}, + }, + expectedSortedPods: []types.NamespacedName{ + {Namespace: "default", Name: "other-pod"}, + {Namespace: "default", Name: "workload-0-0"}, + {Namespace: "default", Name: "workload-0-1"}, + }, + }, + { + name: "different namespaces with same indices - namespace used as tiebreaker", + input: []*apiv1.Pod{ + {ObjectMeta: metav1.ObjectMeta{Name: "workload-0-0", Namespace: "ns-b"}}, + {ObjectMeta: metav1.ObjectMeta{Name: "workload-0-1", Namespace: "ns-a"}}, + {ObjectMeta: metav1.ObjectMeta{Name: "workload-0-0", Namespace: "ns-a"}}, + }, + expectedSortedPods: []types.NamespacedName{ + {Namespace: "ns-a", Name: "workload-0-0"}, + {Namespace: "ns-b", Name: "workload-0-0"}, + {Namespace: "ns-a", Name: "workload-0-1"}, + }, + }, + { + name: "complex PodSet indices", + input: []*apiv1.Pod{ + {ObjectMeta: metav1.ObjectMeta{Name: "app-10-5", Namespace: "default"}}, + {ObjectMeta: metav1.ObjectMeta{Name: "app-2-10", Namespace: "default"}}, + {ObjectMeta: metav1.ObjectMeta{Name: "app-2-2", Namespace: "default"}}, + {ObjectMeta: metav1.ObjectMeta{Name: "app-10-0", Namespace: "default"}}, + }, + expectedSortedPods: []types.NamespacedName{ + {Namespace: "default", Name: "app-2-2"}, + {Namespace: "default", Name: "app-2-10"}, + {Namespace: "default", Name: "app-10-0"}, + {Namespace: "default", Name: "app-10-5"}, + }, + }, + { + name: "empty list", + input: []*apiv1.Pod{}, + expectedSortedPods: []types.NamespacedName{}, + }, + { + name: "single pod", + input: []*apiv1.Pod{ + {ObjectMeta: metav1.ObjectMeta{Name: "workload-0-0", Namespace: "default"}}, + }, + expectedSortedPods: []types.NamespacedName{ + {Namespace: "default", Name: "workload-0-0"}, + }, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + sorted := sortPodsFromProvReq(tc.input) + + sortedNamespacedNames := make([]types.NamespacedName, len(sorted)) + for i, pod := range sorted { + sortedNamespacedNames[i] = types.NamespacedName{ + Namespace: pod.Namespace, + Name: pod.Name, + } + } + + assert.Equal(t, tc.expectedSortedPods, sortedNamespacedNames, "Pods should be sorted in the correct order") + + // Verify we didn't modify the number of pods + assert.Equal(t, len(tc.input), len(sorted), "Should have same number of pods") + }) + } +} diff --git a/cluster-autoscaler/provisioningrequest/conditions/conditions.go b/cluster-autoscaler/provisioningrequest/conditions/conditions.go index dc706407019c..956fe02937c8 100644 --- a/cluster-autoscaler/provisioningrequest/conditions/conditions.go +++ b/cluster-autoscaler/provisioningrequest/conditions/conditions.go @@ -36,6 +36,10 @@ const ( CapacityIsFoundReason = "CapacityIsFound" // CapacityIsFoundMsg is added when capacity was found in the cluster. CapacityIsFoundMsg = "Capacity is found in the cluster" + // PartialCapacityIsFoundReason is added when capacity was found in the cluster for some pods. + PartialCapacityIsFoundReason = "PartialCapacityIsFound" + // PartialCapacityIsFoundMsg is added when partial capacity was found in the cluster. + PartialCapacityIsFoundMsg = "Partial capacity is found in the cluster." // CapacityIsProvisionedReason is added when capacity was requested successfully. CapacityIsProvisionedReason = "CapacityIsProvisioned" // CapacityIsProvisionedMsg is added when capacity was requested successfully.