@@ -68,6 +68,8 @@ const (
ScaleUpInCooldown
// ScaleUpLimitedByMaxNodesTotal - the scale up wasn't attempted, because the cluster reached max nodes total
ScaleUpLimitedByMaxNodesTotal
// ScaleUpPartialCapacityAvailable - capacity was found for only some of the pods in the request
ScaleUpPartialCapacityAvailable
)

// WasSuccessful returns true if the scale-up was successful.
@@ -18,7 +18,9 @@ package checkcapacity

import (
"fmt"
"regexp"
"sort"
"strconv"
"strings"
"sync"
"time"
@@ -50,8 +52,17 @@ const (
// Supported values are "true" and "false" - by default ProvisioningRequests are always retried.
// Currently supported only for checkcapacity class.
NoRetryParameterKey = "noRetry"

// PartialCapacityCheckKey is a key for ProvReq's Parameters that controls
// whether checkCapacity reports how many of a ProvReq's pods could be scheduled
// according to the simulation.
// Supported values are "true" and "false" - by default this is "false", and
// checkCapacity only reports whether there was capacity for all of the ProvReq's pods.
PartialCapacityCheckKey = "partialCapacityCheck"
)
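
// Illustrative sketch (editorial, not part of this patch): how a ProvisioningRequest
// could opt into the partial capacity check. Field names follow the autoscaling.x-k8s.io
// v1 API as used elsewhere in this file; the request name and namespace are hypothetical.
//
//	pr := &v1.ProvisioningRequest{
//		ObjectMeta: metav1.ObjectMeta{Name: "example-request", Namespace: "default"},
//		Spec: v1.ProvisioningRequestSpec{
//			ProvisioningClassName: "check-capacity.autoscaling.x-k8s.io",
//			Parameters: map[string]v1.Parameter{
//				PartialCapacityCheckKey: "true",
//			},
//		},
//	}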

// podSetIndexPattern matches the trailing "-{PodSet index}-{pod index}" suffix of pod names created by PodsForProvisioningRequest.
var podSetIndexPattern = regexp.MustCompile(`-(\d+)-(\d+)$`)
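
// Illustrative example (editorial, not part of this patch): for a pod named
// "workload-1-2" the pattern captures the PodSet index and the pod index:
//
//	m := podSetIndexPattern.FindStringSubmatch("workload-1-2")
//	// m == []string{"-1-2", "1", "2"}
//	podSetIdx, _ := strconv.Atoi(m[1]) // 1 - index of the PodSet in the ProvReq
//	podIdx, _ := strconv.Atoi(m[2])    // 2 - index of the pod within that PodSet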

type checkCapacityProvClass struct {
autoscalingCtx *ca_context.AutoscalingContext
client *provreqclient.ProvisioningRequestClient
@@ -174,19 +185,40 @@ func (o *checkCapacityProvClass) checkCapacityBatch(reqs []provreq.ProvisioningR
func (o *checkCapacityProvClass) checkCapacity(unschedulablePods []*apiv1.Pod, provReq *provreqwrapper.ProvisioningRequest, combinedStatus *combinedStatusSet) error {
o.autoscalingCtx.ClusterSnapshot.Fork()

// Case 1: Capacity fits.
scheduled, _, err := o.schedulingSimulator.TrySchedulePods(o.autoscalingCtx.ClusterSnapshot, unschedulablePods, scheduling.ScheduleAnywhere, true)
if err == nil && len(scheduled) == len(unschedulablePods) {
sortedUnschedulablePods := sortPodsFromProvReq(unschedulablePods)

// Sets the simulation's breakOnFailure. If true, the simulation stops at the first failed scheduling attempt; for a partial capacity check it keeps going so that every schedulable pod is counted.
simBreakOnFailure := true
partialCapacityCheck, ok := provReq.Spec.Parameters[PartialCapacityCheckKey]
if ok && partialCapacityCheck == "true" {
simBreakOnFailure = false
}

scheduled, _, err := o.schedulingSimulator.TrySchedulePods(o.autoscalingCtx.ClusterSnapshot, sortedUnschedulablePods, scheduling.ScheduleAnywhere, simBreakOnFailure)
if err == nil {
commitError := o.autoscalingCtx.ClusterSnapshot.Commit()
if commitError != nil {
o.autoscalingCtx.ClusterSnapshot.Revert()
return commitError
}
combinedStatus.Add(&status.ScaleUpStatus{Result: status.ScaleUpSuccessful})
conditions.AddOrUpdateCondition(provReq, v1.Provisioned, metav1.ConditionTrue, conditions.CapacityIsFoundReason, conditions.CapacityIsFoundMsg, metav1.Now())
return nil

// Case 1: Capacity fits.
if len(scheduled) == len(sortedUnschedulablePods) {
combinedStatus.Add(&status.ScaleUpStatus{Result: status.ScaleUpSuccessful})
conditions.AddOrUpdateCondition(provReq, v1.Provisioned, metav1.ConditionTrue, conditions.CapacityIsFoundReason, conditions.CapacityIsFoundMsg, metav1.Now())
return nil
}

// Case 2: Capacity partially fits.
if partialCapacityCheck == "true" && len(scheduled) < len(sortedUnschedulablePods) {
combinedStatus.Add(&status.ScaleUpStatus{Result: status.ScaleUpPartialCapacityAvailable})
msg := fmt.Sprintf("%s Can schedule %d out of %d pods.", conditions.PartialCapacityIsFoundMsg, len(scheduled), len(sortedUnschedulablePods))
conditions.AddOrUpdateCondition(provReq, v1.Provisioned, metav1.ConditionTrue, conditions.PartialCapacityIsFoundReason, msg, metav1.Now())
return nil
}
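
// Editorial note, not part of this patch: with the values above, a request where
// 3 of 5 pods fit would get a Provisioned=True condition with reason
// PartialCapacityIsFound and the message
// "Partial capacity is found in the cluster. Can schedule 3 out of 5 pods."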
}
// Case 2: Capacity doesn't fit.

// Case 3: Capacity doesn't fit.
o.autoscalingCtx.ClusterSnapshot.Revert()
combinedStatus.Add(&status.ScaleUpStatus{Result: status.ScaleUpNoOptionsAvailable})
if noRetry, ok := provReq.Spec.Parameters[NoRetryParameterKey]; ok && noRetry == "true" {
@@ -199,9 +231,54 @@ func (o *checkCapacityProvClass) checkCapacity(unschedulablePods []*apiv1.Pod, p
}
conditions.AddOrUpdateCondition(provReq, v1.Provisioned, metav1.ConditionFalse, conditions.CapacityIsNotFoundReason, "Capacity is not found, CA will try to find it later.", metav1.Now())
}

return err
}

// sortPodsFromProvReq sorts pods by name, relying on the naming scheme of
// PodsForProvisioningRequest: {GenerateName}{i}-{j}, where i is the index of the
// PodSet in the ProvReq and j is the index of the pod within the PodSet.
// This assumes GenerateName has a trailing dash.
func sortPodsFromProvReq(unschedulablePods []*apiv1.Pod) (sortedPods []*apiv1.Pod) {
sortedPods = make([]*apiv1.Pod, len(unschedulablePods))
copy(sortedPods, unschedulablePods)

sort.Slice(sortedPods, func(i, j int) bool {
podA := podSetIndexPattern.FindStringSubmatch(sortedPods[i].Name)
podB := podSetIndexPattern.FindStringSubmatch(sortedPods[j].Name)

// If both match the expected pattern, compare by indices
if len(podA) == 3 && len(podB) == 3 {
podSetIndexI, _ := strconv.Atoi(podA[1])
podIndexI, _ := strconv.Atoi(podA[2])
podSetIndexJ, _ := strconv.Atoi(podB[1])
podIndexJ, _ := strconv.Atoi(podB[2])

// Compare by PodSet index
if podSetIndexI != podSetIndexJ {
return podSetIndexI < podSetIndexJ
}
// Then by pod index within the PodSet
if podIndexI != podIndexJ {
return podIndexI < podIndexJ
}
// Use namespace then name as tiebreakers
if sortedPods[i].Namespace != sortedPods[j].Namespace {
return sortedPods[i].Namespace < sortedPods[j].Namespace
}
return sortedPods[i].Name < sortedPods[j].Name
}

// Fall back to lexicographic ordering by namespace then name if either name doesn't match the pattern
if sortedPods[i].Namespace != sortedPods[j].Namespace {
return sortedPods[i].Namespace < sortedPods[j].Namespace
}
return sortedPods[i].Name < sortedPods[j].Name
})

return sortedPods
}

// updateRequests calls the client to update ProvisioningRequests, in parallel.
func updateRequests(client *provreqclient.ProvisioningRequestClient, prWrappers []*provreqwrapper.ProvisioningRequest, combinedStatus *combinedStatusSet) {
wg := sync.WaitGroup{}
@@ -234,13 +311,15 @@ type combinedStatusSet struct {
func (c *combinedStatusSet) Add(newStatus *status.ScaleUpStatus) {
// This represents the priority of the ScaleUpResult. The final result is the one with the highest priority in the set.
resultPriority := map[status.ScaleUpResult]int{
status.ScaleUpNotTried: 0,
status.ScaleUpNoOptionsAvailable: 1,
status.ScaleUpError: 2,
status.ScaleUpSuccessful: 3,
status.ScaleUpNotTried: 0,
status.ScaleUpNoOptionsAvailable: 1,
status.ScaleUpError: 2,
status.ScaleUpPartialCapacityAvailable: 3,
status.ScaleUpSuccessful: 4,
}

// If even one ScaleUpSuccessful is present, the final result is ScaleUpSuccessful.
// If no ScaleUpSuccessful is present, but there is a ScaleUpPartialCapacityAvailable, the final result is ScaleUpPartialCapacityAvailable.
// If neither of the above is present, and even one ScaleUpError is present, the final result is ScaleUpError.
// If no ScaleUpSuccessful, ScaleUpPartialCapacityAvailable or ScaleUpError is present, and even one ScaleUpNoOptionsAvailable is present, the final result is ScaleUpNoOptionsAvailable.
// If none of the above results is present, the final result is ScaleUpNotTried.
@@ -21,6 +21,9 @@ import (
"testing"

"github.com/stretchr/testify/assert"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/autoscaler/cluster-autoscaler/processors/status"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
)
@@ -80,6 +83,27 @@ func TestCombinedStatusSet(t *testing.T) {
exportedResut: status.ScaleUpSuccessful,
exportedError: errors.NewAutoscalerError(errors.InternalError, "error 0"),
},
{
name: "all partial capacity",
statuses: generateStatuses(2, status.ScaleUpPartialCapacityAvailable),
exportedResut: status.ScaleUpPartialCapacityAvailable,
},
{
name: "successful and partial capacity",
statuses: append(generateStatuses(1, status.ScaleUpPartialCapacityAvailable), generateStatuses(1, status.ScaleUpSuccessful)...),
exportedResut: status.ScaleUpSuccessful,
},
{
name: "partial capacity and no options available",
statuses: append(generateStatuses(1, status.ScaleUpPartialCapacityAvailable), generateStatuses(1, status.ScaleUpNoOptionsAvailable)...),
exportedResut: status.ScaleUpPartialCapacityAvailable,
},
{
name: "error and partial capacity",
statuses: append(generateStatuses(1, status.ScaleUpError), generateStatuses(1, status.ScaleUpPartialCapacityAvailable)...),
exportedResut: status.ScaleUpPartialCapacityAvailable,
exportedError: errors.NewAutoscalerError(errors.InternalError, "error 0"),
},
}

for _, tc := range testCases {
@@ -127,3 +151,119 @@ func generateStatuses(n int, result status.ScaleUpResult) []*status.ScaleUpStatu
}
return statuses
}

func TestSortPodsFromProvReq(t *testing.T) {
testCases := []struct {
name string
input []*apiv1.Pod
expectedSortedPods []types.NamespacedName
}{
{
name: "single PodSet with multiple pods",
input: []*apiv1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "workload-0-2", Namespace: "default"}},
{ObjectMeta: metav1.ObjectMeta{Name: "workload-0-0", Namespace: "default"}},
{ObjectMeta: metav1.ObjectMeta{Name: "workload-0-1", Namespace: "default"}},
},
expectedSortedPods: []types.NamespacedName{
{Namespace: "default", Name: "workload-0-0"},
{Namespace: "default", Name: "workload-0-1"},
{Namespace: "default", Name: "workload-0-2"},
},
},
{
name: "multiple PodSets",
input: []*apiv1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "workload-1-0", Namespace: "default"}},
{ObjectMeta: metav1.ObjectMeta{Name: "workload-0-1", Namespace: "default"}},
{ObjectMeta: metav1.ObjectMeta{Name: "workload-2-0", Namespace: "default"}},
{ObjectMeta: metav1.ObjectMeta{Name: "workload-0-0", Namespace: "default"}},
{ObjectMeta: metav1.ObjectMeta{Name: "workload-1-1", Namespace: "default"}},
},
expectedSortedPods: []types.NamespacedName{
{Namespace: "default", Name: "workload-0-0"},
{Namespace: "default", Name: "workload-0-1"},
{Namespace: "default", Name: "workload-1-0"},
{Namespace: "default", Name: "workload-1-1"},
{Namespace: "default", Name: "workload-2-0"},
},
},
{
name: "mixed with non-matching pattern - fallback to lexicographic",
input: []*apiv1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "workload-0-1", Namespace: "default"}},
{ObjectMeta: metav1.ObjectMeta{Name: "other-pod", Namespace: "default"}},
{ObjectMeta: metav1.ObjectMeta{Name: "workload-0-0", Namespace: "default"}},
},
expectedSortedPods: []types.NamespacedName{
{Namespace: "default", Name: "other-pod"},
{Namespace: "default", Name: "workload-0-0"},
{Namespace: "default", Name: "workload-0-1"},
},
},
{
name: "different namespaces with same indices - namespace used as tiebreaker",
input: []*apiv1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "workload-0-0", Namespace: "ns-b"}},
{ObjectMeta: metav1.ObjectMeta{Name: "workload-0-1", Namespace: "ns-a"}},
{ObjectMeta: metav1.ObjectMeta{Name: "workload-0-0", Namespace: "ns-a"}},
},
expectedSortedPods: []types.NamespacedName{
{Namespace: "ns-a", Name: "workload-0-0"},
{Namespace: "ns-b", Name: "workload-0-0"},
{Namespace: "ns-a", Name: "workload-0-1"},
},
},
{
name: "complex PodSet indices",
input: []*apiv1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "app-10-5", Namespace: "default"}},
{ObjectMeta: metav1.ObjectMeta{Name: "app-2-10", Namespace: "default"}},
{ObjectMeta: metav1.ObjectMeta{Name: "app-2-2", Namespace: "default"}},
{ObjectMeta: metav1.ObjectMeta{Name: "app-10-0", Namespace: "default"}},
},
expectedSortedPods: []types.NamespacedName{
{Namespace: "default", Name: "app-2-2"},
{Namespace: "default", Name: "app-2-10"},
{Namespace: "default", Name: "app-10-0"},
{Namespace: "default", Name: "app-10-5"},
},
},
{
name: "empty list",
input: []*apiv1.Pod{},
expectedSortedPods: []types.NamespacedName{},
},
{
name: "single pod",
input: []*apiv1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "workload-0-0", Namespace: "default"}},
},
expectedSortedPods: []types.NamespacedName{
{Namespace: "default", Name: "workload-0-0"},
},
},
}

for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()

sorted := sortPodsFromProvReq(tc.input)

sortedNamespacedNames := make([]types.NamespacedName, len(sorted))
for i, pod := range sorted {
sortedNamespacedNames[i] = types.NamespacedName{
Namespace: pod.Namespace,
Name: pod.Name,
}
}

assert.Equal(t, tc.expectedSortedPods, sortedNamespacedNames, "Pods should be sorted in the correct order")

// Verify we didn't modify the number of pods
assert.Equal(t, len(tc.input), len(sorted), "Should have same number of pods")
})
}
}
@@ -36,6 +36,10 @@ const (
CapacityIsFoundReason = "CapacityIsFound"
// CapacityIsFoundMsg is added when capacity was found in the cluster.
CapacityIsFoundMsg = "Capacity is found in the cluster"
// PartialCapacityIsFoundReason is added when capacity was found in the cluster for some pods.
PartialCapacityIsFoundReason = "PartialCapacityIsFound"
// PartialCapacityIsFoundMsg is added when partial capacity was found in the cluster.
PartialCapacityIsFoundMsg = "Partial capacity is found in the cluster."
// CapacityIsProvisionedReason is added when capacity was requested successfully.
CapacityIsProvisionedReason = "CapacityIsProvisioned"
// CapacityIsProvisionedMsg is added when capacity was requested successfully.