From 48d2661110d6e952533a9fbeb6bf518bddf5e140 Mon Sep 17 00:00:00 2001 From: StephenJamesSmith Date: Tue, 16 Jun 2026 10:09:47 -0400 Subject: [PATCH] OSDOCS-19990: Resource fair sharing --- _topic_maps/_topic_map.yml | 2 + .../kueue/admission-fair-sharing.adoc | 29 ++++++++ ai_workloads/kueue/release-notes.adoc | 2 + ...usterqueue-for-admission-fair-sharing.adoc | 21 ++++++ ...e-instance-for-admission-fair-sharing.adoc | 66 +++++++++++++++++++ ...localqueue-for-admission-fair-sharing.adoc | 21 ++++++ modules/kueue-release-notes-1.4.adoc | 22 +++++++ modules/kueue-setting-resource-weights.adoc | 17 +++++ ...ing-the-admission-fair-sharing-status.adoc | 27 ++++++++ 9 files changed, 207 insertions(+) create mode 100644 ai_workloads/kueue/admission-fair-sharing.adoc create mode 100644 modules/kueue-configuring-clusterqueue-for-admission-fair-sharing.adoc create mode 100644 modules/kueue-configuring-kueue-instance-for-admission-fair-sharing.adoc create mode 100644 modules/kueue-configuring-localqueue-for-admission-fair-sharing.adoc create mode 100644 modules/kueue-release-notes-1.4.adoc create mode 100644 modules/kueue-setting-resource-weights.adoc create mode 100644 modules/kueue-verifying-the-admission-fair-sharing-status.adoc diff --git a/_topic_maps/_topic_map.yml b/_topic_maps/_topic_map.yml index 46afea466ceb..57e2f98249af 100644 --- a/_topic_maps/_topic_map.yml +++ b/_topic_maps/_topic_map.yml @@ -3511,6 +3511,8 @@ Topics: File: using-cohorts - Name: Configuring fair sharing File: configuring-fairsharing + - Name: Admission fair sharing + File: admission-fair-sharing - Name: Gang scheduling File: gangscheduling - Name: Running jobs with quota limits diff --git a/ai_workloads/kueue/admission-fair-sharing.adoc b/ai_workloads/kueue/admission-fair-sharing.adoc new file mode 100644 index 000000000000..16966f09edf4 --- /dev/null +++ b/ai_workloads/kueue/admission-fair-sharing.adoc @@ -0,0 +1,29 @@ +:_mod-docs-content-type: ASSEMBLY 
+include::_attributes/common-attributes.adoc[] +[id="admission-fair-sharing"] += Admission fair sharing +:context: admission-fair-sharing + +toc::[] + +[role="_abstract"] +Use admission fair sharing to fairly distribute workloads across local queues that share a single `ClusterQueue`. +This feature balances workload admission by prioritizing workloads from local queues that have used fewer resources historically. It tracks usage over time with a configurable decay function and applies admission penalties when workloads are admitted. + +When multiple tenants share a single `ClusterQueue`, some tenants risk resource starvation. Admission fair sharing addresses this issue by meeting the following requirements: + +Enforce multi-tenant fairness (business critical):: Ensure fair distribution of cluster resources across all tenants based on their usage history. + +Improve service predictability:: Guarantee each tenant gets a consistent share of resources, reducing latency spikes and preventing starvation. + +Enable scalable governance:: Complement static quotas with dynamic, usage-based admission ordering that adapts as tenant demand changes. 
+ +include::modules/kueue-configuring-kueue-instance-for-admission-fair-sharing.adoc[leveloffset=+1] + +include::modules/kueue-configuring-clusterqueue-for-admission-fair-sharing.adoc[leveloffset=+1] + +include::modules/kueue-configuring-localqueue-for-admission-fair-sharing.adoc[leveloffset=+1] + +include::modules/kueue-setting-resource-weights.adoc[leveloffset=+1] + +include::modules/kueue-verifying-the-admission-fair-sharing-status.adoc[leveloffset=+1] \ No newline at end of file diff --git a/ai_workloads/kueue/release-notes.adoc b/ai_workloads/kueue/release-notes.adoc index 73d043fbfdb4..9b3e27861edd 100644 --- a/ai_workloads/kueue/release-notes.adoc +++ b/ai_workloads/kueue/release-notes.adoc @@ -10,6 +10,8 @@ toc::[] include::modules/kueue-compatible-environments.adoc[leveloffset=+1] +include::modules/kueue-release-notes-1.4.adoc[leveloffset=+1] + include::modules/kueue-release-notes-1.3.1.adoc[leveloffset=+1] include::modules/kueue-release-notes-1.3.adoc[leveloffset=+1] diff --git a/modules/kueue-configuring-clusterqueue-for-admission-fair-sharing.adoc b/modules/kueue-configuring-clusterqueue-for-admission-fair-sharing.adoc new file mode 100644 index 000000000000..ad6b7eab39b0 --- /dev/null +++ b/modules/kueue-configuring-clusterqueue-for-admission-fair-sharing.adoc @@ -0,0 +1,21 @@ +// Module included in the following assemblies: +// +// * ai_workloads/kueue/admission-fair-sharing.adoc + +:_mod-docs-content-type: PROCEDURE +[id="configuring-clusterqueue-for-admission-fair-sharing_{context}"] += Configuring a cluster queue for admission fair sharing + +[role="_abstract"] +Set the `admissionMode` field in the `admissionScope` section of your `ClusterQueue` object to `UsageBasedAdmissionFairSharing`. 
+ +.Procedure + +* Specify `UsageBasedAdmissionFairSharing` as shown in the following example: ++ +[source,yaml] +---- +admissionScope: + admissionMode: UsageBasedAdmissionFairSharing +---- + diff --git a/modules/kueue-configuring-kueue-instance-for-admission-fair-sharing.adoc b/modules/kueue-configuring-kueue-instance-for-admission-fair-sharing.adoc new file mode 100644 index 000000000000..063575e506d5 --- /dev/null +++ b/modules/kueue-configuring-kueue-instance-for-admission-fair-sharing.adoc @@ -0,0 +1,66 @@ +// Module included in the following assemblies: +// +// * ai_workloads/kueue/admission-fair-sharing.adoc + +:_mod-docs-content-type: PROCEDURE +[id="configuring-kueue-instance-for-admission-fair-sharing_{context}"] += Configuring the {kueue-name} instance for admission fair sharing + +[role="_abstract"] +Configure {kueue-name} admission fair sharing using either the `Default` or `Custom` configuration. The Default configuration uses predefined {kueue-name} values. + +.Procedure + +. Choose the `configuration` type you want to use: ++ +* `Default`: Uses {kueue-name} predefined values. +* `Custom`: Uses {kueue-name} values that you specify. + +. 
Apply your chosen configuration: ++ +* Use the following command to create a `Default` configuration: ++ +[source,terminal] +---- +$ oc patch kueue.kueue.openshift.io/cluster --type=merge -p \ + '{"spec":{"config":{"admissionFairSharing":{"configuration":"Default"}}}}' +---- ++ +.Example output +[source,yaml] +---- +config: + admissionFairSharing: + configuration: Default +---- ++ +* Use the following command to create a `Custom` configuration that applies values that you specify: ++ +[source,terminal] +---- +$ oc patch kueue.kueue.openshift.io/cluster --type=merge -p \ + '{"spec":{"config":{"admissionFairSharing":{"configuration":"Custom","custom":{"usageHalfLifeTimeSeconds":10,"usageSamplingIntervalSeconds":10,"resourceWeights":[{"name":"cpu","weight":"2.0"}]}}}}}' +---- ++ +.Example output +[source,yaml] +---- +config: + admissionFairSharing: + configuration: Custom + custom: + resourceWeights: + - name: cpu + weight: "2.0" + usageHalfLifeTimeSeconds: 10 + usageSamplingIntervalSeconds: 10 +---- ++ +`resourceWeights`:: Assigns weights to resources. The higher the weight, the higher the penalty. + +`usageHalfLifeTimeSeconds`:: The time in seconds after which the current usage will decrease by half. That is, it controls how long the past consumption should impact future admission. + +`usageSamplingIntervalSeconds`:: The frequency in seconds that {kueue-name} updates consumedResources in FairSharingStatus. 
+ + + diff --git a/modules/kueue-configuring-localqueue-for-admission-fair-sharing.adoc b/modules/kueue-configuring-localqueue-for-admission-fair-sharing.adoc new file mode 100644 index 000000000000..1ab90b0c59c1 --- /dev/null +++ b/modules/kueue-configuring-localqueue-for-admission-fair-sharing.adoc @@ -0,0 +1,21 @@ +// Module included in the following assemblies: +// +// * ai_workloads/kueue/admission-fair-sharing.adoc + +:_mod-docs-content-type: PROCEDURE +[id="configuring-localqueue-for-admission-fair-sharing_{context}"] += Configuring a local queue for admission fair sharing (optional) + +[role="_abstract"] +Optionally, you can configure the `fairSharing` section in your `LocalQueue` object to adjust its weight in the fair sharing calculation. The higher the weight, the lower the penalty. For example, specifying a weight of `2` treats the queue as if it used half as many resources. + +.Procedure + +* Specify a `weight` value as shown in the following example: ++ +[source,yaml] +---- +spec: + fairSharing: + weight: "2" +---- \ No newline at end of file diff --git a/modules/kueue-release-notes-1.4.adoc b/modules/kueue-release-notes-1.4.adoc new file mode 100644 index 000000000000..707621cb9758 --- /dev/null +++ b/modules/kueue-release-notes-1.4.adoc @@ -0,0 +1,22 @@ +// Module included in the following assemblies: +// +// * ai_workloads/kueue/release-notes.adoc + +:_mod-docs-content-type: REFERENCE +[id="release-notes-1.4_{context}"] += Release notes for {kueue-name} version 1.4 + +[role="_abstract"] +{kueue-name} version 1.4 is a generally available release that is supported on {product-title} versions 4.18 and later. {kueue-name} version 1.4 uses link:https://kueue.sigs.k8s.io/docs/overview/[Kueue] version 0.16. 
+ +[id="release-notes-1.4-new-features_{context}"] +== New features and enhancements + +Admission fair sharing:: +This release introduces admission fair sharing, which balances workload admission across multiple local Queues feeding into a shared `ClusterQueue`. Admission fair sharing: + + - Prioritizes workloads based on historical resource consumption + - Tracks usage over time with a configurable decay function + - Applies immediate admission penalties to prevent resource monopolization + +For more information, see xref:../../ai_workloads/kueue/admission-fair-sharing.adoc#admission-fair-sharing[Admission fair sharing]. \ No newline at end of file diff --git a/modules/kueue-setting-resource-weights.adoc b/modules/kueue-setting-resource-weights.adoc new file mode 100644 index 000000000000..42e3fb74f7da --- /dev/null +++ b/modules/kueue-setting-resource-weights.adoc @@ -0,0 +1,17 @@ +// Module included in the following assemblies: +// +// * ai_workloads/kueue/admission-fair-sharing.adoc + +:_mod-docs-content-type: CONCEPT +[id="setting-resource-weights_{context}"] += Setting resource weights + +[role="_abstract"] +Resources measured in bytes, like memory, require scaled-down `resourceWeights` values. Kubernetes +represents memory in bytes, creating values that are billions of times larger than CPU core +counts. This numeric difference makes CPU weights ineffective unless you scale memory weights +down. Without this adjustment, the raw byte value of these resources will numerically dominate human-scale resources, such as CPU cores, by several orders of magnitude, effectively making their weights meaningless. + +For example, if you want to achieve an effective memory weight of `1.0`, you would need to instead specify `9.31e-10`, which corresponds to `1.0 / 1,073,741,824`. 
+ + diff --git a/modules/kueue-verifying-the-admission-fair-sharing-status.adoc b/modules/kueue-verifying-the-admission-fair-sharing-status.adoc new file mode 100644 index 000000000000..17dcd70615db --- /dev/null +++ b/modules/kueue-verifying-the-admission-fair-sharing-status.adoc @@ -0,0 +1,27 @@ +// Module included in the following assemblies: +// +// * ai_workloads/kueue/admission-fair-sharing.adoc + +:_mod-docs-content-type: PROCEDURE +[id="verifying-the-admission-fair-sharing-status_{context}"] += Verifying the admission fair sharing status + +[role="_abstract"] +Check the `admissionFairSharingStatus` status in the local queue. + +.Procedure + +* Use the following command to verify the status of admission fair sharing: ++ +[source,terminal] +---- +$ oc get lq <local_queue_name> -n <namespace> -o jsonpath={.status.fairSharing} +---- ++ +.Example output +[source,terminal] +---- +{"admissionFairSharingStatus":{"consumedResources":{"cpu":"31999m"},"lastUpdate":"2025-06-03T14:25:15Z"},"weightedShare":0} +---- + + \ No newline at end of file