diff --git a/vertical-pod-autoscaler/charts/vertical-pod-autoscaler/crds/vpa-v1-crd-gen.yaml b/vertical-pod-autoscaler/charts/vertical-pod-autoscaler/crds/vpa-v1-crd-gen.yaml
index a3f8d0e7c319..4a63de937b8b 100644
--- a/vertical-pod-autoscaler/charts/vertical-pod-autoscaler/crds/vpa-v1-crd-gen.yaml
+++ b/vertical-pod-autoscaler/charts/vertical-pod-autoscaler/crds/vpa-v1-crd-gen.yaml
@@ -254,6 +254,14 @@ spec:
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
+ - jsonPath: .spec.updatePolicy.minReplicas
+ name: MinReplicas
+ priority: 1
+ type: integer
+ - jsonPath: .spec.updatePolicy.evictAfterOOMThreshold
+ name: OOMThreshold
+ priority: 1
+ type: string
name: v1
schema:
openAPIV3Schema:
@@ -425,6 +433,14 @@ spec:
If not specified, all fields in the `PodUpdatePolicy` are set to their
default values.
properties:
+ evictAfterOOMThreshold:
+ description: |-
+                    EvictAfterOOMThreshold specifies how soon after container start an OOM must occur
+                    for the pod to be considered for quick-OOM eviction. Pods that have OOMed in less
+                    than this threshold since start will be evicted.
+ format: duration
+ pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
+ type: string
evictionRequirements:
description: |-
EvictionRequirements is a list of EvictionRequirements that need to
@@ -478,6 +494,10 @@ spec:
- Auto
type: string
type: object
+ x-kubernetes-validations:
+ - message: evictAfterOOMThreshold must be greater than 0
+ rule: '!has(self.evictAfterOOMThreshold) || duration(self.evictAfterOOMThreshold)
+ > duration(''0s'')'
required:
- targetRef
type: object
diff --git a/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml b/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml
index a3f8d0e7c319..4a63de937b8b 100644
--- a/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml
+++ b/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml
@@ -254,6 +254,14 @@ spec:
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
+ - jsonPath: .spec.updatePolicy.minReplicas
+ name: MinReplicas
+ priority: 1
+ type: integer
+ - jsonPath: .spec.updatePolicy.evictAfterOOMThreshold
+ name: OOMThreshold
+ priority: 1
+ type: string
name: v1
schema:
openAPIV3Schema:
@@ -425,6 +433,14 @@ spec:
If not specified, all fields in the `PodUpdatePolicy` are set to their
default values.
properties:
+ evictAfterOOMThreshold:
+ description: |-
+                    EvictAfterOOMThreshold specifies how soon after container start an OOM must occur
+                    for the pod to be considered for quick-OOM eviction. Pods that have OOMed in less
+                    than this threshold since start will be evicted.
+ format: duration
+ pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
+ type: string
evictionRequirements:
description: |-
EvictionRequirements is a list of EvictionRequirements that need to
@@ -478,6 +494,10 @@ spec:
- Auto
type: string
type: object
+ x-kubernetes-validations:
+ - message: evictAfterOOMThreshold must be greater than 0
+ rule: '!has(self.evictAfterOOMThreshold) || duration(self.evictAfterOOMThreshold)
+ > duration(''0s'')'
required:
- targetRef
type: object
diff --git a/vertical-pod-autoscaler/docs/flags.md b/vertical-pod-autoscaler/docs/flags.md
index 66dc939535ce..abbbd8d4e616 100644
--- a/vertical-pod-autoscaler/docs/flags.md
+++ b/vertical-pod-autoscaler/docs/flags.md
@@ -140,7 +140,7 @@ This document is auto-generated from the flag definitions in the VPA updater cod
| `add-dir-header` | | | If true, adds the file directory to the header of the log messages |
| `address` | string | ":8943" | The address to expose Prometheus metrics. |
| `alsologtostderr` | | | log to standard error as well as files (no effect when -logtostderr=true) |
-| `evict-after-oom-threshold` | | 10m0s | duration Evict pod that has OOMed in less than evict-after-oom-threshold since start. |
+| `evict-after-oom-threshold` | | 10m0s | duration Default threshold for evicting pods that have OOMed in less than evict-after-oom-threshold since start. Can be overridden per VPA via spec.updatePolicy.evictAfterOOMThreshold when the PerVPAConfig feature gate is enabled. |
| `eviction-rate-burst` | int | 1 | Burst of pods that can be evicted. |
| `eviction-rate-limit` | float | | Number of pods that can be evicted per seconds. A rate limit set to 0 or -1 will disable
the rate limiter. (default -1) |
| `eviction-tolerance` | float | 0.5 | Fraction of replica count that can be evicted for update, if more than one pod can be evicted. |
@@ -174,4 +174,3 @@ This document is auto-generated from the flag definitions in the VPA updater cod
| `v,` | | : 4 | , --v Level set the log level verbosity (default 4) |
| `vmodule` | moduleSpec | | comma-separated list of pattern=N settings for file-filtered logging |
| `vpa-object-namespace` | string | | Specifies the namespace to search for VPA objects. Leave empty to include all namespaces. If provided, the garbage collector will only clean this namespace. |
-
diff --git a/vertical-pod-autoscaler/e2e/utils/common.go b/vertical-pod-autoscaler/e2e/utils/common.go
index 4debb5835105..cd7e6ca0c4e2 100644
--- a/vertical-pod-autoscaler/e2e/utils/common.go
+++ b/vertical-pod-autoscaler/e2e/utils/common.go
@@ -81,6 +81,9 @@ var RecommenderLabels = map[string]string{"app": "vpa-recommender"}
// HamsterLabels are labels of hamster app
var HamsterLabels = map[string]string{"app": "hamster"}
+// OOMLabels are labels for OOM test pods
+var OOMLabels = map[string]string{"app": "oom-test"}
+
// SIGDescribe adds sig-autoscaling tag to test description.
// Takes args that are passed to ginkgo.Describe.
func SIGDescribe(scenario, name string, args ...interface{}) bool {
diff --git a/vertical-pod-autoscaler/e2e/v1/admission_controller.go b/vertical-pod-autoscaler/e2e/v1/admission_controller.go
index 2fa12fdc4a6b..3732c5da9fb9 100644
--- a/vertical-pod-autoscaler/e2e/v1/admission_controller.go
+++ b/vertical-pod-autoscaler/e2e/v1/admission_controller.go
@@ -956,7 +956,7 @@ var _ = AdmissionControllerE2eDescribe("Admission-controller", func() {
expectedErr: "admission webhook \"vpa.k8s.io\" denied the request: oomBumpUpRatio must be greater than or equal to 1.0, got -1",
},
{
- name: "Invalid oomBumpUpRatio (string value)",
+ name: "Invalid oomBumpUpRatio (less than 1)",
vpaJSON: `{
"apiVersion": "autoscaling.k8s.io/v1",
"kind": "VerticalPodAutoscaler",
@@ -973,16 +973,16 @@ var _ = AdmissionControllerE2eDescribe("Admission-controller", func() {
"resourcePolicy": {
"containerPolicies": [{
"containerName": "*",
- "oomBumpUpRatio": "not-a-number",
+ "oomBumpUpRatio": 0.5,
"oomMinBumpUp": 104857600
}]
}
}
}`,
- expectedErr: "admission webhook \"vpa\\.k8s\\.io\" denied the request: quantities must match the regular expression",
+ expectedErr: "admission webhook \"vpa.k8s.io\" denied the request: oomBumpUpRatio must be greater than or equal to 1.0, got 0.5",
},
{
- name: "Invalid oomBumpUpRatio (less than 1)",
+ name: "Invalid oomMinBumpUp (negative value)",
vpaJSON: `{
"apiVersion": "autoscaling.k8s.io/v1",
"kind": "VerticalPodAutoscaler",
@@ -999,16 +999,16 @@ var _ = AdmissionControllerE2eDescribe("Admission-controller", func() {
"resourcePolicy": {
"containerPolicies": [{
"containerName": "*",
- "oomBumpUpRatio": 0.5,
- "oomMinBumpUp": 104857600
+ "oomBumpUpRatio": 2,
+ "oomMinBumpUp": -1
}]
}
}
}`,
- expectedErr: "admission webhook \"vpa.k8s.io\" denied the request: oomBumpUpRatio must be greater than or equal to 1.0, got 0.5",
+ expectedErr: "admission webhook \"vpa.k8s.io\" denied the request: oomMinBumpUp must be greater than or equal to 0, got -1 bytes",
},
{
- name: "Invalid oomMinBumpUp (negative value)",
+ name: "Invalid oomBumpUpRatio (string value)",
vpaJSON: `{
"apiVersion": "autoscaling.k8s.io/v1",
"kind": "VerticalPodAutoscaler",
@@ -1025,13 +1025,13 @@ var _ = AdmissionControllerE2eDescribe("Admission-controller", func() {
"resourcePolicy": {
"containerPolicies": [{
"containerName": "*",
- "oomBumpUpRatio": 2,
- "oomMinBumpUp": -1
+ "oomBumpUpRatio": "not-a-number",
+ "oomMinBumpUp": 104857600
}]
}
}
}`,
- expectedErr: "admission webhook \"vpa.k8s.io\" denied the request: oomMinBumpUp must be greater than or equal to 0, got -1 bytes",
+ expectedErr: "admission webhook \"vpa.k8s.io\" denied the request: quantities must match the regular expression",
},
}
for _, tc := range testCases {
diff --git a/vertical-pod-autoscaler/e2e/v1/autoscaling_utils.go b/vertical-pod-autoscaler/e2e/v1/autoscaling_utils.go
index f6cd913df0f9..80520c64f9e1 100644
--- a/vertical-pod-autoscaler/e2e/v1/autoscaling_utils.go
+++ b/vertical-pod-autoscaler/e2e/v1/autoscaling_utils.go
@@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/wait"
+ "k8s.io/autoscaler/vertical-pod-autoscaler/e2e/utils"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
e2edebug "k8s.io/kubernetes/test/e2e/framework/debug"
@@ -444,6 +445,7 @@ func runOomingReplicationController(c clientset.Interface, ns, name string, repl
Namespace: ns,
Timeout: timeoutRC,
Replicas: replicas,
+ Labels: utils.OOMLabels,
Annotations: make(map[string]string),
MemRequest: 1024 * 1024 * 1024,
MemLimit: 1024 * 1024 * 1024,
diff --git a/vertical-pod-autoscaler/e2e/v1/common.go b/vertical-pod-autoscaler/e2e/v1/common.go
index 386757f92450..c6414c94176c 100644
--- a/vertical-pod-autoscaler/e2e/v1/common.go
+++ b/vertical-pod-autoscaler/e2e/v1/common.go
@@ -144,6 +144,14 @@ func GetHamsterPods(f *framework.Framework) (*apiv1.PodList, error) {
return f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(context.TODO(), options)
}
+// GetOOMPods returns running OOM test pods (matched by utils.OOMLabels)
+func GetOOMPods(f *framework.Framework) (*apiv1.PodList, error) {
+ // TODO(omerap12): merge GetHamsterPods and GetOOMPods functions.
+ label := labels.SelectorFromSet(labels.Set(utils.OOMLabels))
+ options := metav1.ListOptions{LabelSelector: label.String(), FieldSelector: getPodSelectorExcludingDonePodsOrDie()}
+ return f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(context.TODO(), options)
+}
+
// NewTestCronJob returns a CronJob for test purposes.
func NewTestCronJob(name, schedule string, replicas int32) *batchv1.CronJob {
backoffLimit := utils.DefaultHamsterBackoffLimit
@@ -342,6 +350,16 @@ func CheckNoPodsEvicted(f *framework.Framework, initialPodSet PodSet) {
gomega.Expect(restarted).To(gomega.Equal(0), "there should be no pod evictions")
}
+// CheckNoPodsEvictedOOM waits long enough for the VPA updater to start evicting pods
+// and then verifies that none of the OOM test pods were evicted.
+// TODO(omerap12): merge this with CheckNoPodsEvicted.
+func CheckNoPodsEvictedOOM(f *framework.Framework, initialPodSet PodSet) {
+ time.Sleep(VpaEvictionTimeout)
+ currentPodList, err := GetOOMPods(f)
+	gomega.Expect(err).NotTo(gomega.HaveOccurred(), "unexpected error when listing OOM test pods to check number of pod evictions")
+ restarted := GetEvictedPodsCount(MakePodSet(currentPodList), initialPodSet)
+ gomega.Expect(restarted).To(gomega.Equal(0), "there should be no pod evictions")
+}
+
// WaitForUncappedCPURecommendationAbove pools VPA object until uncapped recommendation is above specified value.
// Returns polled VPA object. On timeout returns error.
func WaitForUncappedCPURecommendationAbove(c vpa_clientset.Interface, vpa *vpa_types.VerticalPodAutoscaler, minMilliCPU int64) (*vpa_types.VerticalPodAutoscaler, error) {
diff --git a/vertical-pod-autoscaler/e2e/v1/updater.go b/vertical-pod-autoscaler/e2e/v1/updater.go
index 0039565a071a..90afbc3f74f5 100644
--- a/vertical-pod-autoscaler/e2e/v1/updater.go
+++ b/vertical-pod-autoscaler/e2e/v1/updater.go
@@ -26,6 +26,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/vertical-pod-autoscaler/e2e/utils"
vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
+ "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/status"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/test"
"k8s.io/kubernetes/test/e2e/framework"
@@ -207,6 +208,70 @@ var _ = UpdaterE2eDescribe("Updater", func() {
})
})
+var _ = UpdaterE2eDescribe("Updater with PerVPAConfig", func() {
+ const replicas = 3
+ const statusUpdateInterval = 10 * time.Second
+ f := framework.NewDefaultFramework("vertical-pod-autoscaling")
+ f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline
+
+ f.It("does not evict pods with OOM when threshold is very small", framework.WithFeatureGate(features.PerVPAConfig), func() {
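+		// evictAfterOOMThreshold is set to 1 second below, so only containers that OOM within
+		// 1 second of starting count as quick OOMs; the test pods take longer than that to OOM,
+		// so the updater is expected to leave them running.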
+ ginkgo.By("Setting up the Admission Controller status")
+ stopCh := make(chan struct{})
+ statusUpdater := status.NewUpdater(
+ f.ClientSet,
+ status.AdmissionControllerStatusName,
+ status.AdmissionControllerStatusNamespace,
+ statusUpdateInterval,
+ "e2e test",
+ )
+ defer func() {
+ ginkgo.By("Deleting the Admission Controller status")
+ close(stopCh)
+ err := f.ClientSet.CoordinationV1().Leases(status.AdmissionControllerStatusNamespace).
+ Delete(context.TODO(), status.AdmissionControllerStatusName, metav1.DeleteOptions{})
+ gomega.Expect(err).NotTo(gomega.HaveOccurred())
+ }()
+ statusUpdater.Run(stopCh)
+
+		ginkgo.By("Setting up a replication controller with pods that will OOM")
+ runOomingReplicationController(
+ f.ClientSet,
+ f.Namespace.Name,
+ "hamster",
+ replicas,
+ )
+
+ ginkgo.By("Waiting for pods to be created and OOM")
+ time.Sleep(10 * time.Second)
+
+ podList, err := GetOOMPods(f)
+ gomega.Expect(err).NotTo(gomega.HaveOccurred())
+ gomega.Expect(len(podList.Items)).To(gomega.BeNumerically(">", 0))
+
+ disabledThreshold := 1 * time.Second // Disable quick OOM eviction
+ ginkgo.By("Setting up a VPA CRD with very short evictAfterOOMThreshold (1 second)")
+ targetRef := &autoscaling.CrossVersionObjectReference{
+ APIVersion: "apps/v1",
+ Kind: "Deployment",
+ Name: "hamster",
+ }
+ containerName := utils.GetHamsterContainerNameByIndex(0)
+ vpaCRD := test.VerticalPodAutoscaler().
+ WithName("hamster-vpa").
+ WithNamespace(f.Namespace.Name).
+ WithTargetRef(targetRef).
+ WithUpdateMode(vpa_types.UpdateModeRecreate).
+ WithEvictAfterOOMThreshold(&metav1.Duration{Duration: disabledThreshold}).
+ WithContainer(containerName).
+ Get()
+
+ utils.InstallVPA(f, vpaCRD)
+
+		ginkgo.By("Waiting to verify pods are NOT evicted (quick-OOM threshold is 1 second)")
+ CheckNoPodsEvictedOOM(f, MakePodSet(podList))
+ })
+})
+
func setupPodsForUpscalingEviction(f *framework.Framework) *apiv1.PodList {
return setupPodsForEviction(f, "100m", "100Mi", nil)
}
diff --git a/vertical-pod-autoscaler/hack/install.sh b/vertical-pod-autoscaler/hack/install.sh
new file mode 100644
index 000000000000..180189bc6b15
--- /dev/null
+++ b/vertical-pod-autoscaler/hack/install.sh
@@ -0,0 +1,447 @@
+#!/bin/sh
+set -e
+
+usage() {
+ this=$1
+  cat <<EOF
+Usage: $this [-b <bindir>] [-d] [<tag>]
+ -b sets bindir or installation directory, Defaults to ./bin
+ -d turns on debug logging
+   <tag> is a tag from
+ https://github.com/golangci/golangci-lint/releases
+ If tag is missing, then the latest will be used.
+
+EOF
+ exit 2
+}
+
+parse_args() {
+  # BINDIR is ./bin unless set by ENV
+ # overridden by flag below
+
+ BINDIR=${BINDIR:-./bin}
+ while getopts "b:dh?x" arg; do
+ case "$arg" in
+ b) BINDIR="$OPTARG" ;;
+ d) log_set_priority 10 ;;
+ h | \?) usage "$0" ;;
+ x) set -x ;;
+ esac
+ done
+ shift $((OPTIND - 1))
+ TAG=$1
+}
+# this function wraps all the destructive operations
+# if a curl|bash cuts off the end of the script due to
+# network, either nothing will happen or will syntax error
+# out preventing half-done work
+execute() {
+ tmpdir=$(mktemp -d)
+ log_debug "downloading files into ${tmpdir}"
+ http_download "${tmpdir}/${TARBALL}" "${TARBALL_URL}"
+ http_download "${tmpdir}/${CHECKSUM}" "${CHECKSUM_URL}"
+ hash_sha256_verify "${tmpdir}/${TARBALL}" "${tmpdir}/${CHECKSUM}"
+ srcdir="${tmpdir}/${NAME}"
+ rm -rf "${srcdir}"
+ (cd "${tmpdir}" && untar "${TARBALL}")
+ test ! -d "${BINDIR}" && install -d "${BINDIR}"
+ for binexe in $BINARIES; do
+ if [ "$OS" = "windows" ]; then
+ binexe="${binexe}.exe"
+ fi
+ install "${srcdir}/${binexe}" "${BINDIR}/"
+ log_info "installed ${BINDIR}/${binexe}"
+ done
+ rm -rf "${tmpdir}"
+}
+get_binaries() {
+ case "$PLATFORM" in
+ darwin/amd64) BINARIES="golangci-lint" ;;
+ darwin/arm64) BINARIES="golangci-lint" ;;
+ darwin/armv6) BINARIES="golangci-lint" ;;
+ darwin/armv7) BINARIES="golangci-lint" ;;
+ darwin/mips64) BINARIES="golangci-lint" ;;
+ darwin/mips64le) BINARIES="golangci-lint" ;;
+ darwin/ppc64le) BINARIES="golangci-lint" ;;
+ darwin/s390x) BINARIES="golangci-lint" ;;
+ freebsd/386) BINARIES="golangci-lint" ;;
+ freebsd/amd64) BINARIES="golangci-lint" ;;
+ freebsd/arm64) BINARIES="golangci-lint" ;;
+ freebsd/armv6) BINARIES="golangci-lint" ;;
+ freebsd/armv7) BINARIES="golangci-lint" ;;
+ freebsd/mips64) BINARIES="golangci-lint" ;;
+ freebsd/mips64le) BINARIES="golangci-lint" ;;
+ freebsd/ppc64le) BINARIES="golangci-lint" ;;
+ freebsd/s390x) BINARIES="golangci-lint" ;;
+ illumos/amd64) BINARIES="golangci-lint" ;;
+ linux/386) BINARIES="golangci-lint" ;;
+ linux/amd64) BINARIES="golangci-lint" ;;
+ linux/arm64) BINARIES="golangci-lint" ;;
+ linux/armv6) BINARIES="golangci-lint" ;;
+ linux/armv7) BINARIES="golangci-lint" ;;
+ linux/mips64) BINARIES="golangci-lint" ;;
+ linux/mips64le) BINARIES="golangci-lint" ;;
+ linux/ppc64le) BINARIES="golangci-lint" ;;
+ linux/s390x) BINARIES="golangci-lint" ;;
+ linux/riscv64) BINARIES="golangci-lint" ;;
+ linux/loong64) BINARIES="golangci-lint" ;;
+ netbsd/386) BINARIES="golangci-lint" ;;
+ netbsd/amd64) BINARIES="golangci-lint" ;;
+ netbsd/armv6) BINARIES="golangci-lint" ;;
+ netbsd/armv7) BINARIES="golangci-lint" ;;
+ windows/386) BINARIES="golangci-lint" ;;
+ windows/amd64) BINARIES="golangci-lint" ;;
+ windows/arm64) BINARIES="golangci-lint" ;;
+ windows/armv6) BINARIES="golangci-lint" ;;
+ windows/armv7) BINARIES="golangci-lint" ;;
+ windows/mips64) BINARIES="golangci-lint" ;;
+ windows/mips64le) BINARIES="golangci-lint" ;;
+ windows/ppc64le) BINARIES="golangci-lint" ;;
+ windows/s390x) BINARIES="golangci-lint" ;;
+ *)
+ log_crit "platform $PLATFORM is not supported. Make sure this script is up-to-date and file request at https://github.com/${PREFIX}/issues/new"
+ exit 1
+ ;;
+ esac
+}
+tag_to_version() {
+ if [ -z "${TAG}" ]; then
+ log_info "checking GitHub for latest tag"
+ else
+ log_info "checking GitHub for tag '${TAG}'"
+ fi
+ REALTAG=$(github_release "$OWNER/$REPO" "${TAG}") && true
+ if test -z "$REALTAG"; then
+ log_crit "unable to find '${TAG}' - use 'latest' or see https://github.com/${PREFIX}/releases for details"
+ exit 1
+ fi
+ # if version starts with 'v', remove it
+ TAG="$REALTAG"
+ VERSION=${TAG#v}
+}
+adjust_format() {
+ # change format (tar.gz or zip) based on OS
+ case ${OS} in
+ windows) FORMAT=zip ;;
+ esac
+ true
+}
+adjust_os() {
+ # adjust archive name based on OS
+ true
+}
+adjust_arch() {
+ # adjust archive name based on ARCH
+ true
+}
+
+cat /dev/null <<EOF
+------------------------------------------------------------------------
+https://github.com/client9/shlib - portable posix shell functions
+Public domain - http://unlicense.org
+https://github.com/client9/shlib/blob/master/LICENSE.md
+but credit (and pull requests) appreciated.
+------------------------------------------------------------------------
+EOF
+is_command() {
+  command -v "$1" >/dev/null
+}
+echoerr() {
+ echo "$@" 1>&2
+}
+_logp=6
+log_set_priority() {
+ _logp="$1"
+}
+log_priority() {
+ if test -z "$1"; then
+ echo "$_logp"
+ return
+ fi
+ [ "$1" -le "$_logp" ]
+}
+log_tag() {
+ case $1 in
+ 0) echo "emerg" ;;
+ 1) echo "alert" ;;
+ 2) echo "crit" ;;
+ 3) echo "err" ;;
+ 4) echo "warning" ;;
+ 5) echo "notice" ;;
+ 6) echo "info" ;;
+ 7) echo "debug" ;;
+ *) echo "$1" ;;
+ esac
+}
+log_debug() {
+ log_priority 7 || return 0
+ echoerr "$(log_prefix)" "$(log_tag 7)" "$@"
+}
+log_info() {
+ log_priority 6 || return 0
+ echoerr "$(log_prefix)" "$(log_tag 6)" "$@"
+}
+log_err() {
+ log_priority 3 || return 0
+ echoerr "$(log_prefix)" "$(log_tag 3)" "$@"
+}
+log_crit() {
+ log_priority 2 || return 0
+ echoerr "$(log_prefix)" "$(log_tag 2)" "$@"
+}
+uname_os() {
+ os=$(uname -s | tr '[:upper:]' '[:lower:]')
+ case "$os" in
+ msys*) os="windows" ;;
+ mingw*) os="windows" ;;
+ cygwin*) os="windows" ;;
+ win*) os="windows" ;;
+ sunos) [ "$(uname -o)" = "illumos" ] && os=illumos ;;
+ esac
+ echo "$os"
+}
+uname_arch() {
+ arch=$(uname -m)
+ case $arch in
+ x86_64) arch="amd64" ;;
+ x86) arch="386" ;;
+ i686) arch="386" ;;
+ i386) arch="386" ;;
+ i86pc) arch="amd64" ;;
+ aarch64) arch="arm64" ;;
+ armv5*) arch="armv5" ;;
+ armv6*) arch="armv6" ;;
+ armv7*) arch="armv7" ;;
+ loongarch64) arch="loong64" ;;
+ esac
+ echo "${arch}"
+}
+uname_os_check() {
+ os=$(uname_os)
+ case "$os" in
+ darwin) return 0 ;;
+ dragonfly) return 0 ;;
+ freebsd) return 0 ;;
+ illumos) return 0;;
+ linux) return 0 ;;
+ android) return 0 ;;
+ nacl) return 0 ;;
+ netbsd) return 0 ;;
+ openbsd) return 0 ;;
+ plan9) return 0 ;;
+ solaris) return 0 ;;
+ windows) return 0 ;;
+ esac
+ log_crit "uname_os_check '$(uname -s)' got converted to '$os' which is not a GOOS value."
+ return 1
+}
+uname_arch_check() {
+ arch=$(uname_arch)
+ case "$arch" in
+ 386) return 0 ;;
+ amd64) return 0 ;;
+ arm64) return 0 ;;
+ armv5) return 0 ;;
+ armv6) return 0 ;;
+ armv7) return 0 ;;
+ ppc64) return 0 ;;
+ ppc64le) return 0 ;;
+ mips) return 0 ;;
+ mipsle) return 0 ;;
+ mips64) return 0 ;;
+ mips64le) return 0 ;;
+ s390x) return 0 ;;
+ riscv64) return 0 ;;
+ amd64p32) return 0 ;;
+ loong64) return 0 ;;
+ esac
+ log_crit "uname_arch_check '$(uname -m)' got converted to '$arch' which is not a GOARCH value."
+ return 1
+}
+untar() {
+ tarball=$1
+ case "${tarball}" in
+ *.tar.gz | *.tgz) tar --no-same-owner -xzf "${tarball}" ;;
+ *.tar) tar --no-same-owner -xf "${tarball}" ;;
+ *.zip) unzip "${tarball}" ;;
+ *)
+ log_err "untar unknown archive format for ${tarball}"
+ return 1
+ ;;
+ esac
+}
+http_download_curl() {
+ local_file=$1
+ source_url=$2
+ header=$3
+
+ # workaround https://github.com/curl/curl/issues/13845
+ curl_version=$(curl --version | head -n 1 | awk '{ print $2 }')
+ if [ "$curl_version" = "8.8.0" ]; then
+ log_debug "http_download_curl curl $curl_version detected"
+ if [ -z "$header" ]; then
+ curl -sL -o "$local_file" "$source_url"
+ else
+ curl -sL -H "$header" -o "$local_file" "$source_url"
+
+ nf=$(cat "$local_file" | jq -r '.error // ""')
+ if [ ! -z "$nf" ]; then
+ log_debug "http_download_curl received an error: $nf"
+ return 1
+ fi
+ fi
+
+ return 0
+ fi
+
+ if [ -z "$header" ]; then
+ code=$(curl -w '%{http_code}' -sL -o "$local_file" "$source_url")
+ else
+ code=$(curl -w '%{http_code}' -sL -H "$header" -o "$local_file" "$source_url")
+ fi
+ if [ "$code" != "200" ]; then
+ log_err "http_download_curl received HTTP status $code"
+ return 1
+ fi
+ return 0
+}
+http_download_wget() {
+ local_file=$1
+ source_url=$2
+ header=$3
+ local wget_output
+ local code
+ if [ -z "$header" ]; then
+ wget_output=$(wget --server-response --quiet -O "$local_file" "$source_url" 2>&1)
+ else
+ wget_output=$(wget --server-response --quiet --header "$header" -O "$local_file" "$source_url" 2>&1)
+ fi
+ local wget_exit=$?
+ if [ $wget_exit -ne 0 ]; then
+ log_err "http_download_wget failed: wget exited with status $wget_exit"
+ return 1
+ fi
+ code=$(echo "$wget_output" | awk '/^ HTTP/{print $2}' | tail -n1)
+ if [ "$code" != "200" ]; then
+ log_err "http_download_wget received HTTP status $code"
+ return 1
+ fi
+ return 0
+}
+http_download() {
+ log_debug "http_download $2"
+ if is_command curl; then
+ http_download_curl "$@"
+ return
+ elif is_command wget; then
+ http_download_wget "$@"
+ return
+ fi
+ log_crit "http_download unable to find wget or curl"
+ return 1
+}
+http_copy() {
+ tmp=$(mktemp)
+ http_download "${tmp}" "$1" "$2" || return 1
+ body=$(cat "$tmp")
+ rm -f "${tmp}"
+ echo "$body"
+}
+github_release() {
+ owner_repo=$1
+ version=$2
+ test -z "$version" && version="latest"
+ giturl="https://github.com/${owner_repo}/releases/${version}"
+ json=$(http_copy "$giturl" "Accept:application/json")
+ test -z "$json" && return 1
+ version=$(echo "$json" | tr -s '\n' ' ' | sed 's/.*"tag_name":"//' | sed 's/".*//')
+ test -z "$version" && return 1
+ echo "$version"
+}
+hash_sha256() {
+ TARGET=${1:-/dev/stdin}
+ if is_command gsha256sum; then
+ hash=$(gsha256sum "$TARGET") || return 1
+ echo "$hash" | cut -d ' ' -f 1
+ elif is_command sha256sum; then
+ hash=$(sha256sum "$TARGET") || return 1
+ echo "$hash" | cut -d ' ' -f 1
+ elif is_command shasum; then
+ hash=$(shasum -a 256 "$TARGET" 2>/dev/null) || return 1
+ echo "$hash" | cut -d ' ' -f 1
+ elif is_command openssl; then
+ hash=$(openssl -dst openssl dgst -sha256 "$TARGET") || return 1
+ echo "$hash" | cut -d ' ' -f a
+ else
+ log_crit "hash_sha256 unable to find command to compute sha-256 hash"
+ return 1
+ fi
+}
+hash_sha256_verify() {
+ TARGET=$1
+ checksums=$2
+ if [ -z "$checksums" ]; then
+ log_err "hash_sha256_verify checksum file not specified in arg2"
+ return 1
+ fi
+ BASENAME=${TARGET##*/}
+ want=$(grep "${BASENAME}" "${checksums}" 2>/dev/null | tr '\t' ' ' | cut -d ' ' -f 1)
+ if [ -z "$want" ]; then
+ log_err "hash_sha256_verify unable to find checksum for '${TARGET}' in '${checksums}'"
+ return 1
+ fi
+ got=$(hash_sha256 "$TARGET")
+ if [ "$want" != "$got" ]; then
+ log_err "hash_sha256_verify checksum for '$TARGET' did not verify ${want} vs $got"
+ return 1
+ fi
+}
+cat /dev/null <<EOF
+------------------------------------------------------------------------
+End of functions from https://github.com/client9/shlib
+------------------------------------------------------------------------
+EOF
diff --git a/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/types.go b/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/types.go
--- a/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/types.go
+++ b/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/types.go
@@ ... @@
// PodUpdatePolicy describes the rules on how changes are applied to the pods.
+// +kubebuilder:validation:XValidation:rule="!has(self.evictAfterOOMThreshold) || duration(self.evictAfterOOMThreshold) > duration('0s')",message="evictAfterOOMThreshold must be greater than 0"
type PodUpdatePolicy struct {
// Controls when autoscaler applies changes to the pod resources.
// The default is 'Auto'.
@@ -149,6 +152,15 @@ type PodUpdatePolicy struct {
// EvictionRequirement is specified, all of them need to be fulfilled to allow eviction.
// +optional
EvictionRequirements []*EvictionRequirement `json:"evictionRequirements,omitempty" protobuf:"bytes,3,opt,name=evictionRequirements"`
+
+	// EvictAfterOOMThreshold specifies how soon after container start an OOM must occur
+	// for the pod to be considered for quick-OOM eviction. Pods that have OOMed in less
+	// than this threshold since start will be evicted.
+ // +optional
+ // +kubebuilder:validation:Type=string
+ // +kubebuilder:validation:Format=duration
+ // +kubebuilder:validation:Pattern=`^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$`
+ EvictAfterOOMThreshold *metav1.Duration `json:"evictAfterOOMThreshold,omitempty" protobuf:"bytes,4,opt,name=evictAfterOOMThreshold"`
}
// UpdateMode controls when autoscaler applies changes to the pod resources.
diff --git a/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/zz_generated.deepcopy.go b/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/zz_generated.deepcopy.go
index 86ff44f69ea1..4f1aaa3fb80f 100644
--- a/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/zz_generated.deepcopy.go
+++ b/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/zz_generated.deepcopy.go
@@ -24,6 +24,7 @@ package v1
import (
autoscalingv1 "k8s.io/api/autoscaling/v1"
corev1 "k8s.io/api/core/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
)
@@ -178,6 +179,11 @@ func (in *PodUpdatePolicy) DeepCopyInto(out *PodUpdatePolicy) {
}
}
}
+ if in.EvictAfterOOMThreshold != nil {
+ in, out := &in.EvictAfterOOMThreshold, &out.EvictAfterOOMThreshold
+ *out = new(metav1.Duration)
+ **out = **in
+ }
return
}
diff --git a/vertical-pod-autoscaler/pkg/updater/priority/update_priority_calculator.go b/vertical-pod-autoscaler/pkg/updater/priority/update_priority_calculator.go
index 55f2f92931e9..0b0fd39c0ac1 100644
--- a/vertical-pod-autoscaler/pkg/updater/priority/update_priority_calculator.go
+++ b/vertical-pod-autoscaler/pkg/updater/priority/update_priority_calculator.go
@@ -29,17 +29,23 @@ import (
"k8s.io/klog/v2"
vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
+ "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations"
vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa"
)
+const (
+ // DefaultEvictAfterOOMThreshold is the default time threshold for evicting pods after OOM
+ DefaultEvictAfterOOMThreshold = 10 * time.Minute
+)
+
var (
defaultUpdateThreshold = flag.Float64("pod-update-threshold", 0.1, "Ignore updates that have priority lower than the value of this flag")
podLifetimeUpdateThreshold = flag.Duration("in-recommendation-bounds-eviction-lifetime-threshold", time.Hour*12, "Pods that live for at least that long can be evicted even if their request is within the [MinRecommended...MaxRecommended] range")
- evictAfterOOMThreshold = flag.Duration("evict-after-oom-threshold", 10*time.Minute,
- `Evict pod that has OOMed in less than evict-after-oom-threshold since start.`)
+ evictAfterOOMThreshold = flag.Duration("evict-after-oom-threshold", DefaultEvictAfterOOMThreshold,
+		`Default threshold for evicting pods that have OOMed in less than evict-after-oom-threshold since start. Can be overridden per VPA via spec.updatePolicy.evictAfterOOMThreshold when the PerVPAConfig feature gate is enabled.`)
)
// UpdatePriorityCalculator is responsible for prioritizing updates on pods.
@@ -108,10 +114,11 @@ func (calc *UpdatePriorityCalculator) AddPod(pod *apiv1.Pod, now time.Time) {
klog.V(4).InfoS("Container with ContainerScalingModeOff. Skipping container quick OOM calculations", "containerName", cs.Name)
continue
}
+ evictOOMThreshold := calc.getEvictOOMThreshold()
terminationState := &cs.LastTerminationState
if terminationState.Terminated != nil &&
terminationState.Terminated.Reason == "OOMKilled" &&
- terminationState.Terminated.FinishedAt.Sub(terminationState.Terminated.StartedAt.Time) < *evictAfterOOMThreshold {
+ terminationState.Terminated.FinishedAt.Sub(terminationState.Terminated.StartedAt.Time) < evictOOMThreshold {
quickOOM = true
klog.V(2).InfoS("Quick OOM detected in pod", "pod", klog.KObj(pod), "containerName", cs.Name)
}
@@ -198,6 +205,21 @@ func (calc *UpdatePriorityCalculator) GetProcessedRecommendationTargets(r *vpa_t
return sb.String()
}
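+// getEvictOOMThreshold returns the quick-OOM eviction threshold for the VPA handled by this
+// calculator: the per-VPA spec.updatePolicy.evictAfterOOMThreshold when the PerVPAConfig
+// feature gate is enabled and the field is set, otherwise the value of the
+// --evict-after-oom-threshold flag.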
+func (calc *UpdatePriorityCalculator) getEvictOOMThreshold() time.Duration {
+ evictOOMThreshold := *evictAfterOOMThreshold
+
+ if calc.vpa.Spec.UpdatePolicy == nil || calc.vpa.Spec.UpdatePolicy.EvictAfterOOMThreshold == nil {
+ return evictOOMThreshold
+ }
+
+ if !features.Enabled(features.PerVPAConfig) {
+ klog.V(4).InfoS("feature flag is off, falling back to default EvictAfterOOMThreshold", "flagName", features.PerVPAConfig)
+ return evictOOMThreshold
+ }
+
+ return calc.vpa.Spec.UpdatePolicy.EvictAfterOOMThreshold.Duration
+}
+
func parseVpaObservedContainers(pod *apiv1.Pod) (bool, sets.Set[string]) {
observedContainers, hasObservedContainers := pod.GetAnnotations()[annotations.VpaObservedContainersLabel]
vpaContainerSet := sets.New[string]()
diff --git a/vertical-pod-autoscaler/pkg/utils/test/test_vpa.go b/vertical-pod-autoscaler/pkg/utils/test/test_vpa.go
index 5a32bc0a2069..0fc355915aed 100644
--- a/vertical-pod-autoscaler/pkg/utils/test/test_vpa.go
+++ b/vertical-pod-autoscaler/pkg/utils/test/test_vpa.go
@@ -33,6 +33,7 @@ type VerticalPodAutoscalerBuilder interface {
WithContainer(containerName string) VerticalPodAutoscalerBuilder
WithNamespace(namespace string) VerticalPodAutoscalerBuilder
WithUpdateMode(updateMode vpa_types.UpdateMode) VerticalPodAutoscalerBuilder
+ WithEvictAfterOOMThreshold(*meta.Duration) VerticalPodAutoscalerBuilder
WithCreationTimestamp(timestamp time.Time) VerticalPodAutoscalerBuilder
WithMinAllowed(containerName, cpu, memory string) VerticalPodAutoscalerBuilder
WithMaxAllowed(containerName, cpu, memory string) VerticalPodAutoscalerBuilder
@@ -121,6 +122,15 @@ func (b *verticalPodAutoscalerBuilder) WithUpdateMode(updateMode vpa_types.Updat
return &c
}
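+// WithEvictAfterOOMThreshold sets spec.updatePolicy.evictAfterOOMThreshold on the built VPA,
+// creating the update policy if it has not been set yet.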
+func (b *verticalPodAutoscalerBuilder) WithEvictAfterOOMThreshold(threshold *meta.Duration) VerticalPodAutoscalerBuilder {
+ c := *b
+ if c.updatePolicy == nil {
+ c.updatePolicy = &vpa_types.PodUpdatePolicy{}
+ }
+ c.updatePolicy.EvictAfterOOMThreshold = threshold
+ return &c
+}
+
func (b *verticalPodAutoscalerBuilder) WithCreationTimestamp(timestamp time.Time) VerticalPodAutoscalerBuilder {
c := *b
c.creationTimestamp = timestamp
diff --git a/vertical-pod-autoscaler/test.yaml b/vertical-pod-autoscaler/test.yaml
new file mode 100644
index 000000000000..382ff7c9fc14
--- /dev/null
+++ b/vertical-pod-autoscaler/test.yaml
@@ -0,0 +1,40 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: stress-deployment
+ namespace: default
+spec:
+ replicas: 2
+ selector:
+ matchLabels:
+ app: stress-deployment
+ template:
+ metadata:
+ labels:
+ app: stress-deployment
+ spec:
+ containers:
+ - name: stress-container
+ image: jfleach/docker-arm-stress-ng
+ resources:
+ requests:
+ cpu: 100m
+ memory: 100Mi
+ limits:
+ cpu: 200m
+ memory: 200Mi
+ args:
+ - "--cpu=4 --timeout 150000s"
+---
+apiVersion: autoscaling.k8s.io/v1
+kind: VerticalPodAutoscaler
+metadata:
+ name: cpu-consumer-vpa
+ namespace: default
+spec:
+ targetRef:
+ apiVersion: apps/v1
+ kind: Deployment
+ name: stress-deployment
+ updatePolicy:
+ updateMode: "Initial"
diff --git a/vertical-pod-autoscaler/vpa.yaml b/vertical-pod-autoscaler/vpa.yaml
new file mode 100644
index 000000000000..0de320287555
--- /dev/null
+++ b/vertical-pod-autoscaler/vpa.yaml
@@ -0,0 +1,16 @@
+apiVersion: autoscaling.k8s.io/v1
+kind: VerticalPodAutoscaler
+metadata:
+ name: hamster-vpa
+ namespace: default
+spec:
+ targetRef:
+ apiVersion: apps/v1
+ kind: Deployment
+ name: hamster
+ updatePolicy:
+ updateMode: "InPlaceOrRecreate"
+ resourcePolicy:
+ containerPolicies:
+ - containerName: hamster
+ controlledResources: ["cpu", "memory"]