From f3f58d8e1d36a035b1ec4a3d41f007a1b6383abd Mon Sep 17 00:00:00 2001 From: Yap Sok Ann Date: Mon, 24 Nov 2025 15:08:16 +0700 Subject: [PATCH] Use pod-template-hash from the server side --- charts/flagger/templates/deployment.yaml | 3 + cmd/flagger/main.go | 4 +- pkg/canary/daemonset_status.go | 3 +- pkg/canary/deployment_controller.go | 7 ++- pkg/canary/deployment_status.go | 12 +++- pkg/canary/factory.go | 4 ++ pkg/canary/knative_controller.go | 3 +- pkg/canary/service_controller.go | 3 +- pkg/canary/spec.go | 76 ++++++++++++++++++++++++ pkg/canary/status.go | 4 +- 10 files changed, 110 insertions(+), 9 deletions(-) diff --git a/charts/flagger/templates/deployment.yaml b/charts/flagger/templates/deployment.yaml index ee7d22b50..2b86324c2 100644 --- a/charts/flagger/templates/deployment.yaml +++ b/charts/flagger/templates/deployment.yaml @@ -153,6 +153,9 @@ spec: {{- if .Values.noCrossNamespaceRefs }} - -no-cross-namespace-refs={{ .Values.noCrossNamespaceRefs }} {{- end }} + {{- if .Values.useServerSideHash }} + - -use-server-side-hash={{ .Values.useServerSideHash }} + {{- end }} livenessProbe: exec: command: diff --git a/cmd/flagger/main.go b/cmd/flagger/main.go index fa7bb1a94..9ee04b966 100644 --- a/cmd/flagger/main.go +++ b/cmd/flagger/main.go @@ -88,6 +88,7 @@ var ( kubeconfigServiceMesh string clusterName string noCrossNamespaceRefs bool + useServerSideHash bool ) func init() { @@ -123,6 +124,7 @@ func init() { flag.StringVar(&kubeconfigServiceMesh, "kubeconfig-service-mesh", "", "Path to a kubeconfig for the service mesh control plane cluster.") flag.StringVar(&clusterName, "cluster-name", "", "Cluster name to be included in alert msgs.") flag.BoolVar(&noCrossNamespaceRefs, "no-cross-namespace-refs", false, "When set to true, Flagger can only refer to resources in the same namespace.") + flag.BoolVar(&useServerSideHash, "use-server-side-hash", false, "Enable server-side hash computation for deployments to avoid redeployments when upgrading Flagger versions.") } func main() { @@ -243,7 +245,7 @@ func main() { includeLabelPrefixArray := strings.Split(includeLabelPrefix, ",") - canaryFactory := canary.NewFactory(kubeClient, flaggerClient, knativeClient, configTracker, labels, includeLabelPrefixArray, logger) + canaryFactory := canary.NewFactory(kubeClient, flaggerClient, knativeClient, configTracker, labels, includeLabelPrefixArray, useServerSideHash, logger) c := controller.NewController( kubeClient, diff --git a/pkg/canary/daemonset_status.go b/pkg/canary/daemonset_status.go index 746d612b7..d2876cd6f 100644 --- a/pkg/canary/daemonset_status.go +++ b/pkg/canary/daemonset_status.go @@ -47,7 +47,8 @@ func (c *DaemonSetController) SyncStatus(cd *flaggerv1.Canary, status flaggerv1. return fmt.Errorf("GetConfigRefs failed: %w", err) } - return syncCanaryStatus(c.flaggerClient, cd, status, dae.Spec.Template, func(cdCopy *flaggerv1.Canary) { + hash := ComputeHash(dae.Spec.Template) + return syncCanaryStatus(c.flaggerClient, cd, status, hash, func(cdCopy *flaggerv1.Canary) { cdCopy.Status.TrackedConfigs = configs }) } diff --git a/pkg/canary/deployment_controller.go b/pkg/canary/deployment_controller.go index 708f4babe..66271c09c 100644 --- a/pkg/canary/deployment_controller.go +++ b/pkg/canary/deployment_controller.go @@ -42,6 +42,7 @@ type DeploymentController struct { configTracker Tracker labels []string includeLabelPrefix []string + useServerSideHash bool } // Initialize creates the primary deployment if it does not exist. @@ -144,7 +145,11 @@ func (c *DeploymentController) HasTargetChanged(cd *flaggerv1.Canary) (bool, err return false, fmt.Errorf("deployment %s.%s get query error: %w", targetName, cd.Namespace, err) } - return hasSpecChanged(cd, canary.Spec.Template) + if c.useServerSideHash { + return hasDeploymentSpecChanged(c.kubeClient, cd, canary) + } else { + return hasSpecChanged(cd, canary.Spec.Template) + } } // ScaleToZero Scale sets the canary deployment replicas diff --git a/pkg/canary/deployment_status.go b/pkg/canary/deployment_status.go index 46027499d..470e24b7f 100644 --- a/pkg/canary/deployment_status.go +++ b/pkg/canary/deployment_status.go @@ -37,7 +37,17 @@ func (c *DeploymentController) SyncStatus(cd *flaggerv1.Canary, status flaggerv1 return fmt.Errorf("GetConfigRefs failed: %w", err) } - return syncCanaryStatus(c.flaggerClient, cd, status, dep.Spec.Template, func(cdCopy *flaggerv1.Canary) { + var hash string + if c.useServerSideHash { + hash, err = GetReplicaSetHash(c.kubeClient, dep) + if err != nil { + return fmt.Errorf("failed to compute server-side hash from deployment %s.%s: %w", cd.Spec.TargetRef.Name, cd.Namespace, err) + } + } else { + hash = ComputeHash(dep.Spec.Template) + } + + return syncCanaryStatus(c.flaggerClient, cd, status, hash, func(cdCopy *flaggerv1.Canary) { cdCopy.Status.TrackedConfigs = configs }) } diff --git a/pkg/canary/factory.go b/pkg/canary/factory.go index 37cde941d..a00f9f35d 100644 --- a/pkg/canary/factory.go +++ b/pkg/canary/factory.go @@ -33,6 +33,7 @@ type Factory struct { configTracker Tracker labels []string includeLabelPrefix []string + useServerSideHash bool } func NewFactory(kubeClient kubernetes.Interface, @@ -41,6 +42,7 @@ func NewFactory(kubeClient kubernetes.Interface, configTracker Tracker, labels []string, includeLabelPrefix []string, + useServerSideHash bool, logger *zap.SugaredLogger) *Factory { return &Factory{ kubeClient: kubeClient, @@ -50,6 +52,7 @@ func NewFactory(kubeClient kubernetes.Interface, configTracker: configTracker, labels: labels, includeLabelPrefix: includeLabelPrefix, + useServerSideHash: useServerSideHash, } } @@ -61,6 +64,7 @@ func (factory *Factory) Controller(obj v1beta1.LocalObjectReference) Controller labels: factory.labels, configTracker: factory.configTracker, includeLabelPrefix: factory.includeLabelPrefix, + useServerSideHash: factory.useServerSideHash, } daemonSetCtrl := &DaemonSetController{ logger: factory.logger, diff --git a/pkg/canary/knative_controller.go b/pkg/canary/knative_controller.go index a69a33fb0..6cc379344 100644 --- a/pkg/canary/knative_controller.go +++ b/pkg/canary/knative_controller.go @@ -64,7 +64,8 @@ func (kc *KnativeController) SyncStatus(cd *flaggerv1.Canary, status flaggerv1.C if err != nil { return fmt.Errorf("Knative Service %s.%s get query error: %w", cd.Spec.TargetRef.Name, cd.Namespace, err) } - return syncCanaryStatus(kc.flaggerClient, cd, status, service.Status.LatestCreatedRevisionName, func(copy *flaggerv1.Canary) {}) + hash := ComputeHash(service.Status.LatestCreatedRevisionName) + return syncCanaryStatus(kc.flaggerClient, cd, status, hash, func(copy *flaggerv1.Canary) {}) } // SetStatusFailedChecks updates the canary failed checks counter diff --git a/pkg/canary/service_controller.go b/pkg/canary/service_controller.go index 68b705937..599c8b850 100644 --- a/pkg/canary/service_controller.go +++ b/pkg/canary/service_controller.go @@ -247,7 +247,8 @@ func (c *ServiceController) SyncStatus(cd *flaggerv1.Canary, status flaggerv1.Ca return fmt.Errorf("service %s.%s get query error: %w", cd.Spec.TargetRef.Name, cd.Namespace, err) } - return syncCanaryStatus(c.flaggerClient, cd, status, dep.Spec, func(cdCopy *flaggerv1.Canary) {}) + hash := ComputeHash(dep.Spec) + return syncCanaryStatus(c.flaggerClient, cd, status, hash, func(cdCopy *flaggerv1.Canary) {}) } func (c *ServiceController) HaveDependenciesChanged(_ *flaggerv1.Canary) (bool, error) { diff --git a/pkg/canary/spec.go b/pkg/canary/spec.go index ff86c5b54..fd98294cb 100644 --- a/pkg/canary/spec.go +++ b/pkg/canary/spec.go @@ -17,11 +17,15 @@ limitations under the License. package canary import ( + "context" "fmt" "hash/fnv" "github.com/davecgh/go-spew/spew" + appsv1 "k8s.io/api/apps/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/rand" + "k8s.io/client-go/kubernetes" flaggerv1 "github.com/fluxcd/flagger/pkg/apis/flagger/v1beta1" ) @@ -48,6 +52,33 @@ func hasSpecChanged(cd *flaggerv1.Canary, spec interface{}) (bool, error) { return false, nil } +// hasDeploymentSpecChanged checks if the deployment's server-side generated +// pod-template-hash has changed by comparing with the last applied spec. If +// the last applied hash is different but not equal to last promoted one then +// it returns true +func hasDeploymentSpecChanged(kubeClient kubernetes.Interface, cd *flaggerv1.Canary, deployment *appsv1.Deployment) (bool, error) { + if cd.Status.LastAppliedSpec == "" { + return true, nil + } + + // Get the current server-side hash + newHash, err := GetReplicaSetHash(kubeClient, deployment) + if err != nil { + return false, fmt.Errorf("failed to get current server-side hash: %w", err) + } + + // Do not trigger a canary deployment on manual rollback + if cd.Status.LastPromotedSpec == newHash { + return false, nil + } + + if cd.Status.LastAppliedSpec != newHash { + return true, nil + } + + return false, nil +} + // ComputeHash returns a hash value calculated from a spec using the spew library // which follows pointers and prints actual values of the nested objects // ensuring the hash does not change when a pointer changes. @@ -63,3 +94,48 @@ func ComputeHash(spec interface{}) string { return rand.SafeEncodeString(fmt.Sprint(hasher.Sum32())) } + +// GetReplicaSetHash returns the pod-template-hash of the current ReplicaSet of +// the Deployment, by matching the "deployment.kubernetes.io/revision" +// annotation. As pod-template-hash is server-generated, it remains consistent +// across Kubernetes client library versions. +func GetReplicaSetHash(kubeClient kubernetes.Interface, deployment *appsv1.Deployment) (string, error) { + revisionAnnotation := "deployment.kubernetes.io/revision" + revision, exists := deployment.Annotations[revisionAnnotation] + if !exists { + return "", fmt.Errorf("missing revision annotation for %s.%s", deployment.Name, deployment.Namespace) + } + + // Get all ReplicaSets for this deployment + selector, err := metav1.LabelSelectorAsSelector(deployment.Spec.Selector) + if err != nil { + return "", fmt.Errorf("invalid label selector for %s.%s: %w", deployment.Name, deployment.Namespace, err) + } + + replicaSets, err := kubeClient.AppsV1().ReplicaSets(deployment.Namespace).List(context.Background(), metav1.ListOptions{ + LabelSelector: selector.String(), + }) + if err != nil { + return "", fmt.Errorf("failed to list ReplicaSets for %s.%s: %w", deployment.Name, deployment.Namespace, err) + } + + // Find the ReplicaSet with the matching "deployment.kubernetes.io/revision" + // annotation as the Deployment. If found, return its "pod-template-hash" + // label. + for _, rs := range replicaSets.Items { + rev, exists := rs.Annotations[revisionAnnotation] + if !exists { + continue + } + if rev != revision { + continue + } + hash, exists := rs.Labels["pod-template-hash"] + if !exists { + continue + } + return hash, nil + } + + return "", fmt.Errorf("failed to find pod-template-hash from a matching ReplicaSet for %s.%s", deployment.Name, deployment.Namespace) +} diff --git a/pkg/canary/status.go b/pkg/canary/status.go index 06dc9e891..d94d1be20 100644 --- a/pkg/canary/status.go +++ b/pkg/canary/status.go @@ -29,9 +29,7 @@ import ( clientset "github.com/fluxcd/flagger/pkg/client/clientset/versioned" ) -func syncCanaryStatus(flaggerClient clientset.Interface, cd *flaggerv1.Canary, status flaggerv1.CanaryStatus, canaryResource interface{}, setAll func(cdCopy *flaggerv1.Canary)) error { - hash := ComputeHash(canaryResource) - +func syncCanaryStatus(flaggerClient clientset.Interface, cd *flaggerv1.Canary, status flaggerv1.CanaryStatus, hash string, setAll func(cdCopy *flaggerv1.Canary)) error { firstTry := true name, ns := cd.GetName(), cd.GetNamespace() err := retry.RetryOnConflict(retry.DefaultBackoff, func() (err error) {