From d4ca150d805795b7d3425448c70019b10756ef4f Mon Sep 17 00:00:00 2001 From: Ruinan Liu Date: Sat, 16 May 2026 00:50:58 +0000 Subject: [PATCH 1/6] Initial commit for adding multiple concurrent runs --- cmd/eno-reconciler/main.go | 32 ++++++++++- .../controllers/reconciliation/controller.go | 55 ++++++++++--------- 2 files changed, 61 insertions(+), 26 deletions(-) diff --git a/cmd/eno-reconciler/main.go b/cmd/eno-reconciler/main.go index d1b6abf8..58a702ff 100644 --- a/cmd/eno-reconciler/main.go +++ b/cmd/eno-reconciler/main.go @@ -4,6 +4,7 @@ import ( "flag" "fmt" "os" + "strconv" "strings" "time" @@ -51,9 +52,17 @@ func run() error { recOpts = reconciliation.Options{} ) + defaultMaxConcurrentReconciles := 1 + if v := os.Getenv("ENO_RECONCILER_MAX_CONCURRENT_RECONCILES"); v != "" { + n, err := strconv.Atoi(v) + if err != nil { + return fmt.Errorf("invalid ENO_RECONCILER_MAX_CONCURRENT_RECONCILES %q: %w", v, err) + } + defaultMaxConcurrentReconciles = n + } flag.BoolVar(&debugLogging, "debug", true, "Enable debug logging") flag.StringVar(&remoteKubeconfigFile, "remote-kubeconfig", "", "Path to the kubeconfig of the apiserver where the resources will be reconciled. The config from the environment is used if this is not provided") - flag.Float64Var(&remoteQPS, "remote-qps", 50, "Max requests per second to the remote apiserver") + flag.Float64Var(&remoteQPS, "remote-qps", 100, "Max requests per second to the remote apiserver") flag.DurationVar(&recOpts.Timeout, "timeout", time.Minute, "Per-resource reconciliation timeout. Avoids cases where client retries/timeouts are configured poorly and the loop gets blocked") flag.DurationVar(&recOpts.ReadinessPollInterval, "readiness-poll-interval", time.Second*5, "Interval at which non-ready resources will be checked for readiness") flag.DurationVar(&recOpts.MinReconcileInterval, "min-reconcile-interval", time.Second, "Minimum value of eno.azure.com/reconcile-interval that will be honored by the controller") @@ -66,6 +75,7 @@ func run() error { flag.BoolVar(&recOpts.FailOpen, "fail-open", false, "Report that resources are reconciled once they've been seen, even if reconciliation failed. Overridden by individual resources with 'eno.azure.io/fail-open: true|false'") flag.StringVar(&migratingFieldManagers, "migrating-field-managers", os.Getenv("MIGRATING_FIELD_MANAGERS"), "Comma-separated list of Kubernetes SSA field manager names to take ownership from during migrations") flag.StringVar(&migratingFields, "migrating-fields", os.Getenv("MIGRATING_FIELDS"), "Comma-seperated list of fields Kubernetes fields(metadata.labels, spec, stringData...) to migrate the ownership to eno") + flag.IntVar(&recOpts.MaxConcurrentReconciles, "max-concurrent-reconciles", defaultMaxConcurrentReconciles, "Maximum number of concurrent reconciles for the reconciliation controller") mgrOpts.Bind(flag.CommandLine) flag.Parse() @@ -80,6 +90,26 @@ func run() error { enoBuildVersion = os.Getenv("ENO_BUILD_VERSION") logger := logging.NewLoggerWithBuild(zl, enoBuildVersion) + logger.Info("reconciler configuration", + "debugLogging", debugLogging, + "remoteKubeconfigFile", remoteKubeconfigFile, + "remoteQPS", remoteQPS, + "timeout", recOpts.Timeout, + "readinessPollInterval", recOpts.ReadinessPollInterval, + "minReconcileInterval", recOpts.MinReconcileInterval, + "disableServerSideApply", recOpts.DisableServerSideApply, + "compositionLabelSelector", compositionSelector, + "compositionNamespace", compositionNamespace, + "resourceFilter", resourceFilter, + "namespaceCreationGracePeriod", namespaceCreationGracePeriod, + "namespaceCleanup", namespaceCleanup, + "failOpen", recOpts.FailOpen, + "migratingFieldManagers", migratingFieldManagers, + "migratingFields", migratingFields, + "maxConcurrentReconciles", recOpts.MaxConcurrentReconciles, + "enoBuildVersion", enoBuildVersion, + ) + mgrOpts.CompositionNamespace = compositionNamespace if compositionSelector != "" { var err error diff --git a/internal/controllers/reconciliation/controller.go b/internal/controllers/reconciliation/controller.go index 40e627fb..a9d81885 100644 --- a/internal/controllers/reconciliation/controller.go +++ b/internal/controllers/reconciliation/controller.go @@ -43,21 +43,24 @@ type Options struct { Timeout time.Duration ReadinessPollInterval time.Duration MinReconcileInterval time.Duration + + MaxConcurrentReconciles int } type Controller struct { - client client.Client - writeBuffer *flowcontrol.ResourceSliceWriteBuffer - resourceClient *resource.Cache - resourceFilter cel.Program - timeout time.Duration - readinessPollInterval time.Duration - upstreamClient client.Client - minReconcileInterval time.Duration - disableSSA bool - failOpen bool - migratingFieldManagers []string - migratingFields []string + client client.Client + writeBuffer *flowcontrol.ResourceSliceWriteBuffer + resourceClient *resource.Cache + resourceFilter cel.Program + timeout time.Duration + readinessPollInterval time.Duration + upstreamClient client.Client + minReconcileInterval time.Duration + disableSSA bool + failOpen bool + migratingFieldManagers []string + migratingFields []string + maxConcurrentReconciles int } func New(mgr ctrl.Manager, opts Options) error { @@ -74,18 +77,19 @@ func New(mgr ctrl.Manager, opts Options) error { } c := &Controller{ - client: opts.Manager.GetClient(), - writeBuffer: opts.WriteBuffer, - resourceClient: cache, - resourceFilter: opts.ResourceFilter, - timeout: opts.Timeout, - readinessPollInterval: opts.ReadinessPollInterval, - upstreamClient: upstreamClient, - minReconcileInterval: opts.MinReconcileInterval, - disableSSA: opts.DisableServerSideApply, - failOpen: opts.FailOpen, - migratingFieldManagers: opts.MigratingFieldManagers, - migratingFields: opts.MigratingFields, + client: opts.Manager.GetClient(), + writeBuffer: opts.WriteBuffer, + resourceClient: cache, + resourceFilter: opts.ResourceFilter, + timeout: opts.Timeout, + readinessPollInterval: opts.ReadinessPollInterval, + upstreamClient: upstreamClient, + minReconcileInterval: opts.MinReconcileInterval, + disableSSA: opts.DisableServerSideApply, + failOpen: opts.FailOpen, + migratingFieldManagers: opts.MigratingFieldManagers, + migratingFields: opts.MigratingFields, + maxConcurrentReconciles: opts.MaxConcurrentReconciles, } return builder.TypedControllerManagedBy[resource.Request](mgr). @@ -97,7 +101,8 @@ func New(mgr ctrl.Manager, opts Options) error { // // This rate limiter uses the same per-item rate limiter as the default, but without // the additional shared/global/non-item-scoped limiter. - RateLimiter: workqueue.NewTypedItemExponentialFailureRateLimiter[resource.Request](5*time.Millisecond, 1000*time.Second), + RateLimiter: workqueue.NewTypedItemExponentialFailureRateLimiter[resource.Request](5*time.Millisecond, 1000*time.Second), + MaxConcurrentReconciles: c.maxConcurrentReconciles, }). WatchesRawSource(src). Complete(c) From 0e54d612135961149bf8bfaac84da8263ce17d8b Mon Sep 17 00:00:00 2001 From: Ruinan Liu Date: Mon, 18 May 2026 18:47:01 +0000 Subject: [PATCH 2/6] Updating kind ordering and making maxreconcile configurable --- cmd/eno-reconciler/main.go | 11 +------- internal/resource/kind_ordering.go | 45 +++++++++++++++--------------- 2 files changed, 24 insertions(+), 32 deletions(-) diff --git a/cmd/eno-reconciler/main.go b/cmd/eno-reconciler/main.go index 58a702ff..fb455146 100644 --- a/cmd/eno-reconciler/main.go +++ b/cmd/eno-reconciler/main.go @@ -4,7 +4,6 @@ import ( "flag" "fmt" "os" - "strconv" "strings" "time" @@ -52,14 +51,6 @@ func run() error { recOpts = reconciliation.Options{} ) - defaultMaxConcurrentReconciles := 1 - if v := os.Getenv("ENO_RECONCILER_MAX_CONCURRENT_RECONCILES"); v != "" { - n, err := strconv.Atoi(v) - if err != nil { - return fmt.Errorf("invalid ENO_RECONCILER_MAX_CONCURRENT_RECONCILES %q: %w", v, err) - } - defaultMaxConcurrentReconciles = n - } flag.BoolVar(&debugLogging, "debug", true, "Enable debug logging") flag.StringVar(&remoteKubeconfigFile, "remote-kubeconfig", "", "Path to the kubeconfig of the apiserver where the resources will be reconciled. The config from the environment is used if this is not provided") flag.Float64Var(&remoteQPS, "remote-qps", 100, "Max requests per second to the remote apiserver") @@ -75,7 +66,7 @@ func run() error { flag.BoolVar(&recOpts.FailOpen, "fail-open", false, "Report that resources are reconciled once they've been seen, even if reconciliation failed. Overridden by individual resources with 'eno.azure.io/fail-open: true|false'") flag.StringVar(&migratingFieldManagers, "migrating-field-managers", os.Getenv("MIGRATING_FIELD_MANAGERS"), "Comma-separated list of Kubernetes SSA field manager names to take ownership from during migrations") flag.StringVar(&migratingFields, "migrating-fields", os.Getenv("MIGRATING_FIELDS"), "Comma-seperated list of fields Kubernetes fields(metadata.labels, spec, stringData...) to migrate the ownership to eno") - flag.IntVar(&recOpts.MaxConcurrentReconciles, "max-concurrent-reconciles", defaultMaxConcurrentReconciles, "Maximum number of concurrent reconciles for the reconciliation controller") + flag.IntVar(&recOpts.MaxConcurrentReconciles, "max-concurrent-reconciles", 1, "Maximum number of concurrent reconciles for the reconciliation controller") mgrOpts.Bind(flag.CommandLine) flag.Parse() diff --git a/internal/resource/kind_ordering.go b/internal/resource/kind_ordering.go index 406923fa..9da0bf1e 100644 --- a/internal/resource/kind_ordering.go +++ b/internal/resource/kind_ordering.go @@ -13,26 +13,27 @@ package resource var managedCreateOrder = map[string]int{ "PriorityClass": -100, "Namespace": -100, - "NetworkPolicy": -99, - "ResourceQuota": -99, - "LimitRange": -99, - "PodSecurityPolicy": -98, - "PodDisruptionBudget": -98, - "ServiceAccount": -97, - "Secret": -96, - "SecretList": -96, - "ConfigMap": -96, - "StorageClass": -95, - "PersistentVolume": -94, - "PersistentVolumeClaim": -93, - "CustomResourceDefinition": -92, - "ClusterRole": -91, - "ClusterRoleList": -91, - "ClusterRoleBinding": -91, - "ClusterRoleBindingList": -91, - "Role": -90, - "RoleList": -90, - "RoleBinding": -90, - "RoleBindingList": -90, - "Service": -89, + "NetworkPolicy": -100, + "ResourceQuota": -100, + "LimitRange": -100, + "PodSecurityPolicy": -100, + "PodDisruptionBudget": -100, + "ServiceAccount": -100, + "Secret": -100, + "SecretList": -100, + "ConfigMap": -100, + "StorageClass": -100, + "PersistentVolume": -100, + "PersistentVolumeClaim": -100, + "CustomResourceDefinition": -100, + "ClusterRole": -100, + "ClusterRoleList": -100, + "ClusterRoleBinding": -100, + "ClusterRoleBindingList": -100, + "Role": -100, + "RoleList": -100, + "RoleBinding": -100, + "RoleBindingList": -100, + "Service": -100, + "EtcdCluster": -100, } From dd9b0365dde3f7a9ca63994973777436f9f66812 Mon Sep 17 00:00:00 2001 From: Ruinan Liu Date: Mon, 18 May 2026 18:51:55 +0000 Subject: [PATCH 3/6] update comment --- internal/resource/kind_ordering.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/resource/kind_ordering.go b/internal/resource/kind_ordering.go index 9da0bf1e..1b323642 100644 --- a/internal/resource/kind_ordering.go +++ b/internal/resource/kind_ordering.go @@ -35,5 +35,6 @@ var managedCreateOrder = map[string]int{ "RoleBinding": -100, "RoleBindingList": -100, "Service": -100, - "EtcdCluster": -100, + // TO-DO: Cleanup this once ETCD will reconcile on its own + "EtcdCluster": -100, } From 5ce340b26a7226dce226faae156e9cfd10d567a2 Mon Sep 17 00:00:00 2001 From: Ruinan Liu Date: Mon, 18 May 2026 20:35:41 +0000 Subject: [PATCH 4/6] move logging location --- cmd/eno-reconciler/main.go | 40 +++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/cmd/eno-reconciler/main.go b/cmd/eno-reconciler/main.go index fb455146..48e6327c 100644 --- a/cmd/eno-reconciler/main.go +++ b/cmd/eno-reconciler/main.go @@ -81,26 +81,6 @@ func run() error { enoBuildVersion = os.Getenv("ENO_BUILD_VERSION") logger := logging.NewLoggerWithBuild(zl, enoBuildVersion) - logger.Info("reconciler configuration", - "debugLogging", debugLogging, - "remoteKubeconfigFile", remoteKubeconfigFile, - "remoteQPS", remoteQPS, - "timeout", recOpts.Timeout, - "readinessPollInterval", recOpts.ReadinessPollInterval, - "minReconcileInterval", recOpts.MinReconcileInterval, - "disableServerSideApply", recOpts.DisableServerSideApply, - "compositionLabelSelector", compositionSelector, - "compositionNamespace", compositionNamespace, - "resourceFilter", resourceFilter, - "namespaceCreationGracePeriod", namespaceCreationGracePeriod, - "namespaceCleanup", namespaceCleanup, - "failOpen", recOpts.FailOpen, - "migratingFieldManagers", migratingFieldManagers, - "migratingFields", migratingFields, - "maxConcurrentReconciles", recOpts.MaxConcurrentReconciles, - "enoBuildVersion", enoBuildVersion, - ) - mgrOpts.CompositionNamespace = compositionNamespace if compositionSelector != "" { var err error @@ -170,5 +150,25 @@ func run() error { return fmt.Errorf("constructing reconciliation controller: %w", err) } + logger.Info("reconciler configuration", + "debugLogging", debugLogging, + "remoteKubeconfigFile", remoteKubeconfigFile, + "remoteQPS", remoteQPS, + "timeout", recOpts.Timeout, + "readinessPollInterval", recOpts.ReadinessPollInterval, + "minReconcileInterval", recOpts.MinReconcileInterval, + "disableServerSideApply", recOpts.DisableServerSideApply, + "compositionLabelSelector", compositionSelector, + "compositionNamespace", compositionNamespace, + "resourceFilter", resourceFilter, + "namespaceCreationGracePeriod", namespaceCreationGracePeriod, + "namespaceCleanup", namespaceCleanup, + "failOpen", recOpts.FailOpen, + "migratingFieldManagers", migratingFieldManagers, + "migratingFields", migratingFields, + "maxConcurrentReconciles", recOpts.MaxConcurrentReconciles, + "enoBuildVersion", enoBuildVersion, + ) + return mgr.Start(ctx) } From 9a981c6554c9c7ea937e2c4ea6f23f5ff66c305f Mon Sep 17 00:00:00 2001 From: Ruinan Liu Date: Mon, 18 May 2026 20:41:03 +0000 Subject: [PATCH 5/6] Updating the flag --- cmd/eno-reconciler/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/eno-reconciler/main.go b/cmd/eno-reconciler/main.go index 48e6327c..659d1ff2 100644 --- a/cmd/eno-reconciler/main.go +++ b/cmd/eno-reconciler/main.go @@ -53,7 +53,7 @@ func run() error { ) flag.BoolVar(&debugLogging, "debug", true, "Enable debug logging") flag.StringVar(&remoteKubeconfigFile, "remote-kubeconfig", "", "Path to the kubeconfig of the apiserver where the resources will be reconciled. The config from the environment is used if this is not provided") - flag.Float64Var(&remoteQPS, "remote-qps", 100, "Max requests per second to the remote apiserver") + flag.Float64Var(&remoteQPS, "remote-qps", 50, "Max requests per second to the remote apiserver") flag.DurationVar(&recOpts.Timeout, "timeout", time.Minute, "Per-resource reconciliation timeout. Avoids cases where client retries/timeouts are configured poorly and the loop gets blocked") flag.DurationVar(&recOpts.ReadinessPollInterval, "readiness-poll-interval", time.Second*5, "Interval at which non-ready resources will be checked for readiness") flag.DurationVar(&recOpts.MinReconcileInterval, "min-reconcile-interval", time.Second, "Minimum value of eno.azure.com/reconcile-interval that will be honored by the controller") From 4e6a59400b36a127910702527e24634cd86b651d Mon Sep 17 00:00:00 2001 From: Ruinan Liu Date: Mon, 18 May 2026 20:48:16 +0000 Subject: [PATCH 6/6] Fix test --- internal/resource/kind_ordering_test.go | 52 ++++++------------------- 1 file changed, 12 insertions(+), 40 deletions(-) diff --git a/internal/resource/kind_ordering_test.go b/internal/resource/kind_ordering_test.go index eb9c4216..00260ac4 100644 --- a/internal/resource/kind_ordering_test.go +++ b/internal/resource/kind_ordering_test.go @@ -28,7 +28,7 @@ func TestManagedCreateOrderCoversExpectedKinds(t *testing.T) { func TestManagedCreateOrderGroupRange(t *testing.T) { for kind, grp := range managedCreateOrder { assert.GreaterOrEqual(t, grp, -100, "kind %q group %d below minimum", kind, grp) - assert.LessOrEqual(t, grp, -81, "kind %q group %d above reserved max", kind, grp) + assert.LessOrEqual(t, grp, -60, "kind %q group %d above reserved max", kind, grp) } } @@ -58,8 +58,8 @@ func TestNewResource_DefaultOrderingForManagedKind(t *testing.T) { name: "managed kind without annotations gets defaults", manifest: `{"apiVersion":"v1","kind":"Secret", "metadata":{"name":"s","namespace":"default"}}`, - wantReadiness: -96, - wantDeletion: intPtr(96), + wantReadiness: -100, + wantDeletion: intPtr(100), }, { name: "user readiness annotation wins; deletion still defaulted", @@ -67,14 +67,14 @@ func TestNewResource_DefaultOrderingForManagedKind(t *testing.T) { "metadata":{"name":"s","namespace":"default", "annotations":{"eno.azure.io/readiness-group":"5"}}}`, wantReadiness: 5, - wantDeletion: intPtr(96), + wantDeletion: intPtr(100), }, { name: "user deletion annotation wins; readiness still defaulted", manifest: `{"apiVersion":"v1","kind":"Secret", "metadata":{"name":"s","namespace":"default", "annotations":{"eno.azure.io/deletion-group":"10"}}}`, - wantReadiness: -96, + wantReadiness: -100, wantDeletion: intPtr(10), }, { @@ -112,39 +112,11 @@ func TestNewResource_DefaultOrderingForManagedKind(t *testing.T) { func intPtr(i int) *int { return &i } -func TestManagedOrderingPrecedence(t *testing.T) { - // Namespace/PriorityClass (-100) before everything - assert.Less(t, managedCreateOrder["Namespace"], managedCreateOrder["ServiceAccount"]) - // ServiceAccount (-97) before Secret/ConfigMap (-96) - assert.Less(t, managedCreateOrder["ServiceAccount"], managedCreateOrder["Secret"]) - assert.Less(t, managedCreateOrder["ServiceAccount"], managedCreateOrder["ConfigMap"]) - // StorageClass (-95) before PV (-94) before PVC (-93) - assert.Less(t, managedCreateOrder["StorageClass"], managedCreateOrder["PersistentVolume"]) - assert.Less(t, managedCreateOrder["PersistentVolume"], managedCreateOrder["PersistentVolumeClaim"]) - // PVC (-93) before CRD (-92) - assert.Less(t, managedCreateOrder["PersistentVolumeClaim"], managedCreateOrder["CustomResourceDefinition"]) - // CRD (-92) before ClusterRole (-91) - assert.Less(t, managedCreateOrder["CustomResourceDefinition"], managedCreateOrder["ClusterRole"]) - // ClusterRole (-91) before Role (-90) - assert.Less(t, managedCreateOrder["ClusterRole"], managedCreateOrder["Role"]) - // Role (-90) before Service (-89) - assert.Less(t, managedCreateOrder["Role"], managedCreateOrder["Service"]) -} - -func TestManagedOrderingDeletionPrecedence(t *testing.T) { - // Deletion group is negation of create group, so deletion precedence is reversed. - delGrp := func(kind string) int { return -managedCreateOrder[kind] } - - // Service (+89) < Role (+90) < ClusterRole (+91) < CRD (+92) < ... < Namespace (+100) - assert.Less(t, delGrp("Service"), delGrp("Role")) - assert.Less(t, delGrp("Role"), delGrp("ClusterRole")) - assert.Less(t, delGrp("ClusterRole"), delGrp("CustomResourceDefinition")) - assert.Less(t, delGrp("CustomResourceDefinition"), delGrp("PersistentVolumeClaim")) - assert.Less(t, delGrp("PersistentVolumeClaim"), delGrp("PersistentVolume")) - assert.Less(t, delGrp("PersistentVolume"), delGrp("StorageClass")) - assert.Less(t, delGrp("StorageClass"), delGrp("ConfigMap")) - assert.Less(t, delGrp("ConfigMap"), delGrp("ServiceAccount")) - assert.Less(t, delGrp("ServiceAccount"), delGrp("Namespace")) +func TestManagedOrderingFlat(t *testing.T) { + // All managed kinds share the same reserved create group (-100) so they + // are reconciled together as infrastructure, ahead of user resources + // (whose groups must be >= -60). Deletion groups are the negation. + for kind, grp := range managedCreateOrder { + assert.Equal(t, -100, grp, "managed kind %q should be at reserved group -100", kind) + } } - -