From 8b3181271d89cf8328786dc1d85a77357a07e2ea Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Wed, 11 Mar 2026 11:11:10 +0100
Subject: [PATCH 01/24] Document initial ideas to route multicluster resources
---
helm/bundles/cortex-nova/values.yaml | 18 +++++++++++
pkg/multicluster/client.go | 46 ++++++++++++++++++++++------
2 files changed, 55 insertions(+), 9 deletions(-)
diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml
index b2dbba788..138cddcf0 100644
--- a/helm/bundles/cortex-nova/values.yaml
+++ b/helm/bundles/cortex-nova/values.yaml
@@ -117,6 +117,24 @@ cortex-scheduling-controllers:
enabledTasks:
- nova-decisions-cleanup-task
+ # List: iterate over all api servers with the gvk and return a combined list.
+ # Get: iterate over all api servers with the gvk and return the first result found.
+ # Create/Update/Delete: based on the content of the resource, match an api server by its labels and execute the op only there.
+ # WatchesMulticluster: watch the resource in all clusters with the gvk.
+ # See the decoding sketch after this diff.
+ multicluster:
+ # These resources will be written into the home cluster if no match is found.
+ fallbacks:
+ - gvk: "cortex.cloud/v1alpha1/Decision"
+ - # ...
+ apiservers:
+ - host: "https://api.cc-b0-qa-de-1.ccloud.external.a-qa-de-200.soil-garden.qa-de-1.cloud.sap"
+ gvks: # Used for List, Get, Watches
+ - "cortex.cloud/v1alpha1/Decision"
+ - "cortex.cloud/v1alpha1/DecisionList"
+ # ...
+ labels: # Used for Create/Update/Delete
+ az: "qa-de-1b"
+
cortex-knowledge-controllers:
<<: *cortex
namePrefix: cortex-nova-knowledge
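
To make the intended shape of this block concrete, a minimal decoding sketch follows. It assumes the ClientConfig, FallbackConfig, and APIServerOverride structs introduced in patch 02 below, and uses sigs.k8s.io/yaml, which converts YAML to JSON so the json struct tags apply; the host is the placeholder from the code comments, not a real endpoint.

    package main

    import (
        "fmt"

        "sigs.k8s.io/yaml"
    )

    // Mirrors the ClientConfig structs from patch 02 below.
    type FallbackConfig struct {
        GVK string `json:"gvk"`
    }

    type APIServerOverride struct {
        Host   string            `json:"host"`
        CACert string            `json:"caCert,omitempty"`
        GVKs   []string          `json:"gvks"`
        Labels map[string]string `json:"labels,omitempty"`
    }

    type ClientConfig struct {
        Fallbacks          []FallbackConfig    `json:"fallbacks,omitempty"`
        APIServerOverrides []APIServerOverride `json:"apiservers,omitempty"`
    }

    func main() {
        raw := []byte(`
    fallbacks:
      - gvk: "cortex.cloud/v1alpha1/Decision"
    apiservers:
      - host: "https://my-apiserver:6443" # placeholder host
        gvks:
          - "cortex.cloud/v1alpha1/Decision"
        labels:
          az: "qa-de-1b"
    `)
        var conf ClientConfig
        if err := yaml.Unmarshal(raw, &conf); err != nil {
            panic(err)
        }
        fmt.Printf("%+v\n", conf)
    }
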
diff --git a/pkg/multicluster/client.go b/pkg/multicluster/client.go
index d3bd88ae6..82e94582c 100644
--- a/pkg/multicluster/client.go
+++ b/pkg/multicluster/client.go
@@ -8,6 +8,7 @@ import (
"errors"
"sync"
+ hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
@@ -17,7 +18,40 @@ import (
"sigs.k8s.io/controller-runtime/pkg/cluster"
)
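+// A ResourceRouter decides whether an object belongs to the cluster described
+// by the given routing labels (the Create/Update/Delete routing sketched in
+// values.yaml above).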
+type ResourceRouter interface {
+ Match(obj any, labels map[string]string) (bool, error)
+}
+
+type HypervisorResourceRouter struct{}
+
+func (h HypervisorResourceRouter) Match(obj any, labels map[string]string) (bool, error) {
+ hv, ok := obj.(*hv1.Hypervisor) // client.Object implementations are pointer types
+ if !ok {
+ return false, errors.New("object is not a Hypervisor")
+ }
+ // Match by hypervisor availability zone label.
+ az, ok := labels["az"]
+ if !ok {
+ return false, errors.New("object does not have availability zone label")
+ }
+ hvAZ, ok := hv.Labels["topology.kubernetes.io/zone"]
+ if !ok {
+ return false, errors.New("hypervisor does not have availability zone label")
+ }
+ return hvAZ == az, nil
+}
+
+func main() {
+ _ = Client{
+ ResourceRouters: map[schema.GroupVersionKind]ResourceRouter{
+ hv1.GroupVersion.WithKind("Hypervisor"): HypervisorResourceRouter{},
+ },
+ }
+}
+
type Client struct {
+ ResourceRouters map[schema.GroupVersionKind]ResourceRouter
+
// The cluster in which cortex is deployed.
HomeCluster cluster.Cluster
// The REST config for the home cluster in which cortex is deployed.
@@ -134,17 +168,11 @@ func (c *Client) GVKFromHomeScheme(obj runtime.Object) (gvk schema.GroupVersionK
return gvks[0], nil
}
-// Get the cluster for the given group version kind.
-//
-// If this object kind does not have a remote cluster configured,
-// the home cluster is returned.
-func (c *Client) ClusterForResource(gvk schema.GroupVersionKind) cluster.Cluster {
+func (c *Client) ClusterForResource(gvk schema.GroupVersionKind) cluster.Cluster {
c.remoteClustersMu.RLock()
defer c.remoteClustersMu.RUnlock()
- cl, ok := c.remoteClusters[gvk]
- if ok {
- return cl
- }
+ // TODO: Match the appropriate cluster.
+
return c.HomeCluster
}
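
The router can be exercised in isolation. A hypothetical example, assuming hv1.Hypervisor embeds metav1.ObjectMeta so that Labels is promoted; it would live in package multicluster next to the sketch above and needs "fmt":

    func ExampleHypervisorResourceRouter_Match() {
        hv := &hv1.Hypervisor{}
        hv.Labels = map[string]string{"topology.kubernetes.io/zone": "qa-de-1b"}

        // The labels argument carries the routing labels configured per apiserver.
        match, err := HypervisorResourceRouter{}.Match(hv, map[string]string{"az": "qa-de-1b"})
        fmt.Println(match, err)
        // Output: true <nil>
    }
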
From 099e2080692a5f022a6f0687ef119fbad0054e62 Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Thu, 12 Mar 2026 09:36:35 +0100
Subject: [PATCH 02/24] Add 1:n mapping in clusters
---
pkg/multicluster/builder.go | 28 +-
pkg/multicluster/builder_test.go | 73 +-
pkg/multicluster/client.go | 429 +++---
pkg/multicluster/client_test.go | 2099 ++++++++++--------------------
pkg/multicluster/routers.go | 37 +
pkg/multicluster/routers_test.go | 85 ++
6 files changed, 1083 insertions(+), 1668 deletions(-)
create mode 100644 pkg/multicluster/routers.go
create mode 100644 pkg/multicluster/routers_test.go
diff --git a/pkg/multicluster/builder.go b/pkg/multicluster/builder.go
index e5d8d7e3a..53342232b 100644
--- a/pkg/multicluster/builder.go
+++ b/pkg/multicluster/builder.go
@@ -31,21 +31,21 @@ type MulticlusterBuilder struct {
multiclusterClient *Client
}
-// Watch resources across multiple clusters.
-//
-// If the object implements Resource, we pick the right cluster based on the
-// resource URI. If your builder needs this method, pass it to the builder
-// Watch resources, potentially in a remote cluster.
-//
-// Determines the appropriate cluster by looking up the object's GroupVersionKind (GVK)
-// in the home scheme. If your builder needs this method, pass it to the builder
-// as the first call and then proceed with other builder methods.
+// WatchesMulticluster watches a resource across all clusters that serve its GVK.
+// If the GVK is served by multiple remote clusters, a watch is set up on each.
func (b MulticlusterBuilder) WatchesMulticluster(object client.Object, eventHandler handler.TypedEventHandler[client.Object, reconcile.Request], predicates ...predicate.Predicate) MulticlusterBuilder {
- cl := b.multiclusterClient.HomeCluster // default cluster
- if gvk, err := b.multiclusterClient.GVKFromHomeScheme(object); err == nil {
- cl = b.multiclusterClient.ClusterForResource(gvk)
+ gvk, err := b.multiclusterClient.GVKFromHomeScheme(object)
+ if err != nil {
+ // Fall back to home cluster if GVK lookup fails.
+ clusterCache := b.multiclusterClient.HomeCluster.GetCache()
+ b.Builder = b.WatchesRawSource(source.Kind(clusterCache, object, eventHandler, predicates...))
+ return b
+ }
+
+ // Add a watch for each remote cluster serving the GVK
+ for _, cl := range b.multiclusterClient.ClustersForGVK(gvk) {
+ clusterCache := cl.GetCache()
+ b.Builder = b.WatchesRawSource(source.Kind(clusterCache, object, eventHandler, predicates...))
}
- clusterCache := cl.GetCache()
- b.Builder = b.WatchesRawSource(source.Kind(clusterCache, object, eventHandler, predicates...))
return b
}
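
As the earlier doc comments suggested, WatchesMulticluster should be the builder's first call, followed by the usual builder methods. A hypothetical wiring; BuildController is referenced by the tests below, but its exact signature, the watched type, and the reconciler are assumptions:

    // Sketch only: BuildController's shape is inferred from the tests below.
    err := multiclusterClient.
        BuildController(mgr).
        WatchesMulticluster(
            &v1alpha1.Decision{},               // watched in every cluster serving its GVK
            &handler.EnqueueRequestForObject{}, // standard controller-runtime handler
        ).
        Complete(reconciler) // promoted from the embedded controller-runtime Builder
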
diff --git a/pkg/multicluster/builder_test.go b/pkg/multicluster/builder_test.go
index 628d37e15..662756484 100644
--- a/pkg/multicluster/builder_test.go
+++ b/pkg/multicluster/builder_test.go
@@ -7,74 +7,34 @@ import (
"testing"
"k8s.io/apimachinery/pkg/runtime/schema"
- "sigs.k8s.io/controller-runtime/pkg/cluster"
)
-// TestBuildController tests that BuildController creates a MulticlusterBuilder.
-// Note: Full integration testing requires a running manager, which is not
-// practical for unit tests. This test verifies the basic structure.
func TestBuildController_Structure(t *testing.T) {
- // We can't easily test BuildController without a real manager
- // because ctrl.NewControllerManagedBy requires a manager implementation.
- // Instead, we verify that MulticlusterBuilder has the expected fields.
-
- // Test that MulticlusterBuilder can be created with required fields
c := &Client{
- remoteClusters: make(map[schema.GroupVersionKind]cluster.Cluster),
+ remoteClusters: make(map[schema.GroupVersionKind][]remoteCluster),
}
-
- // Verify the Client field is accessible
if c.remoteClusters == nil {
t.Error("expected remoteClusters to be initialized")
}
}
-// TestMulticlusterBuilder_Fields verifies the structure of MulticlusterBuilder.
func TestMulticlusterBuilder_Fields(t *testing.T) {
- // Create a minimal client for testing
c := &Client{}
-
- // Create a MulticlusterBuilder manually to test its fields
mb := MulticlusterBuilder{
- Builder: nil, // Can't create without manager
+ Builder: nil,
multiclusterClient: c,
}
-
- // Verify multiclusterClient is set
if mb.multiclusterClient != c {
t.Error("expected multiclusterClient to be set")
}
-
- // Verify Builder can be nil initially
if mb.Builder != nil {
t.Error("expected Builder to be nil when not set")
}
}
-// TestMulticlusterBuilder_WatchesMulticluster_RequiresClient tests that
-// WatchesMulticluster requires a multicluster client.
-func TestMulticlusterBuilder_WatchesMulticluster_RequiresClient(t *testing.T) {
- // Create a client with remote clusters
+func TestClient_ClustersForGVK_Integration(t *testing.T) {
c := &Client{
- remoteClusters: make(map[schema.GroupVersionKind]cluster.Cluster),
- }
-
- // Verify the client can be used with the builder
- mb := MulticlusterBuilder{
- multiclusterClient: c,
- }
-
- if mb.multiclusterClient == nil {
- t.Error("expected multiclusterClient to be set")
- }
-}
-
-// TestClient_ClusterForResource_ReturnsHomeCluster tests that ClusterForResource
-// returns the home cluster when no remote cluster is configured for the GVK.
-func TestClient_ClusterForResource_Integration(t *testing.T) {
- // Test with nil remote clusters - should return home cluster
- c := &Client{
- HomeCluster: nil, // Will return nil
+ HomeCluster: nil,
remoteClusters: nil,
}
@@ -84,18 +44,16 @@ func TestClient_ClusterForResource_Integration(t *testing.T) {
Kind: "Deployment",
}
- result := c.ClusterForResource(gvk)
- if result != nil {
- t.Error("expected nil when HomeCluster is nil")
+ result := c.ClustersForGVK(gvk)
+ // With nil remoteClusters and nil HomeCluster, returns [nil].
+ if len(result) != 1 {
+ t.Errorf("expected 1 cluster, got %d", len(result))
}
}
-// TestClient_ClusterForResource_LookupOrder tests the lookup order:
-// first check remote clusters, then fall back to home cluster.
-func TestClient_ClusterForResource_LookupOrder(t *testing.T) {
- // Create client with empty remote clusters map
+func TestClient_ClustersForGVK_LookupOrder(t *testing.T) {
c := &Client{
- remoteClusters: make(map[schema.GroupVersionKind]cluster.Cluster),
+ remoteClusters: make(map[schema.GroupVersionKind][]remoteCluster),
}
gvk := schema.GroupVersionKind{
@@ -104,9 +62,12 @@ func TestClient_ClusterForResource_LookupOrder(t *testing.T) {
Kind: "Deployment",
}
- // Should return HomeCluster (nil) since GVK is not in remoteClusters
- result := c.ClusterForResource(gvk)
- if result != nil {
- t.Error("expected nil when GVK not in remoteClusters and HomeCluster is nil")
+ // No remote clusters for this GVK, returns home cluster (nil).
+ result := c.ClustersForGVK(gvk)
+ if len(result) != 1 {
+ t.Errorf("expected 1 cluster, got %d", len(result))
+ }
+ if result[0] != nil {
+ t.Error("expected nil HomeCluster")
}
}
diff --git a/pkg/multicluster/client.go b/pkg/multicluster/client.go
index 82e94582c..8e2411a26 100644
--- a/pkg/multicluster/client.go
+++ b/pkg/multicluster/client.go
@@ -6,9 +6,10 @@ package multicluster
import (
"context"
"errors"
+ "fmt"
"sync"
- hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
+ apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
@@ -18,38 +19,15 @@ import (
"sigs.k8s.io/controller-runtime/pkg/cluster"
)
-type ResourceRouter interface {
- Match(obj any, labels map[string]string) (bool, error)
-}
-
-type HypervisorResourceRouter struct{}
-
-func (h HypervisorResourceRouter) Match(obj any, labels map[string]string) (bool, error) {
- hv, ok := obj.(hv1.Hypervisor)
- if !ok {
- return false, errors.New("object is not a Hypervisor")
- }
- // Match by hypervisor availability zone label.
- az, ok := labels["az"]
- if !ok {
- return false, errors.New("object does not have availability zone label")
- }
- hvAZ, ok := hv.Labels["topology.kubernetes.io/zone"]
- if !ok {
- return false, errors.New("hypervisor does not have availability zone label")
- }
- return hvAZ == az, nil
-}
-
-func main() {
- _ = Client{
- ResourceRouters: map[schema.GroupVersionKind]ResourceRouter{
- hv1.GroupVersion.WithKind("Hypervisor"): HypervisorResourceRouter{},
- },
- }
+// A remote cluster with routing labels used to match resources to clusters.
+type remoteCluster struct {
+ cluster cluster.Cluster
+ labels map[string]string
}
type Client struct {
+ // ResourceRouters determine which cluster a resource should be written to
+ // when multiple clusters serve the same GVK.
ResourceRouters map[schema.GroupVersionKind]ResourceRouter
// The cluster in which cortex is deployed.
@@ -60,30 +38,44 @@ type Client struct {
// This scheme should include all types used in the remote clusters.
HomeScheme *runtime.Scheme
- // Remote clusters to use by resource type.
- // The clusters provided are expected to accept the home cluster's
- // service account tokens and know about the resources being managed.
- remoteClusters map[schema.GroupVersionKind]cluster.Cluster
+ // Remote clusters to use by resource type. Multiple clusters can serve
+ // the same GVK (e.g. one per availability zone).
+ remoteClusters map[schema.GroupVersionKind][]remoteCluster
// Mutex to protect access to remoteClusters.
remoteClustersMu sync.RWMutex
+
+ // GVKs for which a write operation falls back to the home cluster
+ // when no remote cluster matches.
+ fallbackGVKs map[schema.GroupVersionKind]bool
}
type ClientConfig struct {
- // Apiserver overrides.
- APIServerOverrides []APIServerOverride `json:"apiServerOverrides,omitempty"`
+ // Fallback GVKs that are written to the home cluster if no router match is found.
+ Fallbacks []FallbackConfig `json:"fallbacks,omitempty"`
+ // Apiserver overrides that map GVKs to remote clusters.
+ APIServerOverrides []APIServerOverride `json:"apiservers,omitempty"`
}
-// Config which maps a kubernetes resource URI to a remote kubernetes apiserver.
-// This override config can be used to manage CRDs in a different kubernetes cluster.
-// It is assumed that the remote apiserver accepts the serviceaccount tokens
-// issued by the local cluster.
-type APIServerOverride struct {
- // The resource GVK formatted as "<group>/<version>/<Kind>", e.g. "cortex.cloud/v1alpha1/Decision"
+// FallbackConfig specifies a GVK that falls back to the home cluster for writes
+// when no remote cluster matches.
+type FallbackConfig struct {
+ // The resource GVK formatted as "<group>/<version>/<Kind>".
GVK string `json:"gvk"`
- // The remote kubernetes apiserver url, e.g. "https://my-apiserver:6443"
+}
+
+// APIServerOverride maps multiple GVKs to a remote kubernetes apiserver with
+// routing labels. It is assumed that the remote apiserver accepts the
+// serviceaccount tokens issued by the local cluster.
+type APIServerOverride struct {
+ // The remote kubernetes apiserver url, e.g. "https://my-apiserver:6443".
Host string `json:"host"`
// The root CA certificate to verify the remote apiserver.
CACert string `json:"caCert,omitempty"`
+ // The resource GVKs this apiserver serves, formatted as "<group>/<version>/<Kind>".
+ GVKs []string `json:"gvks"`
+ // Labels used by ResourceRouters to match resources to this cluster
+ // for write operations (Create/Update/Delete/Patch).
+ Labels map[string]string `json:"labels,omitempty"`
}
// Helper function to initialize a new multicluster client during service startup,
@@ -93,29 +85,39 @@ func (c *Client) InitFromConf(ctx context.Context, mgr ctrl.Manager, conf Client
log.Info("initializing multicluster client with config", "config", conf)
// Map the formatted gvk from the config to the actual gvk object so that we
// can look up the right cluster for a given API server override.
- var gvksByConfStr = make(map[string]schema.GroupVersionKind)
+ gvksByConfStr := make(map[string]schema.GroupVersionKind)
for gvk := range c.HomeScheme.AllKnownTypes() {
- // This produces something like: "cortex.cloud/v1alpha1/Decision" which can
- // be used to look up the right cluster for a given API server override.
formatted := gvk.GroupVersion().String() + "/" + gvk.Kind
gvksByConfStr[formatted] = gvk
}
for gvkStr := range gvksByConfStr {
log.Info("scheme gvk registered", "gvk", gvkStr)
}
- for _, override := range conf.APIServerOverrides {
- // Check if we have any registered gvk for this API server override.
- gvk, ok := gvksByConfStr[override.GVK]
+ // Parse fallback GVKs.
+ c.fallbackGVKs = make(map[schema.GroupVersionKind]bool)
+ for _, fb := range conf.Fallbacks {
+ gvk, ok := gvksByConfStr[fb.GVK]
if !ok {
- return errors.New("no gvk registered for API server override " + override.GVK)
+ return errors.New("no gvk registered for fallback " + fb.GVK)
+ }
+ log.Info("registering fallback gvk", "gvk", gvk)
+ c.fallbackGVKs[gvk] = true
+ }
+ // Parse apiserver overrides.
+ for _, override := range conf.APIServerOverrides {
+ var resolvedGVKs []schema.GroupVersionKind
+ for _, gvkStr := range override.GVKs {
+ gvk, ok := gvksByConfStr[gvkStr]
+ if !ok {
+ return errors.New("no gvk registered for API server override " + gvkStr)
+ }
+ resolvedGVKs = append(resolvedGVKs, gvk)
}
- cluster, err := c.AddRemote(ctx, override.Host, override.CACert, gvk)
+ cl, err := c.AddRemote(ctx, override.Host, override.CACert, override.Labels, resolvedGVKs...)
if err != nil {
return err
}
- // Also tell the manager about this cluster so that controllers can use it.
- // This will execute the cluster.Start function when the manager starts.
- if err := mgr.Add(cluster); err != nil {
+ if err := mgr.Add(cl); err != nil {
return err
}
}
@@ -128,12 +130,11 @@ func (c *Client) InitFromConf(ctx context.Context, mgr ctrl.Manager, conf Client
// This can be used when the remote cluster accepts the home cluster's service
// account tokens. See the kubernetes documentation on structured auth to
// learn more about jwt-based authentication across clusters.
-func (c *Client) AddRemote(ctx context.Context, host, caCert string, gvks ...schema.GroupVersionKind) (cluster.Cluster, error) {
+func (c *Client) AddRemote(ctx context.Context, host, caCert string, labels map[string]string, gvks ...schema.GroupVersionKind) (cluster.Cluster, error) {
log := ctrl.LoggerFrom(ctx)
homeRestConfig := *c.HomeRestConfig
restConfigCopy := homeRestConfig
restConfigCopy.Host = host
- // This must be the CA data for the remote apiserver.
restConfigCopy.CAData = []byte(caCert)
cl, err := cluster.New(&restConfigCopy, func(o *cluster.Options) {
o.Scheme = c.HomeScheme
@@ -144,16 +145,19 @@ func (c *Client) AddRemote(ctx context.Context, host, caCert string, gvks ...sch
c.remoteClustersMu.Lock()
defer c.remoteClustersMu.Unlock()
if c.remoteClusters == nil {
- c.remoteClusters = make(map[schema.GroupVersionKind]cluster.Cluster)
+ c.remoteClusters = make(map[schema.GroupVersionKind][]remoteCluster)
}
for _, gvk := range gvks {
- log.Info("adding remote cluster for resource", "gvk", gvk, "host", host)
- c.remoteClusters[gvk] = cl
+ log.Info("adding remote cluster for resource", "gvk", gvk, "host", host, "labels", labels)
+ c.remoteClusters[gvk] = append(c.remoteClusters[gvk], remoteCluster{
+ cluster: cl,
+ labels: labels,
+ })
}
return cl, nil
}
-// Get the gvk registered for the given resource in the home cluster's RESTMapper.
+// Get the gvk registered for the given resource in the home cluster's scheme.
func (c *Client) GVKFromHomeScheme(obj runtime.Object) (gvk schema.GroupVersionKind, err error) {
gvks, unversioned, err := c.HomeScheme.ObjectKinds(obj)
if err != nil {
@@ -168,109 +172,186 @@ func (c *Client) GVKFromHomeScheme(obj runtime.Object) (gvk schema.GroupVersionK
return gvks[0], nil
}
-func (c *Client) ClusterForResource(gvk schema.GroupVersionKind) cluster.Cluster {
+// ClustersForGVK returns all clusters that serve the given GVK.
+// If no remote clusters are configured, only the home cluster is returned.
+// For fallback GVKs with remote clusters, the home cluster is appended
+// because resources might have been written there as a fallback.
+func (c *Client) ClustersForGVK(gvk schema.GroupVersionKind) []cluster.Cluster {
c.remoteClustersMu.RLock()
defer c.remoteClustersMu.RUnlock()
- // TODO: Match the approprite cluster.
-
- return c.HomeCluster
+ remotes := c.remoteClusters[gvk]
+ if len(remotes) == 0 {
+ return []cluster.Cluster{c.HomeCluster}
+ }
+ clusters := make([]cluster.Cluster, 0, len(remotes)+1)
+ for _, r := range remotes {
+ clusters = append(clusters, r.cluster)
+ }
+ if c.fallbackGVKs[gvk] {
+ clusters = append(clusters, c.HomeCluster)
+ }
+ return clusters
}
-// Get the client for the given resource URI.
+// clusterForWrite uses a ResourceRouter to determine which remote cluster
+// a resource should be written to based on the resource content and cluster labels.
//
-// If this URI does not have a remote cluster configured, the home cluster's
-// Get the client for the given resource group version kind.
-//
-// If this object kind does not have a remote cluster configured, the home cluster's
-// client is returned.
-func (c *Client) ClientForResource(gvk schema.GroupVersionKind) client.Client {
- return c.
- ClusterForResource(gvk).
- GetClient()
+// If no remote clusters are configured for the GVK, the home cluster is returned.
+// If a ResourceRouter is configured and matches a cluster, that cluster is returned.
+// If no match is found and the GVK has a fallback configured, the home cluster is returned.
+// Otherwise an error is returned.
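+//
+// For example, with two apiservers labeled az=az-1 and az=az-2 and a router
+// that matches on the object's az label, an object labeled az=az-2 is written
+// to the second cluster, while an object labeled az=az-3 matches no remote
+// and errors unless its GVK is listed under fallbacks (see the tests below).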
+func (c *Client) clusterForWrite(gvk schema.GroupVersionKind, obj any) (cluster.Cluster, error) {
+ c.remoteClustersMu.RLock()
+ defer c.remoteClustersMu.RUnlock()
+ remotes := c.remoteClusters[gvk]
+ if len(remotes) == 0 {
+ return c.HomeCluster, nil
+ }
+ router, ok := c.ResourceRouters[gvk]
+ if !ok {
+ // If there is more than one remote cluster and no router, we don't know
+ // which one to write to, so we return an error in that case. If there is
+ // only one remote cluster, we can safely assume it is the write target.
+ if len(remotes) == 1 {
+ return remotes[0].cluster, nil
+ }
+ return nil, fmt.Errorf("no ResourceRouter configured for GVK %s with %d clusters", gvk, len(remotes))
+ }
+ for _, r := range remotes {
+ match, err := router.Match(obj, r.labels)
+ if err != nil {
+ return nil, fmt.Errorf("ResourceRouter match error for GVK %s: %w", gvk, err)
+ }
+ if match {
+ return r.cluster, nil
+ }
+ }
+
+ // If we can't match any remote cluster but the GVK is configured to fall back to the home cluster, return the home cluster.
+ if c.fallbackGVKs[gvk] {
+ return c.HomeCluster, nil
+ }
+ return nil, fmt.Errorf("no cluster matched for GVK %s and no fallback configured", gvk)
}
-// Pick the right cluster based on the resource type and perform a Get operation.
-// If the object does not implement Resource or no custom cluster is configured,
-// the home cluster is used.
+// Get iterates over all clusters with the GVK and returns the first result found.
+// If no cluster has the resource, the last NotFound error is returned.
func (c *Client) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error {
gvk, err := c.GVKFromHomeScheme(obj)
if err != nil {
return err
}
- return c.ClientForResource(gvk).Get(ctx, key, obj, opts...)
+ clusters := c.ClustersForGVK(gvk)
+ var lastErr error
+ for _, cl := range clusters {
+ err := cl.GetClient().Get(ctx, key, obj, opts...)
+ if err == nil {
+ return nil
+ }
+ if !apierrors.IsNotFound(err) {
+ return err
+ }
+ lastErr = err
+ }
+ return lastErr
}
-// Pick the right cluster based on the resource type and perform a List operation.
-// If the object does not implement Resource or no custom cluster is configured,
-// the home cluster is used.
+// List iterates over all clusters with the GVK and returns a combined list.
func (c *Client) List(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error {
gvk, err := c.GVKFromHomeScheme(list)
if err != nil {
return err
}
- return c.ClientForResource(gvk).List(ctx, list, opts...)
+ clusters := c.ClustersForGVK(gvk)
+
+ var allItems []runtime.Object
+ for _, cl := range clusters {
+ listCopy := list.DeepCopyObject().(client.ObjectList)
+ if err := cl.GetClient().List(ctx, listCopy, opts...); err != nil {
+ return err
+ }
+ items, err := meta.ExtractList(listCopy)
+ if err != nil {
+ return err
+ }
+ allItems = append(allItems, items...)
+ }
+ return meta.SetList(list, allItems)
}
// Apply is not supported in the multicluster client as the group version kind
-// cannot be inferred from the ApplyConfiguration. Use ClientForResource
-// and call Apply on the returned client instead.
+// cannot be inferred from the ApplyConfiguration.
func (c *Client) Apply(ctx context.Context, obj runtime.ApplyConfiguration, opts ...client.ApplyOption) error {
return errors.New("apply operation is not supported in multicluster client")
}
-// Pick the right cluster based on the resource type and perform a Create operation.
-// If the object does not implement Resource or no custom cluster is configured,
-// the home cluster is used.
+// Create routes the object to the matching cluster using the ResourceRouter
+// and performs a Create operation.
func (c *Client) Create(ctx context.Context, obj client.Object, opts ...client.CreateOption) error {
gvk, err := c.GVKFromHomeScheme(obj)
if err != nil {
return err
}
- return c.ClientForResource(gvk).Create(ctx, obj, opts...)
+ cl, err := c.clusterForWrite(gvk, obj)
+ if err != nil {
+ return err
+ }
+ return cl.GetClient().Create(ctx, obj, opts...)
}
-// Pick the right cluster based on the resource type and perform a Delete operation.
-// If the object does not implement Resource or no custom cluster is configured,
-// the home cluster is used.
+// Delete routes the object to the matching cluster using the ResourceRouter
+// and performs a Delete operation.
func (c *Client) Delete(ctx context.Context, obj client.Object, opts ...client.DeleteOption) error {
gvk, err := c.GVKFromHomeScheme(obj)
if err != nil {
return err
}
- return c.ClientForResource(gvk).Delete(ctx, obj, opts...)
+ cl, err := c.clusterForWrite(gvk, obj)
+ if err != nil {
+ return err
+ }
+ return cl.GetClient().Delete(ctx, obj, opts...)
}
-// Pick the right cluster based on the resource type and perform an Update operation.
-// If the object does not implement Resource or no custom cluster is configured,
-// the home cluster is used.
+// Update routes the object to the matching cluster using the ResourceRouter
+// and performs an Update operation.
func (c *Client) Update(ctx context.Context, obj client.Object, opts ...client.UpdateOption) error {
gvk, err := c.GVKFromHomeScheme(obj)
if err != nil {
return err
}
- return c.ClientForResource(gvk).Update(ctx, obj, opts...)
+ cl, err := c.clusterForWrite(gvk, obj)
+ if err != nil {
+ return err
+ }
+ return cl.GetClient().Update(ctx, obj, opts...)
}
-// Pick the right cluster based on the resource type and perform a Patch operation.
-// If the object does not implement Resource or no custom cluster is configured,
-// the home cluster is used.
+// Patch routes the object to the matching cluster using the ResourceRouter
+// and performs a Patch operation.
func (c *Client) Patch(ctx context.Context, obj client.Object, patch client.Patch, opts ...client.PatchOption) error {
gvk, err := c.GVKFromHomeScheme(obj)
if err != nil {
return err
}
- return c.ClientForResource(gvk).Patch(ctx, obj, patch, opts...)
+ cl, err := c.clusterForWrite(gvk, obj)
+ if err != nil {
+ return err
+ }
+ return cl.GetClient().Patch(ctx, obj, patch, opts...)
}
-// Pick the right cluster based on the resource type and perform a DeleteAllOf operation.
-// If the object does not implement Resource or no custom cluster is configured,
-// the home cluster is used.
+// DeleteAllOf iterates over all clusters with the GVK and performs DeleteAllOf on each.
func (c *Client) DeleteAllOf(ctx context.Context, obj client.Object, opts ...client.DeleteAllOfOption) error {
gvk, err := c.GVKFromHomeScheme(obj)
if err != nil {
return err
}
- return c.ClientForResource(gvk).DeleteAllOf(ctx, obj, opts...)
+ for _, cl := range c.ClustersForGVK(gvk) {
+ if err := cl.GetClient().DeleteAllOf(ctx, obj, opts...); err != nil {
+ return err
+ }
+ }
+ return nil
}
// Return the scheme of the home cluster.
@@ -297,49 +378,50 @@ func (c *Client) IsObjectNamespaced(obj runtime.Object) (bool, error) {
// based on the resource type.
func (c *Client) Status() client.StatusWriter { return &statusClient{multiclusterClient: c} }
-// Wrapper around the status subresource client which picks the right cluster
-// based on the resource type.
+// Wrapper around the status subresource client which routes to the correct cluster.
type statusClient struct{ multiclusterClient *Client }
-// Pick the right cluster based on the resource type and perform a Create operation.
-// If the object does not implement Resource or no custom cluster is configured,
-// the home cluster is used.
+// Create routes the status create to the matching cluster using the ResourceRouter.
func (c *statusClient) Create(ctx context.Context, obj, subResource client.Object, opts ...client.SubResourceCreateOption) error {
gvk, err := c.multiclusterClient.GVKFromHomeScheme(obj)
if err != nil {
return err
}
- return c.multiclusterClient.ClientForResource(gvk).
- Status().Create(ctx, obj, subResource, opts...)
+ cl, err := c.multiclusterClient.clusterForWrite(gvk, obj)
+ if err != nil {
+ return err
+ }
+ return cl.GetClient().Status().Create(ctx, obj, subResource, opts...)
}
-// Pick the right cluster based on the resource type and perform an Update operation.
-// If the object does not implement Resource or no custom cluster is configured,
-// the home cluster is used.
+// Update routes the status update to the matching cluster using the ResourceRouter.
func (c *statusClient) Update(ctx context.Context, obj client.Object, opts ...client.SubResourceUpdateOption) error {
gvk, err := c.multiclusterClient.GVKFromHomeScheme(obj)
if err != nil {
return err
}
- return c.multiclusterClient.ClientForResource(gvk).
- Status().Update(ctx, obj, opts...)
+ cl, err := c.multiclusterClient.clusterForWrite(gvk, obj)
+ if err != nil {
+ return err
+ }
+ return cl.GetClient().Status().Update(ctx, obj, opts...)
}
-// Pick the right cluster based on the resource type and perform a Patch operation.
-// If the object does not implement Resource or no custom cluster is configured,
-// the home cluster is used.
+// Patch routes the status patch to the matching cluster using the ResourceRouter.
func (c *statusClient) Patch(ctx context.Context, obj client.Object, patch client.Patch, opts ...client.SubResourcePatchOption) error {
gvk, err := c.multiclusterClient.GVKFromHomeScheme(obj)
if err != nil {
return err
}
- return c.multiclusterClient.ClientForResource(gvk).
- Status().Patch(ctx, obj, patch, opts...)
+ cl, err := c.multiclusterClient.clusterForWrite(gvk, obj)
+ if err != nil {
+ return err
+ }
+ return cl.GetClient().Status().Patch(ctx, obj, patch, opts...)
}
// Apply is not supported in the multicluster status client as the group version kind
-// cannot be inferred from the ApplyConfiguration. Use ClientForResource
-// and call Apply on the returned client instead.
+// cannot be inferred from the ApplyConfiguration.
func (c *statusClient) Apply(ctx context.Context, obj runtime.ApplyConfiguration, opts ...client.SubResourceApplyOption) error {
return errors.New("apply operation is not supported in multicluster status client")
}
@@ -353,71 +435,79 @@ func (c *Client) SubResource(subResource string) client.SubResourceClient {
}
}
-// Wrapper around a subresource client which picks the right cluster
-// based on the resource type.
+// Wrapper around a subresource client which routes to the correct cluster.
type subResourceClient struct {
- // The parent multicluster client.
multiclusterClient *Client
- // The name of the subresource.
- subResource string
+ subResource string
}
-// Pick the right cluster based on the resource type and perform a Get operation.
-// If the object does not implement Resource or no custom cluster is configured,
-// the home cluster is used.
+// Get iterates over all clusters with the GVK and returns the first result found.
func (c *subResourceClient) Get(ctx context.Context, obj, subResource client.Object, opts ...client.SubResourceGetOption) error {
gvk, err := c.multiclusterClient.GVKFromHomeScheme(obj)
if err != nil {
return err
}
- return c.multiclusterClient.ClientForResource(gvk).
- SubResource(c.subResource).Get(ctx, obj, subResource, opts...)
+ clusters := c.multiclusterClient.ClustersForGVK(gvk)
+ var lastErr error
+ for _, cl := range clusters {
+ err := cl.GetClient().SubResource(c.subResource).Get(ctx, obj, subResource, opts...)
+ if err == nil {
+ return nil
+ }
+ if !apierrors.IsNotFound(err) {
+ return err
+ }
+ lastErr = err
+ }
+ return lastErr
}
-// Pick the right cluster based on the resource type and perform a Create operation.
-// If the object does not implement Resource or no custom cluster is configured,
-// the home cluster is used.
+// Create routes the subresource create to the matching cluster using the ResourceRouter.
func (c *subResourceClient) Create(ctx context.Context, obj, subResource client.Object, opts ...client.SubResourceCreateOption) error {
gvk, err := c.multiclusterClient.GVKFromHomeScheme(obj)
if err != nil {
return err
}
- return c.multiclusterClient.ClientForResource(gvk).
- SubResource(c.subResource).Create(ctx, obj, subResource, opts...)
+ cl, err := c.multiclusterClient.clusterForWrite(gvk, obj)
+ if err != nil {
+ return err
+ }
+ return cl.GetClient().SubResource(c.subResource).Create(ctx, obj, subResource, opts...)
}
-// Pick the right cluster based on the resource type and perform an Update operation.
-// If the object does not implement Resource or no custom cluster is configured,
-// the home cluster is used.
+// Update routes the subresource update to the matching cluster using the ResourceRouter.
func (c *subResourceClient) Update(ctx context.Context, obj client.Object, opts ...client.SubResourceUpdateOption) error {
gvk, err := c.multiclusterClient.GVKFromHomeScheme(obj)
if err != nil {
return err
}
- return c.multiclusterClient.ClientForResource(gvk).
- SubResource(c.subResource).Update(ctx, obj, opts...)
+ cl, err := c.multiclusterClient.clusterForWrite(gvk, obj)
+ if err != nil {
+ return err
+ }
+ return cl.GetClient().SubResource(c.subResource).Update(ctx, obj, opts...)
}
-// Pick the right cluster based on the resource type and perform a Patch operation.
-// If the object does not implement Resource or no custom cluster is configured,
-// the home cluster is used.
+// Patch routes the subresource patch to the matching cluster using the ResourceRouter.
func (c *subResourceClient) Patch(ctx context.Context, obj client.Object, patch client.Patch, opts ...client.SubResourcePatchOption) error {
gvk, err := c.multiclusterClient.GVKFromHomeScheme(obj)
if err != nil {
return err
}
- return c.multiclusterClient.ClientForResource(gvk).
- SubResource(c.subResource).Patch(ctx, obj, patch, opts...)
+ cl, err := c.multiclusterClient.clusterForWrite(gvk, obj)
+ if err != nil {
+ return err
+ }
+ return cl.GetClient().SubResource(c.subResource).Patch(ctx, obj, patch, opts...)
}
// Apply is not supported in the multicluster subresource client as the group version kind
-// cannot be inferred from the ApplyConfiguration. Use ClientForResource
-// and call Apply on the returned client instead.
+// cannot be inferred from the ApplyConfiguration.
func (c *subResourceClient) Apply(ctx context.Context, obj runtime.ApplyConfiguration, opts ...client.SubResourceApplyOption) error {
return errors.New("apply operation is not supported in multicluster subresource client")
}
-// Index a field for a resource in the matching cluster's cache.
+// Index a field for a resource in all matching cluster caches.
// Usually, you want to index the same field in both the object and list type,
// as both would be mapped to individual clients based on their GVK.
func (c *Client) IndexField(ctx context.Context, obj client.Object, list client.ObjectList, field string, extractValue client.IndexerFunc) error {
@@ -425,27 +515,32 @@ func (c *Client) IndexField(ctx context.Context, obj client.Object, list client.
if err != nil {
return err
}
- objCluster := c.ClusterForResource(gvkObj)
- if err := objCluster.
- GetCache().
- IndexField(ctx, obj, field, extractValue); err != nil {
- return err
- }
- // Index the object in the list cluster as well.
gvkList, err := c.GVKFromHomeScheme(list)
if err != nil {
return err
}
- objListCluster := c.ClusterForResource(gvkList)
- // If the object and list map to the same cluster, we have already indexed
- // the field above and re-defining the index will lead to an indexer conflict.
- if objCluster == objListCluster {
- return nil
+ // Collect all unique caches so each cache is indexed exactly once,
+ // even when the object and list GVKs map to the same clusters.
+ indexed := make(map[any]bool)
+ for _, cl := range c.ClustersForGVK(gvkObj) {
+ ch := cl.GetCache()
+ if indexed[ch] {
+ continue
+ }
+ indexed[ch] = true
+ if err := ch.IndexField(ctx, obj, field, extractValue); err != nil {
+ return err
+ }
}
- if err := objListCluster.
- GetCache().
- IndexField(ctx, obj, field, extractValue); err != nil {
- return err
+ for _, cl := range c.ClustersForGVK(gvkList) {
+ ch := cl.GetCache()
+ if indexed[ch] {
+ continue
+ }
+ indexed[ch] = true
+ if err := ch.IndexField(ctx, obj, field, extractValue); err != nil {
+ return err
+ }
}
return nil
}
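
A hypothetical index registration against this client; the "metadata.labels.az" key and the extractor are illustrative, not taken from the cortex API:

    // The same index lands on every cache serving either GVK, deduplicated above.
    err := multiclusterClient.IndexField(ctx,
        &v1alpha1.Decision{}, &v1alpha1.DecisionList{},
        "metadata.labels.az",
        func(o client.Object) []string {
            return []string{o.GetLabels()["az"]}
        },
    )
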
diff --git a/pkg/multicluster/client_test.go b/pkg/multicluster/client_test.go
index 74d595e78..2fa9a8f1a 100644
--- a/pkg/multicluster/client_test.go
+++ b/pkg/multicluster/client_test.go
@@ -44,8 +44,7 @@ func (u *unknownType) DeepCopyObject() runtime.Object {
// fakeCache implements cache.Cache interface for testing IndexField.
type fakeCache struct {
cache.Cache
- indexFieldFunc func(ctx context.Context, obj client.Object, field string, extractValue client.IndexerFunc) error
- // Track calls to IndexField for verification
+ indexFieldFunc func(ctx context.Context, obj client.Object, field string, extractValue client.IndexerFunc) error
indexFieldCalls []indexFieldCall
mu sync.Mutex
}
@@ -113,289 +112,306 @@ func newTestScheme(t *testing.T) *runtime.Scheme {
return scheme
}
-// TestClient_Apply tests that the Apply method returns an error.
-func TestClient_Apply(t *testing.T) {
- scheme := newTestScheme(t)
+// testRouter is a simple ResourceRouter for testing.
+type testRouter struct{}
- c := &Client{
- HomeScheme: scheme,
+func (r testRouter) Match(obj any, labels map[string]string) (bool, error) {
+ cm, ok := obj.(*corev1.ConfigMap)
+ if !ok {
+ return false, nil
+ }
+ az, ok := labels["az"]
+ if !ok {
+ return false, nil
+ }
+ objAZ, ok := cm.Labels["az"]
+ if !ok {
+ return false, nil
}
+ return objAZ == az, nil
+}
- ctx := context.Background()
+var configMapGVK = schema.GroupVersionKind{Group: "", Version: "v1", Kind: "ConfigMap"}
+var configMapListGVK = schema.GroupVersionKind{Group: "", Version: "v1", Kind: "ConfigMapList"}
+var podGVK = schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Pod"}
- t.Run("apply returns error", func(t *testing.T) {
- err := c.Apply(ctx, nil)
- if err == nil {
- t.Error("expected error for Apply operation")
- }
- if err.Error() != "apply operation is not supported in multicluster client" {
- t.Errorf("unexpected error message: %v", err)
- }
- })
+func TestClient_Apply(t *testing.T) {
+ c := &Client{HomeScheme: newTestScheme(t)}
+
+ // Apply should return an error since it is not supported by the multicluster client.
+ err := c.Apply(context.Background(), nil)
+ if err == nil {
+ t.Error("expected error for Apply operation")
+ }
}
-// TestStatusClient_Apply tests that the status client Apply method returns an error.
func TestStatusClient_Apply(t *testing.T) {
sc := &statusClient{multiclusterClient: &Client{}}
- ctx := context.Background()
-
- err := sc.Apply(ctx, nil)
+ // Apply should return an error since it is not supported by the multicluster status client.
+ err := sc.Apply(context.Background(), nil)
if err == nil {
t.Error("expected error for Apply operation")
}
- if err.Error() != "apply operation is not supported in multicluster status client" {
- t.Errorf("unexpected error message: %v", err)
- }
}
-// TestSubResourceClientApply tests that the subresource client Apply method returns an error.
-func TestSubResourceClientApply(t *testing.T) {
- src := &subResourceClient{
- multiclusterClient: &Client{},
- subResource: "status",
- }
-
- ctx := context.Background()
+func TestSubResourceClient_Apply(t *testing.T) {
+ src := &subResourceClient{multiclusterClient: &Client{}, subResource: "status"}
- err := src.Apply(ctx, nil)
+ // Apply should return an error since it is not supported by the multicluster subresource client.
+ err := src.Apply(context.Background(), nil)
if err == nil {
t.Error("expected error for Apply operation")
}
- if err.Error() != "apply operation is not supported in multicluster subresource client" {
- t.Errorf("unexpected error message: %v", err)
- }
}
-// TestClient_ClusterForResource_NilRemoteClusters tests behavior when no remote clusters are configured.
-func TestClient_ClusterForResource_NilRemoteClusters(t *testing.T) {
+func TestClient_ClustersForGVK_NoRemoteClusters(t *testing.T) {
+ scheme := newTestScheme(t)
+ homeCluster := newFakeCluster(scheme)
c := &Client{
- remoteClusters: nil,
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
}
- gvk := schema.GroupVersionKind{
- Group: "test",
- Version: "v1",
- Kind: "TestKind",
+ clusters := c.ClustersForGVK(configMapGVK)
+ if len(clusters) != 1 {
+ t.Fatalf("expected 1 cluster, got %d", len(clusters))
}
-
- // When remoteClusters is nil and HomeCluster is nil, we should get nil
- result := c.ClusterForResource(gvk)
- if result != nil {
- t.Error("expected nil when no home cluster is set")
+ if clusters[0] != homeCluster {
+ t.Error("expected home cluster")
}
}
-// TestClient_ClusterForResource_EmptyRemoteClusters tests behavior with empty remote clusters map.
-func TestClient_ClusterForResource_EmptyRemoteClusters(t *testing.T) {
+func TestClient_ClustersForGVK_SingleRemoteCluster(t *testing.T) {
+ scheme := newTestScheme(t)
+ homeCluster := newFakeCluster(scheme)
+ remote := newFakeCluster(scheme)
c := &Client{
- remoteClusters: make(map[schema.GroupVersionKind]cluster.Cluster),
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {{cluster: remote, labels: map[string]string{"az": "az-1"}}},
+ },
}
- gvk := schema.GroupVersionKind{
- Group: "test",
- Version: "v1",
- Kind: "TestKind",
+ clusters := c.ClustersForGVK(configMapGVK)
+ if len(clusters) != 1 {
+ t.Fatalf("expected 1 cluster, got %d", len(clusters))
}
-
- // When remoteClusters is empty and HomeCluster is nil, we should get nil
- result := c.ClusterForResource(gvk)
- if result != nil {
- t.Error("expected nil when no home cluster is set and GVK not found")
+ if clusters[0] != remote {
+ t.Error("expected remote cluster")
}
}
-// TestClient_Status returns a status writer.
-func TestClient_Status(t *testing.T) {
- c := &Client{}
-
- status := c.Status()
- if status == nil {
- t.Error("expected non-nil status writer")
+func TestClient_ClustersForGVK_MultipleRemoteClusters(t *testing.T) {
+ scheme := newTestScheme(t)
+ homeCluster := newFakeCluster(scheme)
+ remote1 := newFakeCluster(scheme)
+ remote2 := newFakeCluster(scheme)
+ c := &Client{
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {
+ {cluster: remote1, labels: map[string]string{"az": "az-1"}},
+ {cluster: remote2, labels: map[string]string{"az": "az-2"}},
+ },
+ },
}
- // Verify it's the right type
- if _, ok := status.(*statusClient); !ok {
- t.Error("expected statusClient type")
+ clusters := c.ClustersForGVK(configMapGVK)
+ if len(clusters) != 2 {
+ t.Fatalf("expected 2 clusters, got %d", len(clusters))
}
}
-// TestClient_SubResource returns a subresource client.
-func TestClient_SubResource(t *testing.T) {
- c := &Client{}
+func TestClient_ClustersForGVK_FallbackIncludesHome(t *testing.T) {
+ scheme := newTestScheme(t)
+ homeCluster := newFakeCluster(scheme)
+ remote := newFakeCluster(scheme)
+ c := &Client{
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {{cluster: remote, labels: map[string]string{"az": "az-1"}}},
+ },
+ fallbackGVKs: map[schema.GroupVersionKind]bool{configMapGVK: true},
+ }
- subResource := c.SubResource("scale")
- if subResource == nil {
- t.Error("expected non-nil subresource client")
+ clusters := c.ClustersForGVK(configMapGVK)
+ if len(clusters) != 2 {
+ t.Fatalf("expected 2 clusters (remote + home fallback), got %d", len(clusters))
+ }
+ if clusters[0] != remote {
+ t.Error("expected remote cluster first")
}
+ if clusters[1] != homeCluster {
+ t.Error("expected home cluster as fallback")
+ }
+}
- // Verify it's the right type
- src, ok := subResource.(*subResourceClient)
- if !ok {
- t.Error("expected subResourceClient type")
+func TestClient_clusterForWrite_NoRemoteClusters(t *testing.T) {
+ scheme := newTestScheme(t)
+ homeCluster := newFakeCluster(scheme)
+ c := &Client{
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
}
- if src.subResource != "scale" {
- t.Errorf("expected subResource='scale', got '%s'", src.subResource)
+ cl, err := c.clusterForWrite(configMapGVK, &corev1.ConfigMap{})
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if cl != homeCluster {
+ t.Error("expected home cluster when no remotes configured")
}
}
-// TestClient_AddRemote_NilRemoteClusters initializes the remote clusters map.
-func TestClient_AddRemote_NilRemoteClusters(t *testing.T) {
+func TestClient_clusterForWrite_SingleRemoteCluster(t *testing.T) {
+ scheme := newTestScheme(t)
+ homeCluster := newFakeCluster(scheme)
+ remote := newFakeCluster(scheme)
c := &Client{
- remoteClusters: nil,
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {{cluster: remote, labels: map[string]string{"az": "az-1"}}},
+ },
}
- // Just verify the lock mechanism works without panicking
- c.remoteClustersMu.Lock()
- if c.remoteClusters == nil {
- c.remoteClusters = make(map[schema.GroupVersionKind]cluster.Cluster)
+ cl, err := c.clusterForWrite(configMapGVK, &corev1.ConfigMap{})
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
}
- c.remoteClustersMu.Unlock()
-
- // Should not panic
- if c.remoteClusters == nil {
- t.Error("expected remoteClusters to be initialized")
+ if cl != remote {
+ t.Error("expected remote cluster for single remote")
}
}
-// TestClient_ConcurrentAccess tests thread safety of ClusterForResource.
-func TestClient_ConcurrentAccess(t *testing.T) {
+func TestClient_clusterForWrite_RouterMatches(t *testing.T) {
+ scheme := newTestScheme(t)
+ homeCluster := newFakeCluster(scheme)
+ remote1 := newFakeCluster(scheme)
+ remote2 := newFakeCluster(scheme)
c := &Client{
- remoteClusters: make(map[schema.GroupVersionKind]cluster.Cluster),
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ ResourceRouters: map[schema.GroupVersionKind]ResourceRouter{
+ configMapGVK: testRouter{},
+ },
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {
+ {cluster: remote1, labels: map[string]string{"az": "az-1"}},
+ {cluster: remote2, labels: map[string]string{"az": "az-2"}},
+ },
+ },
}
- gvk := schema.GroupVersionKind{
- Group: "test",
- Version: "v1",
- Kind: "TestKind",
+ obj := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"az": "az-2"}},
+ }
+ cl, err := c.clusterForWrite(configMapGVK, obj)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if cl != remote2 {
+ t.Error("expected second remote cluster for az-2")
}
+}
- // Test concurrent reads - should not panic or race
- done := make(chan bool)
- for range 10 {
- go func() {
- _ = c.ClusterForResource(gvk)
- done <- true
- }()
+func TestClient_clusterForWrite_NoMatchWithFallback(t *testing.T) {
+ scheme := newTestScheme(t)
+ homeCluster := newFakeCluster(scheme)
+ remote1 := newFakeCluster(scheme)
+ remote2 := newFakeCluster(scheme)
+ c := &Client{
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ ResourceRouters: map[schema.GroupVersionKind]ResourceRouter{
+ configMapGVK: testRouter{},
+ },
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {
+ {cluster: remote1, labels: map[string]string{"az": "az-1"}},
+ {cluster: remote2, labels: map[string]string{"az": "az-2"}},
+ },
+ },
+ fallbackGVKs: map[schema.GroupVersionKind]bool{configMapGVK: true},
}
- for range 10 {
- <-done
+ // Object with az-3 doesn't match any remote cluster.
+ obj := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"az": "az-3"}},
+ }
+ cl, err := c.clusterForWrite(configMapGVK, obj)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if cl != homeCluster {
+ t.Error("expected home cluster as fallback")
}
}
-// TestObjectKeyFromConfigMap tests that we can construct object keys properly.
-func TestObjectKeyFromConfigMap(t *testing.T) {
- cm := &corev1.ConfigMap{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-config",
- Namespace: "default",
+func TestClient_clusterForWrite_NoMatchNoFallback(t *testing.T) {
+ scheme := newTestScheme(t)
+ homeCluster := newFakeCluster(scheme)
+ remote1 := newFakeCluster(scheme)
+ c := &Client{
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ ResourceRouters: map[schema.GroupVersionKind]ResourceRouter{
+ configMapGVK: testRouter{},
+ },
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {
+ {cluster: remote1, labels: map[string]string{"az": "az-1"}},
+ {cluster: newFakeCluster(scheme), labels: map[string]string{"az": "az-2"}},
+ },
},
}
- key := client.ObjectKeyFromObject(cm)
- if key.Name != "test-config" {
- t.Errorf("expected Name='test-config', got '%s'", key.Name)
+ obj := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"az": "az-3"}},
}
- if key.Namespace != "default" {
- t.Errorf("expected Namespace='default', got '%s'", key.Namespace)
+ _, err := c.clusterForWrite(configMapGVK, obj)
+ if err == nil {
+ t.Error("expected error when no match and no fallback")
}
}
-// TestGVKExtraction tests that GVK can be properly set and retrieved.
-func TestGVKExtraction(t *testing.T) {
- cm := &corev1.ConfigMap{}
- gvk := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMap",
+func TestClient_clusterForWrite_NoRouterMultipleClusters(t *testing.T) {
+ scheme := newTestScheme(t)
+ homeCluster := newFakeCluster(scheme)
+ c := &Client{
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {
+ {cluster: newFakeCluster(scheme), labels: map[string]string{"az": "az-1"}},
+ {cluster: newFakeCluster(scheme), labels: map[string]string{"az": "az-2"}},
+ },
+ },
}
- cm.SetGroupVersionKind(gvk)
-
- result := cm.GetObjectKind().GroupVersionKind()
- if result != gvk {
- t.Errorf("expected GVK %v, got %v", gvk, result)
+ _, err := c.clusterForWrite(configMapGVK, &corev1.ConfigMap{})
+ if err == nil {
+ t.Error("expected error when no router with multiple clusters")
}
}
-// TestGVKFromHomeScheme_Success tests successful GVK lookup for registered types.
func TestGVKFromHomeScheme_Success(t *testing.T) {
scheme := newTestScheme(t)
-
- c := &Client{
- HomeScheme: scheme,
- }
+ c := &Client{HomeScheme: scheme}
tests := []struct {
name string
obj runtime.Object
expectedGVK schema.GroupVersionKind
}{
- {
- name: "ConfigMap",
- obj: &corev1.ConfigMap{},
- expectedGVK: schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMap",
- },
- },
- {
- name: "ConfigMapList",
- obj: &corev1.ConfigMapList{},
- expectedGVK: schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMapList",
- },
- },
- {
- name: "Secret",
- obj: &corev1.Secret{},
- expectedGVK: schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "Secret",
- },
- },
- {
- name: "Pod",
- obj: &corev1.Pod{},
- expectedGVK: schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "Pod",
- },
- },
- {
- name: "Service",
- obj: &corev1.Service{},
- expectedGVK: schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "Service",
- },
- },
- {
- name: "v1alpha1 Decision",
- obj: &v1alpha1.Decision{},
- expectedGVK: schema.GroupVersionKind{
- Group: "cortex.cloud",
- Version: "v1alpha1",
- Kind: "Decision",
- },
- },
- {
- name: "v1alpha1 DecisionList",
- obj: &v1alpha1.DecisionList{},
- expectedGVK: schema.GroupVersionKind{
- Group: "cortex.cloud",
- Version: "v1alpha1",
- Kind: "DecisionList",
- },
- },
+ {"ConfigMap", &corev1.ConfigMap{}, configMapGVK},
+ {"ConfigMapList", &corev1.ConfigMapList{}, configMapListGVK},
+ {"Decision", &v1alpha1.Decision{}, schema.GroupVersionKind{Group: "cortex.cloud", Version: "v1alpha1", Kind: "Decision"}},
+ {"DecisionList", &v1alpha1.DecisionList{}, schema.GroupVersionKind{Group: "cortex.cloud", Version: "v1alpha1", Kind: "DecisionList"}},
}
for _, tt := range tests {
@@ -411,457 +427,437 @@ func TestGVKFromHomeScheme_Success(t *testing.T) {
}
}
-// TestGVKFromHomeScheme_UnknownType tests error handling for unregistered types.
func TestGVKFromHomeScheme_UnknownType(t *testing.T) {
- scheme := newTestScheme(t)
-
- c := &Client{
- HomeScheme: scheme,
- }
-
- obj := &unknownType{}
- _, err := c.GVKFromHomeScheme(obj)
+ c := &Client{HomeScheme: newTestScheme(t)}
+ _, err := c.GVKFromHomeScheme(&unknownType{})
if err == nil {
t.Error("expected error for unknown type")
}
}
-// TestGVKFromHomeScheme_UnversionedType tests error handling for unversioned types.
func TestGVKFromHomeScheme_UnversionedType(t *testing.T) {
scheme := runtime.NewScheme()
-
- // Register the type as unversioned
scheme.AddUnversionedTypes(schema.GroupVersion{Group: "", Version: "v1"}, &unversionedType{})
-
- c := &Client{
- HomeScheme: scheme,
- }
-
- obj := &unversionedType{}
- _, err := c.GVKFromHomeScheme(obj)
+ c := &Client{HomeScheme: scheme}
+ _, err := c.GVKFromHomeScheme(&unversionedType{})
if err == nil {
t.Error("expected error for unversioned type")
}
- if err.Error() != "cannot list unversioned resource" {
- t.Errorf("unexpected error message: %v", err)
- }
}
-// TestGVKFromHomeScheme_NilScheme tests behavior with nil scheme.
func TestGVKFromHomeScheme_NilScheme(t *testing.T) {
- c := &Client{
- HomeScheme: nil,
- }
-
- obj := &corev1.ConfigMap{}
-
- // Should panic or return error when scheme is nil
+ c := &Client{HomeScheme: nil}
defer func() {
if r := recover(); r == nil {
t.Error("expected panic with nil scheme")
}
}()
-
- _, err := c.GVKFromHomeScheme(obj)
- if err == nil {
- t.Error("expected error with nil scheme")
- }
+ _, _ = c.GVKFromHomeScheme(&corev1.ConfigMap{})
}
-// TestClient_ClusterForResource_WithRemoteCluster tests ClusterForResource with a remote cluster configured.
-func TestClient_ClusterForResource_WithRemoteCluster(t *testing.T) {
+func TestClient_Get_SingleRemoteCluster(t *testing.T) {
scheme := newTestScheme(t)
- homeCluster := newFakeCluster(scheme)
- remoteCluster := newFakeCluster(scheme)
-
- gvk := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMap",
+ existingCM := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"},
+ Data: map[string]string{"key": "remote-value"},
}
+ remote := newFakeCluster(scheme, existingCM)
+ homeCluster := newFakeCluster(scheme)
c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- remoteClusters: map[schema.GroupVersionKind]cluster.Cluster{gvk: remoteCluster},
- }
-
- // Should return the remote cluster for the registered GVK
- result := c.ClusterForResource(gvk)
- if result != remoteCluster {
- t.Error("expected remote cluster for registered GVK")
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {{cluster: remote}},
+ },
}
- // Should return home cluster for non-registered GVK
- otherGVK := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "Secret",
+ cm := &corev1.ConfigMap{}
+ err := c.Get(context.Background(), client.ObjectKey{Name: "test-cm", Namespace: "default"}, cm)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
}
- result = c.ClusterForResource(otherGVK)
- if result != homeCluster {
- t.Error("expected home cluster for non-registered GVK")
+ if cm.Data["key"] != "remote-value" {
+ t.Errorf("expected 'remote-value', got '%s'", cm.Data["key"])
}
}
-// TestClient_ClientForResource tests ClientForResource returns the correct client.
-func TestClient_ClientForResource(t *testing.T) {
- scheme := newTestScheme(t)
- homeCluster := newFakeCluster(scheme)
- remoteCluster := newFakeCluster(scheme)
+func TestClient_Get_MultiCluster_FirstFound(t *testing.T) {
+ // Iterate all remote clusters and return the first found object. In this test, only remote2 has the object, so it should be returned.
- gvk := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMap",
+ scheme := newTestScheme(t)
+ existingCM := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{Name: "test-cm", Namespace: "default"},
+ Data: map[string]string{"key": "from-cluster-2"},
}
+ remote1 := newFakeCluster(scheme) // empty
+ remote2 := newFakeCluster(scheme, existingCM)
c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- remoteClusters: map[schema.GroupVersionKind]cluster.Cluster{gvk: remoteCluster},
- }
-
- // Should return the remote cluster's client for the registered GVK
- result := c.ClientForResource(gvk)
- if result != remoteCluster.GetClient() {
- t.Error("expected remote cluster client for registered GVK")
+ HomeCluster: newFakeCluster(scheme),
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {
+ {cluster: remote1, labels: map[string]string{"az": "az-1"}},
+ {cluster: remote2, labels: map[string]string{"az": "az-2"}},
+ },
+ },
}
- // Should return home cluster's client for non-registered GVK
- otherGVK := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "Secret",
+ cm := &corev1.ConfigMap{}
+ err := c.Get(context.Background(), client.ObjectKey{Name: "test-cm", Namespace: "default"}, cm)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
}
- result = c.ClientForResource(otherGVK)
- if result != homeCluster.GetClient() {
- t.Error("expected home cluster client for non-registered GVK")
+ if cm.Data["key"] != "from-cluster-2" {
+ t.Errorf("expected 'from-cluster-2', got '%s'", cm.Data["key"])
}
}
-// TestClient_Scheme tests that Scheme returns the home cluster's client scheme.
-func TestClient_Scheme(t *testing.T) {
+func TestClient_Get_MultiCluster_NotFound(t *testing.T) {
+ // Get should return NotFound when the object exists in no cluster (including
+ // the home cluster, if fallback is enabled). Here the object exists nowhere,
+ // so the error must be NotFound.
+
scheme := newTestScheme(t)
- homeCluster := newFakeCluster(scheme)
+ remote1 := newFakeCluster(scheme) // empty
+ remote2 := newFakeCluster(scheme) // empty
c := &Client{
- HomeCluster: homeCluster,
+ HomeCluster: newFakeCluster(scheme),
HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {
+ {cluster: remote1},
+ {cluster: remote2},
+ },
+ },
}
- result := c.Scheme()
- if result == nil {
- t.Error("expected non-nil scheme")
+ cm := &corev1.ConfigMap{}
+ err := c.Get(context.Background(), client.ObjectKey{Name: "missing", Namespace: "default"}, cm)
+ if err == nil {
+ t.Error("expected NotFound error")
}
}
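+
+// getFirstFound is an illustrative sketch of the Get semantics the tests above
+// exercise; it is an assumption about the implementation, not a copy of it.
+// Get tries each remote cluster registered for the GVK in order, returns the
+// first hit, and consults the home cluster only when no remotes are
+// registered or a fallback is configured for the GVK.
+func getFirstFound(ctx context.Context, c *Client, gvk schema.GroupVersionKind, key client.ObjectKey, obj client.Object) error {
+ clusters := c.remoteClusters[gvk]
+ if len(clusters) == 0 {
+ // No remotes registered for this GVK: use the home cluster directly.
+ return c.HomeCluster.GetClient().Get(ctx, key, obj)
+ }
+ var lastErr error
+ for _, rc := range clusters {
+ if lastErr = rc.cluster.GetClient().Get(ctx, key, obj); lastErr == nil {
+ return nil // first cluster that has the object wins
+ }
+ }
+ if c.fallbackGVKs[gvk] {
+ return c.HomeCluster.GetClient().Get(ctx, key, obj)
+ }
+ return lastErr // typically NotFound from the last cluster tried
+}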
-// TestClient_RESTMapper tests that RESTMapper returns the home cluster's client RESTMapper.
-func TestClient_RESTMapper(t *testing.T) {
+func TestClient_Get_UnknownType(t *testing.T) {
scheme := newTestScheme(t)
- homeCluster := newFakeCluster(scheme)
-
c := &Client{
- HomeCluster: homeCluster,
+ HomeCluster: newFakeCluster(scheme),
HomeScheme: scheme,
}
-
- result := c.RESTMapper()
- if result == nil {
- t.Error("expected non-nil RESTMapper")
+ obj := &unknownType{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"}}
+ err := c.Get(context.Background(), client.ObjectKey{Name: "test", Namespace: "default"}, obj)
+ if err == nil {
+ t.Error("expected error for unknown type")
}
}
-// TestClient_GroupVersionKindFor tests GroupVersionKindFor returns correct GVK.
-func TestClient_GroupVersionKindFor(t *testing.T) {
+func TestClient_Get_FallbackCluster(t *testing.T) {
scheme := newTestScheme(t)
- homeCluster := newFakeCluster(scheme)
+ homeCluster := newFakeCluster(scheme, &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{Name: "home-cm", Namespace: "default"},
+ Data: map[string]string{"key": "from-home"},
+ })
+ remote := newFakeCluster(scheme) // empty
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {{cluster: remote}},
+ },
+ fallbackGVKs: map[schema.GroupVersionKind]bool{configMapGVK: true},
}
- gvk, err := c.GroupVersionKindFor(&corev1.ConfigMap{})
+ cm := &corev1.ConfigMap{}
+ err := c.Get(context.Background(), client.ObjectKey{Name: "home-cm", Namespace: "default"}, cm)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
-
- expected := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMap",
- }
- if gvk != expected {
- t.Errorf("expected GVK %v, got %v", expected, gvk)
+ if cm.Data["key"] != "from-home" {
+ t.Errorf("expected 'from-home', got '%s'", cm.Data["key"])
}
}
-// TestClient_IsObjectNamespaced tests IsObjectNamespaced delegates to home cluster client.
-func TestClient_IsObjectNamespaced(t *testing.T) {
+func TestClient_List_SingleRemoteCluster(t *testing.T) {
scheme := newTestScheme(t)
+ cm1 := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "cm1", Namespace: "default"}}
+ cm2 := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "cm2", Namespace: "default"}}
+ remote := newFakeCluster(scheme, cm1, cm2)
homeCluster := newFakeCluster(scheme)
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapListGVK: {{cluster: remote}},
+ },
}
- // The fake client's RESTMapper doesn't have all mappings, so we just test
- // that the method delegates properly to the home cluster's client.
- // We expect an error due to the fake client's limited RESTMapper.
- _, err := c.IsObjectNamespaced(&corev1.ConfigMap{})
- // The fake client doesn't have a proper RESTMapper, so this will fail,
- // but we're testing that the delegation works.
- _ = err // Error expected with fake client
+ cmList := &corev1.ConfigMapList{}
+ err := c.List(context.Background(), cmList, client.InNamespace("default"))
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(cmList.Items) != 2 {
+ t.Errorf("expected 2 items, got %d", len(cmList.Items))
+ }
}
-// TestClient_Get tests the Get method routes to the correct cluster.
-func TestClient_Get(t *testing.T) {
+func TestClient_List_MultipleClusters_CombinesResults(t *testing.T) {
scheme := newTestScheme(t)
+ remote1 := newFakeCluster(scheme,
+ &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "cm-az1-1", Namespace: "default"}},
+ &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "cm-az1-2", Namespace: "default"}},
+ )
+ remote2 := newFakeCluster(scheme,
+ &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "cm-az2-1", Namespace: "default"}},
+ )
- // Create a ConfigMap in the remote cluster
- existingCM := &corev1.ConfigMap{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-cm",
- Namespace: "default",
+ c := &Client{
+ HomeCluster: newFakeCluster(scheme),
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapListGVK: {
+ {cluster: remote1, labels: map[string]string{"az": "az-1"}},
+ {cluster: remote2, labels: map[string]string{"az": "az-2"}},
+ },
},
- Data: map[string]string{"key": "remote-value"},
}
- remoteCluster := newFakeCluster(scheme, existingCM)
- homeCluster := newFakeCluster(scheme)
-
- gvk := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMap",
+ cmList := &corev1.ConfigMapList{}
+ err := c.List(context.Background(), cmList, client.InNamespace("default"))
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
}
+ if len(cmList.Items) != 3 {
+ t.Errorf("expected 3 combined items, got %d", len(cmList.Items))
+ }
+}
+
+func TestClient_List_FallbackIncludesHome(t *testing.T) {
+ scheme := newTestScheme(t)
+ remote := newFakeCluster(scheme,
+ &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "remote-cm", Namespace: "default"}},
+ )
+ homeCluster := newFakeCluster(scheme,
+ &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "home-cm", Namespace: "default"}},
+ )
c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- remoteClusters: map[schema.GroupVersionKind]cluster.Cluster{gvk: remoteCluster},
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapListGVK: {{cluster: remote}},
+ },
+ fallbackGVKs: map[schema.GroupVersionKind]bool{configMapListGVK: true},
}
- ctx := context.Background()
-
- // Get from remote cluster (ConfigMap GVK is registered)
- cm := &corev1.ConfigMap{}
- err := c.Get(ctx, client.ObjectKey{Name: "test-cm", Namespace: "default"}, cm)
+ cmList := &corev1.ConfigMapList{}
+ err := c.List(context.Background(), cmList, client.InNamespace("default"))
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
- if cm.Data["key"] != "remote-value" {
- t.Errorf("expected 'remote-value', got '%s'", cm.Data["key"])
+ if len(cmList.Items) != 2 {
+ t.Errorf("expected 2 items (remote + home), got %d", len(cmList.Items))
}
}
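+
+// listCombined sketches the assumed List semantics verified above, specialized
+// to ConfigMapList for brevity (the real client presumably works generically
+// via the scheme): query every cluster registered for the list GVK, append
+// the items into one combined result, and include the home cluster when no
+// remotes are registered or a fallback is configured.
+func listCombined(ctx context.Context, c *Client, gvk schema.GroupVersionKind, opts ...client.ListOption) (*corev1.ConfigMapList, error) {
+ combined := &corev1.ConfigMapList{}
+ clients := []client.Client{}
+ for _, rc := range c.remoteClusters[gvk] {
+ clients = append(clients, rc.cluster.GetClient())
+ }
+ if len(clients) == 0 || c.fallbackGVKs[gvk] {
+ clients = append(clients, c.HomeCluster.GetClient())
+ }
+ for _, cl := range clients {
+ partial := &corev1.ConfigMapList{}
+ if err := cl.List(ctx, partial, opts...); err != nil {
+ return nil, err
+ }
+ combined.Items = append(combined.Items, partial.Items...)
+ }
+ return combined, nil
+}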
-// TestClient_Get_UnknownType tests Get returns error for unknown types.
-func TestClient_Get_UnknownType(t *testing.T) {
+func TestClient_List_HomeClusterOnly(t *testing.T) {
scheme := newTestScheme(t)
- homeCluster := newFakeCluster(scheme)
+ homeCluster := newFakeCluster(scheme,
+ &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "home-cm", Namespace: "default"}},
+ )
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
}
- ctx := context.Background()
-
- obj := &unknownType{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test",
- Namespace: "default",
- },
+ cmList := &corev1.ConfigMapList{}
+ err := c.List(context.Background(), cmList, client.InNamespace("default"))
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
}
-
- err := c.Get(ctx, client.ObjectKey{Name: "test", Namespace: "default"}, obj)
- if err == nil {
- t.Error("expected error for unknown type")
+ if len(cmList.Items) != 1 {
+ t.Errorf("expected 1 item, got %d", len(cmList.Items))
}
}
-// TestClient_List tests the List method routes to the correct cluster.
-func TestClient_List(t *testing.T) {
+func TestClient_Create_SingleRemoteCluster(t *testing.T) {
scheme := newTestScheme(t)
+ homeCluster := newFakeCluster(scheme)
+ remote := newFakeCluster(scheme)
- // Create ConfigMaps in the remote cluster
- cm1 := &corev1.ConfigMap{
- ObjectMeta: metav1.ObjectMeta{
- Name: "cm1",
- Namespace: "default",
- },
- }
- cm2 := &corev1.ConfigMap{
- ObjectMeta: metav1.ObjectMeta{
- Name: "cm2",
- Namespace: "default",
+ c := &Client{
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {{cluster: remote}},
},
}
- remoteCluster := newFakeCluster(scheme, cm1, cm2)
- homeCluster := newFakeCluster(scheme)
-
- gvk := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMapList",
- }
-
- c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- remoteClusters: map[schema.GroupVersionKind]cluster.Cluster{gvk: remoteCluster},
+ cm := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{Name: "new-cm", Namespace: "default"},
}
-
- ctx := context.Background()
-
- // List from remote cluster
- cmList := &corev1.ConfigMapList{}
- err := c.List(ctx, cmList, client.InNamespace("default"))
+ err := c.Create(context.Background(), cm)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
- if len(cmList.Items) != 2 {
- t.Errorf("expected 2 items, got %d", len(cmList.Items))
+
+ // Verify the object landed in the remote cluster, not the home cluster.
+ result := &corev1.ConfigMap{}
+ err = remote.GetClient().Get(context.Background(), client.ObjectKey{Name: "new-cm", Namespace: "default"}, result)
+ if err != nil {
+ t.Fatalf("expected to find in remote cluster: %v", err)
}
}
-// TestClient_Create tests the Create method routes to the correct cluster.
-func TestClient_Create(t *testing.T) {
+func TestClient_Create_RouterMatchesCluster(t *testing.T) {
scheme := newTestScheme(t)
homeCluster := newFakeCluster(scheme)
- remoteCluster := newFakeCluster(scheme)
-
- gvk := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMap",
- }
+ remote1 := newFakeCluster(scheme)
+ remote2 := newFakeCluster(scheme)
c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- remoteClusters: map[schema.GroupVersionKind]cluster.Cluster{gvk: remoteCluster},
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ ResourceRouters: map[schema.GroupVersionKind]ResourceRouter{
+ configMapGVK: testRouter{},
+ },
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {
+ {cluster: remote1, labels: map[string]string{"az": "az-1"}},
+ {cluster: remote2, labels: map[string]string{"az": "az-2"}},
+ },
+ },
}
- ctx := context.Background()
-
- // Create in remote cluster
cm := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: "new-cm",
Namespace: "default",
+ Labels: map[string]string{"az": "az-2"},
},
}
- err := c.Create(ctx, cm)
+ err := c.Create(context.Background(), cm)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
- // Verify it was created in remote cluster
+ // Should be in remote2, not remote1.
result := &corev1.ConfigMap{}
- err = remoteCluster.GetClient().Get(ctx, client.ObjectKey{Name: "new-cm", Namespace: "default"}, result)
+ err = remote2.GetClient().Get(context.Background(), client.ObjectKey{Name: "new-cm", Namespace: "default"}, result)
if err != nil {
- t.Fatalf("failed to get created object from remote cluster: %v", err)
+ t.Fatalf("expected to find in remote2: %v", err)
+ }
+ err = remote1.GetClient().Get(context.Background(), client.ObjectKey{Name: "new-cm", Namespace: "default"}, result)
+ if err == nil {
+ t.Error("should NOT be in remote1")
}
}
-// TestClient_Delete tests the Delete method routes to the correct cluster.
-func TestClient_Delete(t *testing.T) {
+func TestClient_Create_FallbackToHome(t *testing.T) {
scheme := newTestScheme(t)
+ homeCluster := newFakeCluster(scheme)
- existingCM := &corev1.ConfigMap{
+ c := &Client{
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ ResourceRouters: map[schema.GroupVersionKind]ResourceRouter{
+ configMapGVK: testRouter{},
+ },
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {
+ {cluster: newFakeCluster(scheme), labels: map[string]string{"az": "az-1"}},
+ },
+ },
+ fallbackGVKs: map[schema.GroupVersionKind]bool{configMapGVK: true},
+ }
+
+ // The az-99 label matches no remote cluster, so the create should fall back to home.
+ cm := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
- Name: "to-delete",
+ Name: "fallback-cm",
Namespace: "default",
+ Labels: map[string]string{"az": "az-99"},
},
}
- homeCluster := newFakeCluster(scheme)
- remoteCluster := newFakeCluster(scheme, existingCM)
-
- gvk := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMap",
+ err := c.Create(context.Background(), cm)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
}
- c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- remoteClusters: map[schema.GroupVersionKind]cluster.Cluster{gvk: remoteCluster},
+ result := &corev1.ConfigMap{}
+ err = homeCluster.GetClient().Get(context.Background(), client.ObjectKey{Name: "fallback-cm", Namespace: "default"}, result)
+ if err != nil {
+ t.Fatalf("expected to find in home cluster (fallback): %v", err)
}
+}
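+
+// writeTarget sketches the assumed routing for Create/Update/Delete that the
+// tests above pin down; names and control flow here are illustrative, not the
+// actual implementation. The GVK's ResourceRouter matches the object against
+// each remote cluster's labels; if nothing matches, the home cluster is used
+// only when a fallback is configured for the GVK.
+func writeTarget(c *Client, gvk schema.GroupVersionKind, obj client.Object) (client.Client, bool) {
+ remotes := c.remoteClusters[gvk]
+ router, ok := c.ResourceRouters[gvk]
+ if !ok {
+ // No router configured: with a single remote, write there; otherwise
+ // write to the home cluster.
+ if len(remotes) == 1 {
+ return remotes[0].cluster.GetClient(), true
+ }
+ return c.HomeCluster.GetClient(), true
+ }
+ for _, rc := range remotes {
+ matched, err := router.Match(obj, rc.labels)
+ if err != nil || !matched {
+ continue // sketch only: the real client would surface err
+ }
+ return rc.cluster.GetClient(), true
+ }
+ if c.fallbackGVKs[gvk] {
+ return c.HomeCluster.GetClient(), true
+ }
+ return nil, false // no match and no fallback: the operation must fail
+}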
- ctx := context.Background()
+func TestClient_Delete_SingleRemoteCluster(t *testing.T) {
+ scheme := newTestScheme(t)
+ existingCM := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{Name: "to-delete", Namespace: "default"},
+ }
+ homeCluster := newFakeCluster(scheme)
+ remote := newFakeCluster(scheme, existingCM)
- // Delete from remote cluster
- cm := &corev1.ConfigMap{
- ObjectMeta: metav1.ObjectMeta{
- Name: "to-delete",
- Namespace: "default",
+ c := &Client{
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {{cluster: remote}},
},
}
- err := c.Delete(ctx, cm)
+
+ cm := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "to-delete", Namespace: "default"}}
+ err := c.Delete(context.Background(), cm)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
- // Verify it was deleted from remote cluster
result := &corev1.ConfigMap{}
- err = remoteCluster.GetClient().Get(ctx, client.ObjectKey{Name: "to-delete", Namespace: "default"}, result)
+ err = remote.GetClient().Get(context.Background(), client.ObjectKey{Name: "to-delete", Namespace: "default"}, result)
if err == nil {
t.Error("expected object to be deleted from remote cluster")
}
}
-// TestClient_Update tests the Update method routes to the correct cluster.
-func TestClient_Update(t *testing.T) {
+func TestClient_Update_SingleRemoteCluster(t *testing.T) {
scheme := newTestScheme(t)
-
existingCM := &corev1.ConfigMap{
- ObjectMeta: metav1.ObjectMeta{
- Name: "to-update",
- Namespace: "default",
- },
- Data: map[string]string{"key": "old-value"},
+ ObjectMeta: metav1.ObjectMeta{Name: "to-update", Namespace: "default"},
+ Data: map[string]string{"key": "old-value"},
}
-
homeCluster := newFakeCluster(scheme)
- remoteCluster := newFakeCluster(scheme, existingCM)
-
- gvk := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMap",
- }
+ remote := newFakeCluster(scheme, existingCM)
c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- remoteClusters: map[schema.GroupVersionKind]cluster.Cluster{gvk: remoteCluster},
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {{cluster: remote}},
+ },
}
ctx := context.Background()
-
- // First get the object to have the correct resource version
cm := &corev1.ConfigMap{}
- err := remoteCluster.GetClient().Get(ctx, client.ObjectKey{Name: "to-update", Namespace: "default"}, cm)
+ err := remote.GetClient().Get(ctx, client.ObjectKey{Name: "to-update", Namespace: "default"}, cm)
if err != nil {
t.Fatalf("failed to get object: %v", err)
}
- // Update in remote cluster
cm.Data["key"] = "new-value"
err = c.Update(ctx, cm)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
- // Verify it was updated in remote cluster
result := &corev1.ConfigMap{}
- err = remoteCluster.GetClient().Get(ctx, client.ObjectKey{Name: "to-update", Namespace: "default"}, result)
+ err = remote.GetClient().Get(ctx, client.ObjectKey{Name: "to-update", Namespace: "default"}, result)
if err != nil {
t.Fatalf("failed to get updated object: %v", err)
}
@@ -870,42 +866,25 @@ func TestClient_Update(t *testing.T) {
}
}
-// TestClient_Patch tests the Patch method routes to the correct cluster.
-func TestClient_Patch(t *testing.T) {
+func TestClient_Patch_SingleRemoteCluster(t *testing.T) {
scheme := newTestScheme(t)
-
existingCM := &corev1.ConfigMap{
- ObjectMeta: metav1.ObjectMeta{
- Name: "to-patch",
- Namespace: "default",
- },
- Data: map[string]string{"key": "old-value"},
+ ObjectMeta: metav1.ObjectMeta{Name: "to-patch", Namespace: "default"},
+ Data: map[string]string{"key": "old-value"},
}
-
homeCluster := newFakeCluster(scheme)
- remoteCluster := newFakeCluster(scheme, existingCM)
-
- gvk := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMap",
- }
+ remote := newFakeCluster(scheme, existingCM)
c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- remoteClusters: map[schema.GroupVersionKind]cluster.Cluster{gvk: remoteCluster},
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {{cluster: remote}},
+ },
}
ctx := context.Background()
-
- // Patch in remote cluster
- cm := &corev1.ConfigMap{
- ObjectMeta: metav1.ObjectMeta{
- Name: "to-patch",
- Namespace: "default",
- },
- }
+ cm := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "to-patch", Namespace: "default"}}
patch := client.MergeFrom(cm.DeepCopy())
cm.Data = map[string]string{"key": "patched-value"}
err := c.Patch(ctx, cm, patch)
@@ -913,9 +892,8 @@ func TestClient_Patch(t *testing.T) {
t.Fatalf("unexpected error: %v", err)
}
- // Verify it was patched in remote cluster
result := &corev1.ConfigMap{}
- err = remoteCluster.GetClient().Get(ctx, client.ObjectKey{Name: "to-patch", Namespace: "default"}, result)
+ err = remote.GetClient().Get(ctx, client.ObjectKey{Name: "to-patch", Namespace: "default"}, result)
if err != nil {
t.Fatalf("failed to get patched object: %v", err)
}
@@ -924,51 +902,32 @@ func TestClient_Patch(t *testing.T) {
}
}
-// TestClient_DeleteAllOf tests the DeleteAllOf method routes to the correct cluster.
-func TestClient_DeleteAllOf(t *testing.T) {
+func TestClient_DeleteAllOf_SingleRemoteCluster(t *testing.T) {
scheme := newTestScheme(t)
-
cm1 := &corev1.ConfigMap{
- ObjectMeta: metav1.ObjectMeta{
- Name: "cm1",
- Namespace: "default",
- Labels: map[string]string{"app": "test"},
- },
+ ObjectMeta: metav1.ObjectMeta{Name: "cm1", Namespace: "default", Labels: map[string]string{"app": "test"}},
}
cm2 := &corev1.ConfigMap{
- ObjectMeta: metav1.ObjectMeta{
- Name: "cm2",
- Namespace: "default",
- Labels: map[string]string{"app": "test"},
- },
+ ObjectMeta: metav1.ObjectMeta{Name: "cm2", Namespace: "default", Labels: map[string]string{"app": "test"}},
}
-
homeCluster := newFakeCluster(scheme)
- remoteCluster := newFakeCluster(scheme, cm1, cm2)
-
- gvk := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMap",
- }
+ remote := newFakeCluster(scheme, cm1, cm2)
c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- remoteClusters: map[schema.GroupVersionKind]cluster.Cluster{gvk: remoteCluster},
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {{cluster: remote}},
+ },
}
- ctx := context.Background()
-
- // DeleteAllOf in remote cluster
- err := c.DeleteAllOf(ctx, &corev1.ConfigMap{}, client.InNamespace("default"), client.MatchingLabels{"app": "test"})
+ err := c.DeleteAllOf(context.Background(), &corev1.ConfigMap{}, client.InNamespace("default"), client.MatchingLabels{"app": "test"})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
- // Verify all were deleted from remote cluster
cmList := &corev1.ConfigMapList{}
- err = remoteCluster.GetClient().List(ctx, cmList, client.InNamespace("default"))
+ err = remote.GetClient().List(context.Background(), cmList, client.InNamespace("default"))
if err != nil {
t.Fatalf("failed to list objects: %v", err)
}
@@ -977,232 +936,128 @@ func TestClient_DeleteAllOf(t *testing.T) {
}
}
-// TestClient_ConcurrentAddRemote tests thread safety of adding remote clusters.
-func TestClient_ConcurrentAddRemote(t *testing.T) {
- c := &Client{
- remoteClusters: make(map[schema.GroupVersionKind]cluster.Cluster),
- }
-
- var wg sync.WaitGroup
- for i := range 10 {
- wg.Add(1)
- go func(i int) {
- defer wg.Done()
- gvk := schema.GroupVersionKind{
- Group: "test",
- Version: "v1",
- Kind: "TestKind" + string(rune('A'+i)),
- }
- c.remoteClustersMu.Lock()
- c.remoteClusters[gvk] = nil
- c.remoteClustersMu.Unlock()
- }(i)
- }
- wg.Wait()
-
- if len(c.remoteClusters) != 10 {
- t.Errorf("expected 10 remote clusters, got %d", len(c.remoteClusters))
- }
-}
-
-// TestClient_ConcurrentClusterForResourceAndAddRemote tests concurrent read/write operations.
-func TestClient_ConcurrentClusterForResourceAndAddRemote(t *testing.T) {
- c := &Client{
- remoteClusters: make(map[schema.GroupVersionKind]cluster.Cluster),
- }
-
- gvk := schema.GroupVersionKind{
- Group: "test",
- Version: "v1",
- Kind: "TestKind",
- }
-
- var wg sync.WaitGroup
-
- // Readers
- for range 10 {
- wg.Add(1)
- go func() {
- defer wg.Done()
- for range 100 {
- _ = c.ClusterForResource(gvk)
- }
- }()
- }
-
- // Writers
- for range 5 {
- wg.Add(1)
- go func() {
- defer wg.Done()
- for range 100 {
- c.remoteClustersMu.Lock()
- c.remoteClusters[gvk] = nil
- c.remoteClustersMu.Unlock()
- }
- }()
- }
-
- wg.Wait()
-}
-
-// TestStatusClient_Create tests the status client Create method.
-func TestStatusClient_Create(t *testing.T) {
+func TestClient_DeleteAllOf_MultipleClusters(t *testing.T) {
scheme := newTestScheme(t)
-
- pod := &corev1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-pod",
- Namespace: "default",
- },
- }
-
- homeCluster := newFakeCluster(scheme, pod)
+ remote1 := newFakeCluster(scheme,
+ &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "cm1", Namespace: "default", Labels: map[string]string{"app": "test"}}},
+ )
+ remote2 := newFakeCluster(scheme,
+ &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "cm2", Namespace: "default", Labels: map[string]string{"app": "test"}}},
+ )
c := &Client{
- HomeCluster: homeCluster,
+ HomeCluster: newFakeCluster(scheme),
HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {
+ {cluster: remote1},
+ {cluster: remote2},
+ },
+ },
}
- ctx := context.Background()
-
- // Create requires a subresource object, but we're just testing the routing
- sc := c.Status()
+ err := c.DeleteAllOf(context.Background(), &corev1.ConfigMap{}, client.InNamespace("default"), client.MatchingLabels{"app": "test"})
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
- // The fake client doesn't support status subresource creation in the standard way,
- // but we can verify the method exists and routes correctly
- err := sc.Create(ctx, pod, &corev1.Pod{})
- // We expect an error because the fake client doesn't support this,
- // but we're testing that the routing works
- _ = err // Error is expected with fake client
+ for i, remote := range []*fakeCluster{remote1, remote2} {
+ cmList := &corev1.ConfigMapList{}
+ _ = remote.GetClient().List(context.Background(), cmList, client.InNamespace("default"))
+ if len(cmList.Items) != 0 {
+ t.Errorf("expected 0 items in remote%d, got %d", i+1, len(cmList.Items))
+ }
+ }
}
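+
+// deleteAllAcross sketches the assumed DeleteAllOf fan-out verified above:
+// unlike single-object writes, the operation is applied to every cluster
+// registered for the GVK. (Illustrative, not the actual implementation; when
+// no remotes are registered, the home cluster would presumably be targeted,
+// as with the other operations.)
+func deleteAllAcross(ctx context.Context, c *Client, gvk schema.GroupVersionKind, obj client.Object, opts ...client.DeleteAllOfOption) error {
+ for _, rc := range c.remoteClusters[gvk] {
+ if err := rc.cluster.GetClient().DeleteAllOf(ctx, obj, opts...); err != nil {
+ return err
+ }
+ }
+ return nil
+}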
-// TestStatusClient_Update tests the status client Update method.
func TestStatusClient_Update(t *testing.T) {
scheme := newTestScheme(t)
-
pod := &corev1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-pod",
- Namespace: "default",
- },
- Status: corev1.PodStatus{
- Phase: corev1.PodPending,
- },
+ ObjectMeta: metav1.ObjectMeta{Name: "test-pod", Namespace: "default"},
+ Status: corev1.PodStatus{Phase: corev1.PodPending},
}
-
homeCluster := newFakeCluster(scheme, pod)
-
- c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- }
+ c := &Client{HomeCluster: homeCluster, HomeScheme: scheme}
ctx := context.Background()
-
- // Get the pod first
existingPod := &corev1.Pod{}
- err := homeCluster.GetClient().Get(ctx, client.ObjectKey{Name: "test-pod", Namespace: "default"}, existingPod)
- if err != nil {
+ if err := homeCluster.GetClient().Get(ctx, client.ObjectKey{Name: "test-pod", Namespace: "default"}, existingPod); err != nil {
t.Fatalf("failed to get pod: %v", err)
}
-
- // Update status
existingPod.Status.Phase = corev1.PodRunning
- err = c.Status().Update(ctx, existingPod)
- if err != nil {
+ if err := c.Status().Update(ctx, existingPod); err != nil {
t.Fatalf("unexpected error: %v", err)
}
+
+ // Validate the update took effect
+ result := &corev1.Pod{}
+ if err := homeCluster.GetClient().Get(ctx, client.ObjectKey{Name: "test-pod", Namespace: "default"}, result); err != nil {
+ t.Fatalf("failed to get updated pod: %v", err)
+ }
+ if result.Status.Phase != corev1.PodRunning {
+ t.Errorf("expected PodRunning, got %s", result.Status.Phase)
+ }
}
-// TestStatusClient_Patch tests the status client Patch method.
func TestStatusClient_Patch(t *testing.T) {
scheme := newTestScheme(t)
-
pod := &corev1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-pod",
- Namespace: "default",
- },
- Status: corev1.PodStatus{
- Phase: corev1.PodPending,
- },
+ ObjectMeta: metav1.ObjectMeta{Name: "test-pod", Namespace: "default"},
+ Status: corev1.PodStatus{Phase: corev1.PodPending},
}
-
homeCluster := newFakeCluster(scheme, pod)
-
- c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- }
+ c := &Client{HomeCluster: homeCluster, HomeScheme: scheme}
ctx := context.Background()
-
- // Get the pod first
existingPod := &corev1.Pod{}
- err := homeCluster.GetClient().Get(ctx, client.ObjectKey{Name: "test-pod", Namespace: "default"}, existingPod)
- if err != nil {
+ if err := homeCluster.GetClient().Get(ctx, client.ObjectKey{Name: "test-pod", Namespace: "default"}, existingPod); err != nil {
t.Fatalf("failed to get pod: %v", err)
}
-
- // Patch status
patch := client.MergeFrom(existingPod.DeepCopy())
existingPod.Status.Phase = corev1.PodRunning
- err = c.Status().Patch(ctx, existingPod, patch)
- if err != nil {
+ if err := c.Status().Patch(ctx, existingPod, patch); err != nil {
t.Fatalf("unexpected error: %v", err)
}
+
+ // Validate the patch took effect
+ result := &corev1.Pod{}
+ if err := homeCluster.GetClient().Get(ctx, client.ObjectKey{Name: "test-pod", Namespace: "default"}, result); err != nil {
+ t.Fatalf("failed to get patched pod: %v", err)
+ }
+ if result.Status.Phase != corev1.PodRunning {
+ t.Errorf("expected PodRunning, got %s", result.Status.Phase)
+ }
}
-// TestStatusClient_RoutesToRemoteCluster tests that status client routes to remote cluster.
func TestStatusClient_RoutesToRemoteCluster(t *testing.T) {
scheme := newTestScheme(t)
-
pod := &corev1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-pod",
- Namespace: "default",
- },
- Status: corev1.PodStatus{
- Phase: corev1.PodPending,
- },
+ ObjectMeta: metav1.ObjectMeta{Name: "test-pod", Namespace: "default"},
+ Status: corev1.PodStatus{Phase: corev1.PodPending},
}
-
homeCluster := newFakeCluster(scheme)
- remoteCluster := newFakeCluster(scheme, pod)
-
- gvk := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "Pod",
- }
+ remote := newFakeCluster(scheme, pod)
c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- remoteClusters: map[schema.GroupVersionKind]cluster.Cluster{gvk: remoteCluster},
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ podGVK: {{cluster: remote}},
+ },
}
ctx := context.Background()
-
- // Get the pod from remote cluster
existingPod := &corev1.Pod{}
- err := remoteCluster.GetClient().Get(ctx, client.ObjectKey{Name: "test-pod", Namespace: "default"}, existingPod)
- if err != nil {
+ if err := remote.GetClient().Get(ctx, client.ObjectKey{Name: "test-pod", Namespace: "default"}, existingPod); err != nil {
t.Fatalf("failed to get pod: %v", err)
}
-
- // Update status via multicluster client - should go to remote cluster
existingPod.Status.Phase = corev1.PodRunning
- err = c.Status().Update(ctx, existingPod)
- if err != nil {
+ if err := c.Status().Update(ctx, existingPod); err != nil {
t.Fatalf("unexpected error: %v", err)
}
- // Verify it was updated in remote cluster
result := &corev1.Pod{}
- err = remoteCluster.GetClient().Get(ctx, client.ObjectKey{Name: "test-pod", Namespace: "default"}, result)
- if err != nil {
+ if err := remote.GetClient().Get(ctx, client.ObjectKey{Name: "test-pod", Namespace: "default"}, result); err != nil {
t.Fatalf("failed to get updated pod: %v", err)
}
if result.Status.Phase != corev1.PodRunning {
@@ -1210,456 +1065,185 @@ func TestStatusClient_RoutesToRemoteCluster(t *testing.T) {
}
}
-// TestSubResourceClient_Get tests the subresource client Get method.
-func TestSubResourceClient_Get(t *testing.T) {
+func TestClient_StatusAndSubResource_ErrorOnUnknownType(t *testing.T) {
scheme := newTestScheme(t)
+ c := &Client{HomeCluster: newFakeCluster(scheme), HomeScheme: scheme}
+ ctx := context.Background()
+ obj := &unknownType{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"}}
- pod := &corev1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-pod",
- Namespace: "default",
- },
+ if err := c.Status().Update(ctx, obj); err == nil {
+ t.Error("expected error for unknown type in status Update")
+ }
+ if err := c.Status().Patch(ctx, obj, client.MergeFrom(obj)); err == nil {
+ t.Error("expected error for unknown type in status Patch")
+ }
+ if err := c.SubResource("status").Update(ctx, obj); err == nil {
+ t.Error("expected error for unknown type in subresource Update")
+ }
+ if err := c.SubResource("status").Patch(ctx, obj, client.MergeFrom(obj)); err == nil {
+ t.Error("expected error for unknown type in subresource Patch")
}
+}
- homeCluster := newFakeCluster(scheme, pod)
+func TestSubResourceClient_RoutesToRemoteCluster(t *testing.T) {
+ scheme := newTestScheme(t)
+ pod := &corev1.Pod{
+ ObjectMeta: metav1.ObjectMeta{Name: "test-pod", Namespace: "default"},
+ }
+ homeCluster := newFakeCluster(scheme)
+ remote := newFakeCluster(scheme, pod)
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ podGVK: {{cluster: remote}},
+ },
}
ctx := context.Background()
-
- // The fake client may not support all subresources, but we can test the routing
+ existingPod := &corev1.Pod{}
+ if err := remote.GetClient().Get(ctx, client.ObjectKey{Name: "test-pod", Namespace: "default"}, existingPod); err != nil {
+ t.Fatalf("failed to get pod: %v", err)
+ }
src := c.SubResource("status")
- err := src.Get(ctx, pod, &corev1.Pod{})
- // Error is expected with fake client for subresource operations
- _ = err
+ if err := src.Update(ctx, existingPod); err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
}
-// TestSubResourceClient_Create tests the subresource client Create method.
-func TestSubResourceClient_Create(t *testing.T) {
+func TestClient_GroupVersionKindFor(t *testing.T) {
scheme := newTestScheme(t)
-
- pod := &corev1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-pod",
- Namespace: "default",
- },
- }
-
- homeCluster := newFakeCluster(scheme, pod)
-
- c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- }
-
- ctx := context.Background()
-
- src := c.SubResource("eviction")
- err := src.Create(ctx, pod, &corev1.Pod{})
- // Error is expected with fake client for subresource operations
- _ = err
-}
-
-// TestSubResourceClient_Update tests the subresource client Update method.
-func TestSubResourceClient_Update(t *testing.T) {
- scheme := newTestScheme(t)
-
- pod := &corev1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-pod",
- Namespace: "default",
- },
- }
-
- homeCluster := newFakeCluster(scheme, pod)
-
- c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- }
-
- ctx := context.Background()
-
- // Get the pod first
- existingPod := &corev1.Pod{}
- err := homeCluster.GetClient().Get(ctx, client.ObjectKey{Name: "test-pod", Namespace: "default"}, existingPod)
- if err != nil {
- t.Fatalf("failed to get pod: %v", err)
- }
-
- src := c.SubResource("status")
- err = src.Update(ctx, existingPod)
- if err != nil {
- t.Fatalf("unexpected error: %v", err)
- }
-}
-
-// TestSubResourceClient_Patch tests the subresource client Patch method.
-func TestSubResourceClient_Patch(t *testing.T) {
- scheme := newTestScheme(t)
-
- pod := &corev1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-pod",
- Namespace: "default",
- },
- }
-
- homeCluster := newFakeCluster(scheme, pod)
-
- c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- }
-
- ctx := context.Background()
-
- // Get the pod first
- existingPod := &corev1.Pod{}
- err := homeCluster.GetClient().Get(ctx, client.ObjectKey{Name: "test-pod", Namespace: "default"}, existingPod)
- if err != nil {
- t.Fatalf("failed to get pod: %v", err)
- }
-
- patch := client.MergeFrom(existingPod.DeepCopy())
- src := c.SubResource("status")
- err = src.Patch(ctx, existingPod, patch)
+ c := &Client{HomeCluster: newFakeCluster(scheme), HomeScheme: scheme}
+ gvk, err := c.GroupVersionKindFor(&corev1.ConfigMap{})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
-}
-
-// TestSubResourceClient_RoutesToRemoteCluster tests that subresource client routes to remote cluster.
-func TestSubResourceClient_RoutesToRemoteCluster(t *testing.T) {
- scheme := newTestScheme(t)
-
- pod := &corev1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-pod",
- Namespace: "default",
- },
- }
-
- homeCluster := newFakeCluster(scheme)
- remoteCluster := newFakeCluster(scheme, pod)
-
- gvk := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "Pod",
- }
-
- c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- remoteClusters: map[schema.GroupVersionKind]cluster.Cluster{gvk: remoteCluster},
- }
-
- ctx := context.Background()
-
- // Get the pod from remote cluster
- existingPod := &corev1.Pod{}
- err := remoteCluster.GetClient().Get(ctx, client.ObjectKey{Name: "test-pod", Namespace: "default"}, existingPod)
- if err != nil {
- t.Fatalf("failed to get pod: %v", err)
- }
-
- // Update via subresource client - should go to remote cluster
- src := c.SubResource("status")
- err = src.Update(ctx, existingPod)
- if err != nil {
- t.Fatalf("unexpected error: %v", err)
+ if gvk != configMapGVK {
+ t.Errorf("expected GVK %v, got %v", configMapGVK, gvk)
}
}
-// TestGVKFromHomeScheme_WithDifferentAPIGroups tests GVK lookup for different API groups.
-func TestGVKFromHomeScheme_WithDifferentAPIGroups(t *testing.T) {
- scheme := newTestScheme(t)
-
- c := &Client{
- HomeScheme: scheme,
- }
-
- tests := []struct {
- name string
- obj runtime.Object
- expectedGrp string
- }{
- {
- name: "core API group (empty string)",
- obj: &corev1.ConfigMap{},
- expectedGrp: "",
- },
- {
- name: "custom API group",
- obj: &v1alpha1.Decision{},
- expectedGrp: "cortex.cloud",
- },
- }
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- gvk, err := c.GVKFromHomeScheme(tt.obj)
- if err != nil {
- t.Fatalf("unexpected error: %v", err)
- }
- if gvk.Group != tt.expectedGrp {
- t.Errorf("expected group '%s', got '%s'", tt.expectedGrp, gvk.Group)
- }
- })
- }
-}
-
-// TestClient_Operations_WithHomeClusterOnly tests operations when no remote clusters are configured.
-func TestClient_Operations_WithHomeClusterOnly(t *testing.T) {
+func TestClient_IndexField_WithRemoteClusters(t *testing.T) {
scheme := newTestScheme(t)
-
- existingCM := &corev1.ConfigMap{
- ObjectMeta: metav1.ObjectMeta{
- Name: "home-cm",
- Namespace: "default",
- },
- Data: map[string]string{"key": "home-value"},
- }
-
- homeCluster := newFakeCluster(scheme, existingCM)
+ homeCache := &fakeCache{}
+ homeCluster := newFakeClusterWithCache(scheme, homeCache)
+ remoteObjCache := &fakeCache{}
+ remoteObjCluster := newFakeClusterWithCache(scheme, remoteObjCache)
+ remoteListCache := &fakeCache{}
+ remoteListCluster := newFakeClusterWithCache(scheme, remoteListCache)
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {{cluster: remoteObjCluster}},
+ configMapListGVK: {{cluster: remoteListCluster}},
+ },
}
- ctx := context.Background()
-
- // Get from home cluster
- cm := &corev1.ConfigMap{}
- err := c.Get(ctx, client.ObjectKey{Name: "home-cm", Namespace: "default"}, cm)
- if err != nil {
- t.Fatalf("unexpected error: %v", err)
- }
- if cm.Data["key"] != "home-value" {
- t.Errorf("expected 'home-value', got '%s'", cm.Data["key"])
- }
-
- // List from home cluster
- cmList := &corev1.ConfigMapList{}
- err = c.List(ctx, cmList, client.InNamespace("default"))
+ err := c.IndexField(context.Background(), &corev1.ConfigMap{}, &corev1.ConfigMapList{}, "metadata.name", func(obj client.Object) []string {
+ return []string{obj.GetName()}
+ })
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
- if len(cmList.Items) != 1 {
- t.Errorf("expected 1 item, got %d", len(cmList.Items))
- }
-}
-
-// TestClient_StatusAndSubResource_ErrorOnUnknownType tests error handling for unknown types.
-func TestClient_StatusAndSubResource_ErrorOnUnknownType(t *testing.T) {
- scheme := newTestScheme(t)
- homeCluster := newFakeCluster(scheme)
-
- c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- }
-
- ctx := context.Background()
-
- obj := &unknownType{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test",
- Namespace: "default",
- },
- }
- // Status client should return error for unknown type
- err := c.Status().Update(ctx, obj)
- if err == nil {
- t.Error("expected error for unknown type in status Update")
+ if len(remoteObjCache.getIndexFieldCalls()) != 1 {
+ t.Errorf("expected 1 IndexField call on remote obj cache, got %d", len(remoteObjCache.getIndexFieldCalls()))
}
-
- err = c.Status().Patch(ctx, obj, client.MergeFrom(obj))
- if err == nil {
- t.Error("expected error for unknown type in status Patch")
+ if len(remoteListCache.getIndexFieldCalls()) != 1 {
+ t.Errorf("expected 1 IndexField call on remote list cache, got %d", len(remoteListCache.getIndexFieldCalls()))
}
-
- // SubResource client should return error for unknown type
- err = c.SubResource("status").Update(ctx, obj)
- if err == nil {
- t.Error("expected error for unknown type in subresource Update")
- }
-
- err = c.SubResource("status").Patch(ctx, obj, client.MergeFrom(obj))
- if err == nil {
- t.Error("expected error for unknown type in subresource Patch")
+ if len(homeCache.getIndexFieldCalls()) != 0 {
+ t.Errorf("expected 0 IndexField calls on home cache, got %d", len(homeCache.getIndexFieldCalls()))
}
}
-// TestClient_IndexField_WithRemoteClusters tests IndexField with remote clusters configured.
-func TestClient_IndexField_WithRemoteClusters(t *testing.T) {
+func TestClient_IndexField_SameClusterSkipsDuplicate(t *testing.T) {
scheme := newTestScheme(t)
-
+ remoteCache := &fakeCache{}
+ remote := newFakeClusterWithCache(scheme, remoteCache)
homeCache := &fakeCache{}
homeCluster := newFakeClusterWithCache(scheme, homeCache)
- remoteObjCache := &fakeCache{}
- remoteObjCluster := newFakeClusterWithCache(scheme, remoteObjCache)
-
- remoteListCache := &fakeCache{}
- remoteListCluster := newFakeClusterWithCache(scheme, remoteListCache)
-
- objGVK := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMap",
- }
- listGVK := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMapList",
- }
-
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
- remoteClusters: map[schema.GroupVersionKind]cluster.Cluster{
- objGVK: remoteObjCluster,
- listGVK: remoteListCluster,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {{cluster: remote}},
+ configMapListGVK: {{cluster: remote}}, // same cluster
},
}
- ctx := context.Background()
-
- obj := &corev1.ConfigMap{}
- list := &corev1.ConfigMapList{}
- field := "metadata.name"
- extractValue := func(obj client.Object) []string {
+ err := c.IndexField(context.Background(), &corev1.ConfigMap{}, &corev1.ConfigMapList{}, "metadata.name", func(obj client.Object) []string {
return []string{obj.GetName()}
- }
-
- err := c.IndexField(ctx, obj, list, field, extractValue)
+ })
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
- // Verify IndexField was called on the remote object cluster's cache
- objCalls := remoteObjCache.getIndexFieldCalls()
- if len(objCalls) != 1 {
- t.Errorf("expected 1 IndexField call on remote object cluster, got %d", len(objCalls))
+ if len(remoteCache.getIndexFieldCalls()) != 1 {
+ t.Errorf("expected 1 IndexField call (deduped), got %d", len(remoteCache.getIndexFieldCalls()))
}
-
- // Verify IndexField was called on the remote list cluster's cache
- listCalls := remoteListCache.getIndexFieldCalls()
- if len(listCalls) != 1 {
- t.Errorf("expected 1 IndexField call on remote list cluster, got %d", len(listCalls))
- }
-
- // Verify home cluster cache was NOT called
- homeCalls := homeCache.getIndexFieldCalls()
- if len(homeCalls) != 0 {
- t.Errorf("expected 0 IndexField calls on home cluster, got %d", len(homeCalls))
+ if len(homeCache.getIndexFieldCalls()) != 0 {
+ t.Errorf("expected 0 IndexField calls on home, got %d", len(homeCache.getIndexFieldCalls()))
}
}
-// TestClient_IndexField_SameClusterSkipsSecondIndex tests that when object and list map to
-// the same cluster, IndexField is only called once to avoid re-defining the index (which would error).
-func TestClient_IndexField_SameClusterSkipsSecondIndex(t *testing.T) {
+func TestClient_IndexField_HomeClusterOnly(t *testing.T) {
scheme := newTestScheme(t)
-
- // Use the same cache/cluster for both object and list GVKs
- remoteCache := &fakeCache{}
- remoteCluster := newFakeClusterWithCache(scheme, remoteCache)
-
homeCache := &fakeCache{}
homeCluster := newFakeClusterWithCache(scheme, homeCache)
- objGVK := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMap",
- }
- listGVK := schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMapList",
- }
-
- // Both object and list GVKs point to the SAME remote cluster instance
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
- remoteClusters: map[schema.GroupVersionKind]cluster.Cluster{
- objGVK: remoteCluster,
- listGVK: remoteCluster, // Same cluster instance
- },
}
- ctx := context.Background()
-
- obj := &corev1.ConfigMap{}
- list := &corev1.ConfigMapList{}
- field := "metadata.name"
- extractValue := func(obj client.Object) []string {
+ err := c.IndexField(context.Background(), &corev1.ConfigMap{}, &corev1.ConfigMapList{}, "metadata.name", func(obj client.Object) []string {
return []string{obj.GetName()}
- }
-
- err := c.IndexField(ctx, obj, list, field, extractValue)
+ })
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
- // Key assertion: IndexField should only be called ONCE because both object
- // and list resolve to the same cluster. Calling it twice would cause an error
- // from re-defining the same index.
- remoteCalls := remoteCache.getIndexFieldCalls()
- if len(remoteCalls) != 1 {
- t.Errorf("expected 1 IndexField call (skipping duplicate for same cluster), got %d", len(remoteCalls))
- }
-
- // Home cluster should not be called at all
- homeCalls := homeCache.getIndexFieldCalls()
- if len(homeCalls) != 0 {
- t.Errorf("expected 0 IndexField calls on home cluster, got %d", len(homeCalls))
+ if len(homeCache.getIndexFieldCalls()) != 1 {
+ t.Errorf("expected 1 IndexField call on home (deduped), got %d", len(homeCache.getIndexFieldCalls()))
}
}
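+
+// indexOncePer sketches the dedup rule the three IndexField tests above pin
+// down (an assumption about the implementation): the field index is
+// registered on the cache resolved for the object GVK and on the cache
+// resolved for the list GVK, but only once when both resolve to the same
+// cluster, since re-defining the same index on one cache would error.
+func indexOncePer(ctx context.Context, objIdx, listIdx client.FieldIndexer, obj client.Object, field string, fn client.IndexerFunc) error {
+ if err := objIdx.IndexField(ctx, obj, field, fn); err != nil {
+ return err
+ }
+ if listIdx == objIdx {
+ return nil // same cluster cache: skip the duplicate registration
+ }
+ return listIdx.IndexField(ctx, obj, field, fn)
+}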
-// TestClient_IndexField_HomeClusterSkipsSecondIndex tests that when both object and list
-// use the home cluster (no remote clusters configured), IndexField is only called once.
-func TestClient_IndexField_HomeClusterSkipsSecondIndex(t *testing.T) {
+func TestClient_ConcurrentAddRemoteAndRead(t *testing.T) {
scheme := newTestScheme(t)
-
- homeCache := &fakeCache{}
- homeCluster := newFakeClusterWithCache(scheme, homeCache)
-
- // No remote clusters configured - both object and list will use home cluster
c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
+ HomeCluster: newFakeCluster(scheme),
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{},
}
- ctx := context.Background()
+ var wg sync.WaitGroup
- obj := &corev1.ConfigMap{}
- list := &corev1.ConfigMapList{}
- field := "metadata.name"
- extractValue := func(obj client.Object) []string {
- return []string{obj.GetName()}
+ // Readers
+ for range 10 {
+ wg.Go(func() {
+ for range 100 {
+ _ = c.ClustersForGVK(configMapGVK)
+ }
+ })
}
- err := c.IndexField(ctx, obj, list, field, extractValue)
- if err != nil {
- t.Fatalf("unexpected error: %v", err)
+ // Writers
+ for range 5 {
+ wg.Go(func() {
+ for range 100 {
+ c.remoteClustersMu.Lock()
+ c.remoteClusters[configMapGVK] = append(c.remoteClusters[configMapGVK], remoteCluster{})
+ c.remoteClustersMu.Unlock()
+ }
+ })
}
- // Key assertion: IndexField should only be called ONCE because both object
- // and list resolve to the same home cluster. Calling it twice would cause
- // an error from re-defining the same index.
- homeCalls := homeCache.getIndexFieldCalls()
- if len(homeCalls) != 1 {
- t.Errorf("expected 1 IndexField call (skipping duplicate for same home cluster), got %d", len(homeCalls))
- }
+ wg.Wait()
}
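+
+// clustersForGVKSketch is a hypothetical stand-in for the accessor exercised
+// by the readers above, showing the locking discipline the test relies on:
+// every read of remoteClusters goes through remoteClustersMu, mirroring the
+// locked writes.
+func (c *Client) clustersForGVKSketch(gvk schema.GroupVersionKind) []remoteCluster {
+ c.remoteClustersMu.Lock()
+ defer c.remoteClustersMu.Unlock()
+ return c.remoteClusters[gvk]
+}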
// fakeManager implements ctrl.Manager for testing InitFromConf.
@@ -1679,429 +1263,82 @@ func (f *fakeManager) Add(runnable manager.Runnable) error {
return nil
}
-// TestClient_InitFromConf_EmptyConfig tests that InitFromConf succeeds with an empty config.
func TestClient_InitFromConf_EmptyConfig(t *testing.T) {
scheme := newTestScheme(t)
- homeCluster := newFakeCluster(scheme)
-
c := &Client{
- HomeCluster: homeCluster,
+ HomeCluster: newFakeCluster(scheme),
HomeScheme: scheme,
}
-
- ctx := context.Background()
- conf := ClientConfig{
- APIServerOverrides: []APIServerOverride{},
- }
-
- // Create a fake manager - we won't actually use it since there are no overrides
mgr := &fakeManager{}
- err := c.InitFromConf(ctx, mgr, conf)
+ err := c.InitFromConf(context.Background(), mgr, ClientConfig{})
if err != nil {
t.Fatalf("unexpected error with empty config: %v", err)
}
-
- // Verify no runnables were added
if len(mgr.addedRunnables) != 0 {
t.Errorf("expected 0 runnables added, got %d", len(mgr.addedRunnables))
}
}
-// TestClient_InitFromConf_UnregisteredGVK tests that InitFromConf returns an error
-// when the config contains a GVK that is not registered in the scheme.
func TestClient_InitFromConf_UnregisteredGVK(t *testing.T) {
scheme := newTestScheme(t)
- homeCluster := newFakeCluster(scheme)
-
c := &Client{
- HomeCluster: homeCluster,
+ HomeCluster: newFakeCluster(scheme),
HomeScheme: scheme,
}
-
- ctx := context.Background()
+ mgr := &fakeManager{}
conf := ClientConfig{
APIServerOverrides: []APIServerOverride{
{
- GVK: "unregistered.group/v1/UnknownKind",
Host: "https://remote-api:6443",
+ GVKs: []string{"unregistered.group/v1/UnknownKind"},
},
},
}
- mgr := &fakeManager{}
-
- err := c.InitFromConf(ctx, mgr, conf)
+ err := c.InitFromConf(context.Background(), mgr, conf)
if err == nil {
- t.Fatal("expected error for unregistered GVK, got nil")
- }
-
- expectedErrMsg := "no gvk registered for API server override unregistered.group/v1/UnknownKind"
- if err.Error() != expectedErrMsg {
- t.Errorf("expected error message '%s', got '%s'", expectedErrMsg, err.Error())
+ t.Fatal("expected error for unregistered GVK")
}
}
-// TestClient_InitFromConf_GVKFormatting tests that the GVK formatting works correctly
-// and matches the expected format for registered types.
-func TestClient_InitFromConf_GVKFormatting(t *testing.T) {
+func TestClient_InitFromConf_UnregisteredFallbackGVK(t *testing.T) {
scheme := newTestScheme(t)
-
- // Test that the GVK formatting matches what InitFromConf expects
- tests := []struct {
- name string
- gvk schema.GroupVersionKind
- expectedStr string
- }{
- {
- name: "core API ConfigMap",
- gvk: schema.GroupVersionKind{
- Group: "",
- Version: "v1",
- Kind: "ConfigMap",
- },
- expectedStr: "v1/ConfigMap",
- },
- {
- name: "cortex Decision",
- gvk: schema.GroupVersionKind{
- Group: "cortex.cloud",
- Version: "v1alpha1",
- Kind: "Decision",
- },
- expectedStr: "cortex.cloud/v1alpha1/Decision",
- },
- {
- name: "cortex DecisionList",
- gvk: schema.GroupVersionKind{
- Group: "cortex.cloud",
- Version: "v1alpha1",
- Kind: "DecisionList",
- },
- expectedStr: "cortex.cloud/v1alpha1/DecisionList",
- },
- }
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- // Verify the GVK is in the scheme
- _, found := scheme.AllKnownTypes()[tt.gvk]
- if !found {
- t.Skipf("GVK %v not found in scheme, skipping", tt.gvk)
- }
-
- // Format the GVK the same way InitFromConf does
- formatted := tt.gvk.GroupVersion().String() + "/" + tt.gvk.Kind
- if formatted != tt.expectedStr {
- t.Errorf("expected formatted GVK '%s', got '%s'", tt.expectedStr, formatted)
- }
- })
- }
-}
-
-// TestClient_InitFromConf_MultipleUnregisteredGVKs tests that the first unregistered
-// GVK in the list causes an error.
-func TestClient_InitFromConf_MultipleUnregisteredGVKs(t *testing.T) {
- scheme := newTestScheme(t)
- homeCluster := newFakeCluster(scheme)
-
c := &Client{
- HomeCluster: homeCluster,
+ HomeCluster: newFakeCluster(scheme),
HomeScheme: scheme,
}
-
- ctx := context.Background()
- conf := ClientConfig{
- APIServerOverrides: []APIServerOverride{
- {
- GVK: "first.unregistered/v1/Type1",
- Host: "https://remote-api-1:6443",
- },
- {
- GVK: "second.unregistered/v1/Type2",
- Host: "https://remote-api-2:6443",
- },
- },
- }
-
mgr := &fakeManager{}
-
- err := c.InitFromConf(ctx, mgr, conf)
- if err == nil {
- t.Fatal("expected error for unregistered GVK, got nil")
- }
-
- // Should fail on the first unregistered GVK
- expectedErrMsg := "no gvk registered for API server override first.unregistered/v1/Type1"
- if err.Error() != expectedErrMsg {
- t.Errorf("expected error message '%s', got '%s'", expectedErrMsg, err.Error())
- }
-}
-
-// TestClient_InitFromConf_NilScheme tests behavior when HomeScheme is nil.
-func TestClient_InitFromConf_NilScheme(t *testing.T) {
- c := &Client{
- HomeScheme: nil,
- }
-
- ctx := context.Background()
conf := ClientConfig{
- APIServerOverrides: []APIServerOverride{
- {
- GVK: "test/v1/SomeKind",
- Host: "https://remote-api:6443",
- },
- },
+ Fallbacks: []FallbackConfig{{GVK: "unregistered.group/v1/UnknownKind"}},
}
- mgr := &fakeManager{}
-
- // Should panic or return error due to nil scheme
- defer func() {
- if r := recover(); r == nil {
- t.Error("expected panic with nil scheme")
- }
- }()
-
- err := c.InitFromConf(ctx, mgr, conf)
- if err != nil {
- t.Fatalf("unexpected error: %v", err)
- }
-}
-
-// TestClient_InitFromConf_EmptyGVKInConfig tests behavior when an empty GVK string is provided.
-func TestClient_InitFromConf_EmptyGVKInConfig(t *testing.T) {
- scheme := newTestScheme(t)
- homeCluster := newFakeCluster(scheme)
-
- c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- }
-
- ctx := context.Background()
- conf := ClientConfig{
- APIServerOverrides: []APIServerOverride{
- {
- GVK: "",
- Host: "https://remote-api:6443",
- },
- },
- }
-
- mgr := &fakeManager{}
-
- err := c.InitFromConf(ctx, mgr, conf)
+ err := c.InitFromConf(context.Background(), mgr, conf)
if err == nil {
- t.Fatal("expected error for empty GVK, got nil")
- }
-
- // Empty GVK won't match any registered type
- expectedErrMsg := "no gvk registered for API server override "
- if err.Error() != expectedErrMsg {
- t.Errorf("expected error message '%s', got '%s'", expectedErrMsg, err.Error())
+ t.Fatal("expected error for unregistered fallback GVK")
}
}
-// TestClient_InitFromConf_PartialGVKMatch tests that partial GVK matches don't work.
-func TestClient_InitFromConf_PartialGVKMatch(t *testing.T) {
+func TestClient_InitFromConf_GVKFormatting(t *testing.T) {
scheme := newTestScheme(t)
- homeCluster := newFakeCluster(scheme)
-
- c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- }
-
- ctx := context.Background()
-
- // Try variations that shouldn't match
tests := []struct {
- name string
- gvkStr string
+ name string
+ gvk schema.GroupVersionKind
+ expectedStr string
}{
- {
- name: "missing kind",
- gvkStr: "cortex.cloud/v1alpha1",
- },
- {
- name: "wrong case",
- gvkStr: "cortex.cloud/v1alpha1/decision", // lowercase 'd'
- },
- {
- name: "extra slash",
- gvkStr: "cortex.cloud/v1alpha1/Decision/",
- },
- {
- name: "wrong version",
- gvkStr: "cortex.cloud/v2/Decision",
- },
+ {"core ConfigMap", configMapGVK, "v1/ConfigMap"},
+ {"cortex Decision", schema.GroupVersionKind{Group: "cortex.cloud", Version: "v1alpha1", Kind: "Decision"}, "cortex.cloud/v1alpha1/Decision"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- conf := ClientConfig{
- APIServerOverrides: []APIServerOverride{
- {
- GVK: tt.gvkStr,
- Host: "https://remote-api:6443",
- },
- },
+ if _, found := scheme.AllKnownTypes()[tt.gvk]; !found {
+ t.Skipf("GVK %v not in scheme", tt.gvk)
}
-
- mgr := &fakeManager{}
-
- err := c.InitFromConf(ctx, mgr, conf)
- if err == nil {
- t.Errorf("expected error for GVK string '%s', got nil", tt.gvkStr)
- }
- })
- }
-}
-
-// TestClient_InitFromConf_ValidGVKLookup tests that valid GVK strings are correctly
-// identified in the scheme. Note: This doesn't test the full AddRemote flow since
-// that requires a real REST config connection.
-func TestClient_InitFromConf_ValidGVKLookup(t *testing.T) {
- scheme := newTestScheme(t)
-
- // Build the gvksByConfStr map the same way InitFromConf does
- gvksByConfStr := make(map[string]schema.GroupVersionKind)
- for gvk := range scheme.AllKnownTypes() {
- formatted := gvk.GroupVersion().String() + "/" + gvk.Kind
- gvksByConfStr[formatted] = gvk
- }
-
- // These GVK strings should be found in the map
- validGVKs := []string{
- "v1/ConfigMap",
- "v1/ConfigMapList",
- "v1/Secret",
- "v1/SecretList",
- "v1/Pod",
- "v1/PodList",
- "cortex.cloud/v1alpha1/Decision",
- "cortex.cloud/v1alpha1/DecisionList",
- }
-
- for _, gvkStr := range validGVKs {
- t.Run(gvkStr, func(t *testing.T) {
- gvk, found := gvksByConfStr[gvkStr]
- if !found {
- t.Errorf("expected GVK string '%s' to be found in scheme", gvkStr)
- return
- }
-
- // Verify the GVK is correctly structured
- if gvk.Kind == "" {
- t.Errorf("expected non-empty Kind for GVK string '%s'", gvkStr)
+ formatted := tt.gvk.GroupVersion().String() + "/" + tt.gvk.Kind
+ if formatted != tt.expectedStr {
+ t.Errorf("expected '%s', got '%s'", tt.expectedStr, formatted)
}
})
}
}
-
-// TestAPIServerOverride_Structure tests the APIServerOverride struct.
-func TestAPIServerOverride_Structure(t *testing.T) {
- override := APIServerOverride{
- GVK: "cortex.cloud/v1alpha1/Decision",
- Host: "https://remote-api:6443",
- CACert: "-----BEGIN CERTIFICATE-----\nMIIC...\n-----END CERTIFICATE-----",
- }
-
- if override.GVK != "cortex.cloud/v1alpha1/Decision" {
- t.Errorf("unexpected GVK: %s", override.GVK)
- }
- if override.Host != "https://remote-api:6443" {
- t.Errorf("unexpected Host: %s", override.Host)
- }
- if override.CACert == "" {
- t.Error("expected non-empty CACert")
- }
-}
-
-// TestClientConfig_Structure tests the ClientConfig struct.
-func TestClientConfig_Structure(t *testing.T) {
- conf := ClientConfig{
- APIServerOverrides: []APIServerOverride{
- {
- GVK: "cortex.cloud/v1alpha1/Decision",
- Host: "https://remote-api:6443",
- },
- {
- GVK: "cortex.cloud/v1alpha1/DecisionList",
- Host: "https://remote-api:6443",
- CACert: "cert-data",
- },
- },
- }
-
- if len(conf.APIServerOverrides) != 2 {
- t.Errorf("expected 2 overrides, got %d", len(conf.APIServerOverrides))
- }
-
- // First override
- if conf.APIServerOverrides[0].GVK != "cortex.cloud/v1alpha1/Decision" {
- t.Errorf("unexpected first override GVK: %s", conf.APIServerOverrides[0].GVK)
- }
-
- // Second override
- if conf.APIServerOverrides[1].CACert != "cert-data" {
- t.Errorf("unexpected second override CACert: %s", conf.APIServerOverrides[1].CACert)
- }
-}
-
-// TestClient_InitFromConf_NilConfig tests behavior with nil APIServerOverrides slice.
-func TestClient_InitFromConf_NilOverrides(t *testing.T) {
- scheme := newTestScheme(t)
- homeCluster := newFakeCluster(scheme)
-
- c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- }
-
- ctx := context.Background()
- conf := ClientConfig{
- APIServerOverrides: nil, // nil slice
- }
-
- mgr := &fakeManager{}
-
- err := c.InitFromConf(ctx, mgr, conf)
- if err != nil {
- t.Fatalf("unexpected error with nil overrides: %v", err)
- }
-}
-
-// TestClient_InitFromConf_LogsRegisteredGVKs verifies that the function processes
-// all registered GVKs from the scheme. This is a structural test.
-func TestClient_InitFromConf_SchemeGVKCount(t *testing.T) {
- scheme := newTestScheme(t)
-
- // Count the number of types in the scheme
- allTypes := scheme.AllKnownTypes()
- if len(allTypes) == 0 {
- t.Error("expected scheme to have registered types")
- }
-
- // Build the formatted GVK map
- gvksByConfStr := make(map[string]schema.GroupVersionKind)
- for gvk := range allTypes {
- formatted := gvk.GroupVersion().String() + "/" + gvk.Kind
- gvksByConfStr[formatted] = gvk
- }
-
- // The map should have the same number of entries as types
- // (assuming no duplicate formatted strings, which shouldn't happen)
- if len(gvksByConfStr) == 0 {
- t.Error("expected formatted GVK map to have entries")
- }
-
- // Verify some specific entries exist
- if _, found := gvksByConfStr["v1/ConfigMap"]; !found {
- t.Error("expected v1/ConfigMap to be in formatted GVK map")
- }
- if _, found := gvksByConfStr["cortex.cloud/v1alpha1/Decision"]; !found {
- t.Error("expected cortex.cloud/v1alpha1/Decision to be in formatted GVK map")
- }
-}
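
Side note on the formatting exercised by the tests above: config strings are derived directly from apimachinery's GroupVersion formatting, so core (empty-group) types drop the group segment. A minimal standalone sketch, using only apimachinery:

// Minimal sketch of the config string formatting used in the tests above.
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/runtime/schema"
)

// confString mirrors the formatting in InitFromConf: GroupVersion().String()
// yields "group/version", or just "version" when the group is empty.
func confString(gvk schema.GroupVersionKind) string {
	return gvk.GroupVersion().String() + "/" + gvk.Kind
}

func main() {
	fmt.Println(confString(schema.GroupVersionKind{Version: "v1", Kind: "ConfigMap"}))
	// Output: v1/ConfigMap
	fmt.Println(confString(schema.GroupVersionKind{Group: "cortex.cloud", Version: "v1alpha1", Kind: "Decision"}))
	// Output: cortex.cloud/v1alpha1/Decision
}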
diff --git a/pkg/multicluster/routers.go b/pkg/multicluster/routers.go
new file mode 100644
index 000000000..791ca4880
--- /dev/null
+++ b/pkg/multicluster/routers.go
@@ -0,0 +1,37 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package multicluster
+
+import (
+ "errors"
+
+ hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
+)
+
+// ResourceRouter determines which remote cluster a resource should be written to
+// by matching the resource content against the cluster's labels.
+type ResourceRouter interface {
+ Match(obj any, labels map[string]string) (bool, error)
+}
+
+// HypervisorResourceRouter routes hypervisors to clusters based on availability zone.
+type HypervisorResourceRouter struct{}
+
+func (h HypervisorResourceRouter) Match(obj any, labels map[string]string) (bool, error) {
+ hv, ok := obj.(hv1.Hypervisor)
+ if !ok {
+ return false, errors.New("object is not a Hypervisor")
+ }
+ az, ok := labels["az"]
+ if !ok {
+ return false, errors.New("cluster does not have availability zone label")
+ }
+ hvAZ, ok := hv.Labels["topology.kubernetes.io/zone"]
+ if !ok {
+ return false, errors.New("hypervisor does not have availability zone label")
+ }
+ return hvAZ == az, nil
+}
+
+// TODO: Add router for Decision CRD and reservations after their refactoring is done.
diff --git a/pkg/multicluster/routers_test.go b/pkg/multicluster/routers_test.go
new file mode 100644
index 000000000..179b84818
--- /dev/null
+++ b/pkg/multicluster/routers_test.go
@@ -0,0 +1,85 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package multicluster
+
+import (
+ "testing"
+
+ hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+func TestHypervisorResourceRouter_Match(t *testing.T) {
+ router := HypervisorResourceRouter{}
+
+ tests := []struct {
+ name string
+ obj any
+ labels map[string]string
+ wantMatch bool
+ wantErr bool
+ }{
+ {
+ name: "matching AZ",
+ obj: hv1.Hypervisor{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: map[string]string{"topology.kubernetes.io/zone": "qa-de-1a"},
+ },
+ },
+ labels: map[string]string{"az": "qa-de-1a"},
+ wantMatch: true,
+ },
+ {
+ name: "non-matching AZ",
+ obj: hv1.Hypervisor{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: map[string]string{"topology.kubernetes.io/zone": "qa-de-1a"},
+ },
+ },
+ labels: map[string]string{"az": "qa-de-1b"},
+ wantMatch: false,
+ },
+ {
+ name: "not a Hypervisor",
+ obj: "not-a-hypervisor",
+ labels: map[string]string{"az": "qa-de-1a"},
+ wantErr: true,
+ },
+ {
+ name: "cluster missing az label",
+ obj: hv1.Hypervisor{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: map[string]string{"topology.kubernetes.io/zone": "qa-de-1a"},
+ },
+ },
+ labels: map[string]string{},
+ wantErr: true,
+ },
+ {
+ name: "hypervisor missing zone label",
+ obj: hv1.Hypervisor{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: map[string]string{},
+ },
+ },
+ labels: map[string]string{"az": "qa-de-1a"},
+ wantErr: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ match, err := router.Match(tt.obj, tt.labels)
+ if tt.wantErr && err == nil {
+ t.Fatal("expected error, got nil")
+ }
+ if !tt.wantErr && err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if match != tt.wantMatch {
+ t.Errorf("expected match=%v, got %v", tt.wantMatch, match)
+ }
+ })
+ }
+}
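
Usage of the router mirrors the table-driven cases above. A small sketch (the cortex module path is an assumption for illustration); note that Match type-asserts on the value type hv1.Hypervisor, so passing a *hv1.Hypervisor would fail the assertion and return the "object is not a Hypervisor" error:

package main

import (
	"fmt"

	hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	// Module path assumed for illustration.
	"github.com/cobaltcore-dev/cortex/pkg/multicluster"
)

func main() {
	router := multicluster.HypervisorResourceRouter{}
	hv := hv1.Hypervisor{ObjectMeta: metav1.ObjectMeta{
		Labels: map[string]string{"topology.kubernetes.io/zone": "qa-de-1b"},
	}}
	// The labels argument carries the routing labels of a candidate cluster.
	match, err := router.Match(hv, map[string]string{"az": "qa-de-1b"})
	fmt.Println(match, err) // true <nil>
}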
From 9e0caccd43a379aea36acce7da11098ccbc0584c Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Thu, 12 Mar 2026 09:44:35 +0100
Subject: [PATCH 03/24] Remove example multicluster configuration from
cortex-scheduling-controllers
---
helm/bundles/cortex-nova/values.yaml | 18 ------------------
1 file changed, 18 deletions(-)
diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml
index 138cddcf0..b2dbba788 100644
--- a/helm/bundles/cortex-nova/values.yaml
+++ b/helm/bundles/cortex-nova/values.yaml
@@ -117,24 +117,6 @@ cortex-scheduling-controllers:
enabledTasks:
- nova-decisions-cleanup-task
- # List: iterate over all api servers with the gvk and return a combined list.
- # Get: iterate over all api server with the gvk and return the first result that is found.
- # Create/Update/Delete: based on the content of the resource, match api server with labels and execute op only there.
- # WatchesMulticluster: watch resource in all clusters with the gvk.
- multicluster:
- # These resources will be written into the home cluster if no match is found.
- fallbacks:
- - gvk: "cortex.cloud/v1alpha1/Decision"
- - # ...
- apiservers:
- - host: "https://api.cc-b0-qa-de-1.ccloud.external.a-qa-de-200.soil-garden.qa-de-1.cloud.sap"
- gvks: # Used for List, Get, Watches
- - "cortex.cloud/v1alpha1/Decision"
- - "cortex.cloud/v1alpha1/DecisionList"
- # ...
- labels: # Used for Create/Update/Delete
- az: "qa-de-1b"
-
cortex-knowledge-controllers:
<<: *cortex
namePrefix: cortex-nova-knowledge
From b8d967dc340e25f6e870bd089f2e24bec62a7139 Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Thu, 12 Mar 2026 09:48:46 +0100
Subject: [PATCH 04/24] Resolve linter issues
---
pkg/multicluster/client.go | 1 -
pkg/multicluster/client_test.go | 9 +++++++--
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/pkg/multicluster/client.go b/pkg/multicluster/client.go
index 8e2411a26..bf956e0ad 100644
--- a/pkg/multicluster/client.go
+++ b/pkg/multicluster/client.go
@@ -520,7 +520,6 @@ func (c *Client) IndexField(ctx context.Context, obj client.Object, list client.
return err
}
// Collect all unique caches to index.
- type cacheKey struct{}
indexed := make(map[any]bool)
for _, cl := range c.ClustersForGVK(gvkObj) {
ch := cl.GetCache()
diff --git a/pkg/multicluster/client_test.go b/pkg/multicluster/client_test.go
index 2fa9a8f1a..dc2ab2a42 100644
--- a/pkg/multicluster/client_test.go
+++ b/pkg/multicluster/client_test.go
@@ -452,7 +452,9 @@ func TestGVKFromHomeScheme_NilScheme(t *testing.T) {
t.Error("expected panic with nil scheme")
}
}()
- _, _ = c.GVKFromHomeScheme(&corev1.ConfigMap{})
+ if _, err := c.GVKFromHomeScheme(&corev1.ConfigMap{}); err == nil {
+ t.Error("expected panic with nil scheme")
+ }
}
func TestClient_Get_SingleRemoteCluster(t *testing.T) {
@@ -963,7 +965,10 @@ func TestClient_DeleteAllOf_MultipleClusters(t *testing.T) {
for i, remote := range []*fakeCluster{remote1, remote2} {
cmList := &corev1.ConfigMapList{}
- _ = remote.GetClient().List(context.Background(), cmList, client.InNamespace("default"))
+ if err := remote.GetClient().List(context.Background(), cmList, client.InNamespace("default")); err != nil {
+ t.Fatalf("failed to list objects in remote%d: %v", i+1, err)
+ }
+
if len(cmList.Items) != 0 {
t.Errorf("expected 0 items in remote%d, got %d", i+1, len(cmList.Items))
}
From 204cf2c37717e912ae4e2a3f6d5f2f2603135388 Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Thu, 12 Mar 2026 10:11:31 +0100
Subject: [PATCH 05/24] Add Hypervisor resource router to multicluster client
---
cmd/main.go | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/cmd/main.go b/cmd/main.go
index 4e4865567..20dbd8391 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -18,6 +18,7 @@ import (
_ "k8s.io/client-go/plugin/pkg/client/auth"
"k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/runtime/schema"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
@@ -264,10 +265,14 @@ func main() {
setupLog.Error(err, "unable to add home cluster")
os.Exit(1)
}
+ hvGVK := schema.GroupVersionKind{Group: "kvm.cloud.sap", Version: "v1", Kind: "Hypervisor"}
multiclusterClient := &multicluster.Client{
HomeCluster: homeCluster,
HomeRestConfig: restConfig,
HomeScheme: scheme,
+ ResourceRouters: map[schema.GroupVersionKind]multicluster.ResourceRouter{
+ hvGVK: multicluster.HypervisorResourceRouter{},
+ },
}
multiclusterClientConfig := conf.GetConfigOrDie[multicluster.ClientConfig]()
if err := multiclusterClient.InitFromConf(ctx, mgr, multiclusterClientConfig); err != nil {
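
For the registered router to have any effect on writes, at least one remote apiserver entry must serve the Hypervisor GVKs and carry the "az" label the router compares against. A sketch using the ClientConfig shape as it exists at this point in the series (host URL illustrative, module path assumed):

package example

import (
	// Module path assumed for illustration.
	"github.com/cobaltcore-dev/cortex/pkg/multicluster"
)

// exampleConf pairs the router registration in main.go with a matching
// remote: a Hypervisor labeled topology.kubernetes.io/zone == "qa-de-1b"
// is routed to this apiserver via its "az" label.
func exampleConf() multicluster.ClientConfig {
	return multicluster.ClientConfig{
		APIServerOverrides: []multicluster.APIServerOverride{{
			Host: "https://remote-api:6443", // illustrative
			GVKs: []string{
				"kvm.cloud.sap/v1/Hypervisor",
				"kvm.cloud.sap/v1/HypervisorList",
			},
			Labels: map[string]string{"az": "qa-de-1b"},
		}},
	}
}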
From 7a8a8fbbcbdddb427f5f61560356a93cc84b2697 Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Fri, 13 Mar 2026 15:49:55 +0100
Subject: [PATCH 06/24] Feedback: require explicit GVK configuration for home and remote apiservers
---
helm/bundles/cortex-cinder/values.yaml | 18 ++
helm/bundles/cortex-ironcore/values.yaml | 24 +++
helm/bundles/cortex-manila/values.yaml | 18 ++
helm/bundles/cortex-nova/values.yaml | 20 +++
helm/bundles/cortex-pods/values.yaml | 21 +++
internal/knowledge/extractor/trigger.go | 46 ++---
internal/knowledge/kpis/controller.go | 66 ++++----
.../filter_weigher_pipeline_controller.go | 98 ++++++-----
.../filter_weigher_pipeline_controller.go | 78 +++++----
.../filter_weigher_pipeline_controller.go | 72 ++++----
.../nova/detector_pipeline_controller.go | 72 ++++----
.../filter_weigher_pipeline_controller.go | 86 ++++++----
.../filter_weigher_pipeline_controller.go | 70 ++++----
pkg/multicluster/builder.go | 20 ++-
pkg/multicluster/builder_test.go | 27 +--
pkg/multicluster/client.go | 129 ++++++++------
pkg/multicluster/client_test.go | 159 +++++++++---------
17 files changed, 608 insertions(+), 416 deletions(-)
diff --git a/helm/bundles/cortex-cinder/values.yaml b/helm/bundles/cortex-cinder/values.yaml
index f002fc58b..9421f3f16 100644
--- a/helm/bundles/cortex-cinder/values.yaml
+++ b/helm/bundles/cortex-cinder/values.yaml
@@ -70,6 +70,24 @@ cortex: &cortex
prometheus: {enable: false}
conf: &cortexConf
schedulingDomain: cinder
+ apiservers:
+ home:
+ gvks:
+ - cortex.cloud/v1alpha1/Decision
+ - cortex.cloud/v1alpha1/DecisionList
+ - cortex.cloud/v1alpha1/Descheduling
+ - cortex.cloud/v1alpha1/DeschedulingList
+ - cortex.cloud/v1alpha1/Pipeline
+ - cortex.cloud/v1alpha1/PipelineList
+ - cortex.cloud/v1alpha1/Knowledge
+ - cortex.cloud/v1alpha1/KnowledgeList
+ - cortex.cloud/v1alpha1/Datasource
+ - cortex.cloud/v1alpha1/DatasourceList
+ - cortex.cloud/v1alpha1/KPI
+ - cortex.cloud/v1alpha1/KPIList
+ - cortex.cloud/v1alpha1/Reservation
+ - cortex.cloud/v1alpha1/ReservationList
+ - v1/Secret
keystoneSecretRef:
name: cortex-cinder-openstack-keystone
namespace: default
diff --git a/helm/bundles/cortex-ironcore/values.yaml b/helm/bundles/cortex-ironcore/values.yaml
index 2f885c7a5..c14e60f05 100644
--- a/helm/bundles/cortex-ironcore/values.yaml
+++ b/helm/bundles/cortex-ironcore/values.yaml
@@ -30,6 +30,30 @@ cortex:
conf:
# The operator will only touch CRs with this scheduling domain name.
schedulingDomain: machines
+ apiservers:
+ home:
+ gvks:
+ - cortex.cloud/v1alpha1/Decision
+ - cortex.cloud/v1alpha1/DecisionList
+ - cortex.cloud/v1alpha1/Descheduling
+ - cortex.cloud/v1alpha1/DeschedulingList
+ - cortex.cloud/v1alpha1/Pipeline
+ - cortex.cloud/v1alpha1/PipelineList
+ - cortex.cloud/v1alpha1/Knowledge
+ - cortex.cloud/v1alpha1/KnowledgeList
+ - cortex.cloud/v1alpha1/Datasource
+ - cortex.cloud/v1alpha1/DatasourceList
+ - cortex.cloud/v1alpha1/KPI
+ - cortex.cloud/v1alpha1/KPIList
+ - cortex.cloud/v1alpha1/Reservation
+ - cortex.cloud/v1alpha1/ReservationList
+ - compute.ironcore.dev/v1alpha1/Machine
+ - compute.ironcore.dev/v1alpha1/MachineList
+ - compute.ironcore.dev/v1alpha1/MachinePool
+ - compute.ironcore.dev/v1alpha1/MachinePoolList
+ - compute.ironcore.dev/v1alpha1/MachineClass
+ - compute.ironcore.dev/v1alpha1/MachineClassList
+ - v1/Secret
enabledControllers:
- ironcore-decisions-pipeline-controller
- explanation-controller
diff --git a/helm/bundles/cortex-manila/values.yaml b/helm/bundles/cortex-manila/values.yaml
index cc341a112..613f7dc3b 100644
--- a/helm/bundles/cortex-manila/values.yaml
+++ b/helm/bundles/cortex-manila/values.yaml
@@ -70,6 +70,24 @@ cortex: &cortex
prometheus: {enable: false}
conf: &cortexConf
schedulingDomain: manila
+ apiservers:
+ home:
+ gvks:
+ - cortex.cloud/v1alpha1/Decision
+ - cortex.cloud/v1alpha1/DecisionList
+ - cortex.cloud/v1alpha1/Descheduling
+ - cortex.cloud/v1alpha1/DeschedulingList
+ - cortex.cloud/v1alpha1/Pipeline
+ - cortex.cloud/v1alpha1/PipelineList
+ - cortex.cloud/v1alpha1/Knowledge
+ - cortex.cloud/v1alpha1/KnowledgeList
+ - cortex.cloud/v1alpha1/Datasource
+ - cortex.cloud/v1alpha1/DatasourceList
+ - cortex.cloud/v1alpha1/KPI
+ - cortex.cloud/v1alpha1/KPIList
+ - cortex.cloud/v1alpha1/Reservation
+ - cortex.cloud/v1alpha1/ReservationList
+ - v1/Secret
keystoneSecretRef:
name: cortex-manila-openstack-keystone
namespace: default
diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml
index b2dbba788..5154cfdc2 100644
--- a/helm/bundles/cortex-nova/values.yaml
+++ b/helm/bundles/cortex-nova/values.yaml
@@ -80,6 +80,26 @@ cortex: &cortex
prometheus: {enable: false}
conf: &cortexConf
schedulingDomain: nova
+ apiservers:
+ home:
+ gvks:
+ - cortex.cloud/v1alpha1/Decision
+ - cortex.cloud/v1alpha1/DecisionList
+ - cortex.cloud/v1alpha1/Descheduling
+ - cortex.cloud/v1alpha1/DeschedulingList
+ - cortex.cloud/v1alpha1/Pipeline
+ - cortex.cloud/v1alpha1/PipelineList
+ - cortex.cloud/v1alpha1/Knowledge
+ - cortex.cloud/v1alpha1/KnowledgeList
+ - cortex.cloud/v1alpha1/Datasource
+ - cortex.cloud/v1alpha1/DatasourceList
+ - cortex.cloud/v1alpha1/KPI
+ - cortex.cloud/v1alpha1/KPIList
+ - cortex.cloud/v1alpha1/Reservation
+ - cortex.cloud/v1alpha1/ReservationList
+ - kvm.cloud.sap/v1/Hypervisor
+ - kvm.cloud.sap/v1/HypervisorList
+ - v1/Secret
keystoneSecretRef:
name: cortex-nova-openstack-keystone
namespace: default
diff --git a/helm/bundles/cortex-pods/values.yaml b/helm/bundles/cortex-pods/values.yaml
index b7aab8a6d..809bf9e88 100644
--- a/helm/bundles/cortex-pods/values.yaml
+++ b/helm/bundles/cortex-pods/values.yaml
@@ -30,6 +30,27 @@ cortex:
conf:
# The operator will only touch CRs with this scheduling domain name.
schedulingDomain: pods
+ apiservers:
+ home:
+ gvks:
+ - cortex.cloud/v1alpha1/Decision
+ - cortex.cloud/v1alpha1/DecisionList
+ - cortex.cloud/v1alpha1/Descheduling
+ - cortex.cloud/v1alpha1/DeschedulingList
+ - cortex.cloud/v1alpha1/Pipeline
+ - cortex.cloud/v1alpha1/PipelineList
+ - cortex.cloud/v1alpha1/Knowledge
+ - cortex.cloud/v1alpha1/KnowledgeList
+ - cortex.cloud/v1alpha1/Datasource
+ - cortex.cloud/v1alpha1/DatasourceList
+ - cortex.cloud/v1alpha1/KPI
+ - cortex.cloud/v1alpha1/KPIList
+ - cortex.cloud/v1alpha1/Reservation
+ - cortex.cloud/v1alpha1/ReservationList
+ - v1/Secret
+ - v1/Pod
+ - v1/NodeList
+ - v1/Binding
enabledControllers:
- pods-decisions-pipeline-controller
- explanation-controller
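
Each kind is listed above together with its List variant because the client resolves their GVKs independently: List looks up the GVK of the list object, while Get and writes resolve the singular object, and with this patch a missing entry surfaces as an error rather than a silent home-cluster fallback. A sketch with assumed module paths:

package example

import (
	"context"
	"log"

	"sigs.k8s.io/controller-runtime/pkg/client"

	// Module paths assumed for illustration.
	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
	"github.com/cobaltcore-dev/cortex/pkg/multicluster"
)

// readDecisions needs both "cortex.cloud/v1alpha1/Decision" (for Get) and
// "cortex.cloud/v1alpha1/DecisionList" (for List) in the apiservers config.
func readDecisions(ctx context.Context, mcl *multicluster.Client, key client.ObjectKey) {
	var list v1alpha1.DecisionList
	if err := mcl.List(ctx, &list); err != nil { // resolves the DecisionList GVK
		log.Fatal(err)
	}
	var one v1alpha1.Decision
	if err := mcl.Get(ctx, key, &one); err != nil { // resolves the Decision GVK
		log.Fatal(err)
	}
}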
diff --git a/internal/knowledge/extractor/trigger.go b/internal/knowledge/extractor/trigger.go
index fd5287e26..454f16c61 100644
--- a/internal/knowledge/extractor/trigger.go
+++ b/internal/knowledge/extractor/trigger.go
@@ -253,25 +253,31 @@ func (r *TriggerReconciler) mapKnowledgeToKnowledge(ctx context.Context, obj cli
// SetupWithManager sets up the controller with the Manager
func (r *TriggerReconciler) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error {
- return multicluster.BuildController(mcl, mgr).
- // Watch datasource changes and map them to trigger reconciliation
- WatchesMulticluster(
- &v1alpha1.Datasource{},
- handler.EnqueueRequestsFromMapFunc(r.mapDatasourceToKnowledge),
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- ds := obj.(*v1alpha1.Datasource)
- return ds.Spec.SchedulingDomain == r.Conf.SchedulingDomain
- }),
- ).
- // Watch knowledge changes and map them to trigger reconciliation
- WatchesMulticluster(
- &v1alpha1.Knowledge{},
- handler.EnqueueRequestsFromMapFunc(r.mapKnowledgeToKnowledge),
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- k := obj.(*v1alpha1.Knowledge)
- return k.Spec.SchedulingDomain == r.Conf.SchedulingDomain
- }),
- ).
- Named("cortex-knowledge-trigger").
+ bldr := multicluster.BuildController(mcl, mgr)
+ // Watch datasource changes and map them to trigger reconciliation
+ bldr, err := bldr.WatchesMulticluster(
+ &v1alpha1.Datasource{},
+ handler.EnqueueRequestsFromMapFunc(r.mapDatasourceToKnowledge),
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ ds := obj.(*v1alpha1.Datasource)
+ return ds.Spec.SchedulingDomain == r.Conf.SchedulingDomain
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ // Watch knowledge changes and map them to trigger reconciliation
+ bldr, err = bldr.WatchesMulticluster(
+ &v1alpha1.Knowledge{},
+ handler.EnqueueRequestsFromMapFunc(r.mapKnowledgeToKnowledge),
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ k := obj.(*v1alpha1.Knowledge)
+ return k.Spec.SchedulingDomain == r.Conf.SchedulingDomain
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.Named("cortex-knowledge-trigger").
Complete(r)
}
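
With WatchesMulticluster now returning (MulticlusterBuilder, error), the former fluent chain unrolls into explicit error checks as above. Should the repetition across controllers grow further, a small variadic helper could restore compactness; the following is a hypothetical sketch, not part of this patch:

package multicluster

// ChainWatches is a hypothetical helper (not part of this patch): it applies
// watch-registration steps in order and stops at the first error, so
// controllers can keep a compact setup style with a single error check.
func ChainWatches(
	b MulticlusterBuilder,
	steps ...func(MulticlusterBuilder) (MulticlusterBuilder, error),
) (MulticlusterBuilder, error) {
	var err error
	for _, step := range steps {
		if b, err = step(b); err != nil {
			return b, err
		}
	}
	return b, nil
}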
diff --git a/internal/knowledge/kpis/controller.go b/internal/knowledge/kpis/controller.go
index 8f3f6fd74..03e4c34d4 100644
--- a/internal/knowledge/kpis/controller.go
+++ b/internal/knowledge/kpis/controller.go
@@ -443,36 +443,42 @@ func (c *Controller) SetupWithManager(mgr manager.Manager, mcl *multicluster.Cli
if err := mgr.Add(manager.RunnableFunc(c.InitAllKPIs)); err != nil {
return err
}
- return multicluster.BuildController(mcl, mgr).
- // Watch datasource changes so that we can reconfigure kpis as needed.
- WatchesMulticluster(
- &v1alpha1.Datasource{},
- handler.Funcs{
- CreateFunc: c.handleDatasourceCreated,
- UpdateFunc: c.handleDatasourceUpdated,
- DeleteFunc: c.handleDatasourceDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- // Only react to datasources matching the scheduling domain.
- ds := obj.(*v1alpha1.Datasource)
- return ds.Spec.SchedulingDomain == c.Config.SchedulingDomain
- }),
- ).
- // Watch knowledge changes so that we can reconfigure kpis as needed.
- WatchesMulticluster(
- &v1alpha1.Knowledge{},
- handler.Funcs{
- CreateFunc: c.handleKnowledgeCreated,
- UpdateFunc: c.handleKnowledgeUpdated,
- DeleteFunc: c.handleKnowledgeDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- // Only react to knowledges matching the scheduling domain.
- kn := obj.(*v1alpha1.Knowledge)
- return kn.Spec.SchedulingDomain == c.Config.SchedulingDomain
- }),
- ).
- Named("cortex-kpis").
+ bldr := multicluster.BuildController(mcl, mgr)
+ // Watch datasource changes so that we can reconfigure kpis as needed.
+ bldr, err := bldr.WatchesMulticluster(
+ &v1alpha1.Datasource{},
+ handler.Funcs{
+ CreateFunc: c.handleDatasourceCreated,
+ UpdateFunc: c.handleDatasourceUpdated,
+ DeleteFunc: c.handleDatasourceDeleted,
+ },
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ // Only react to datasources matching the scheduling domain.
+ ds := obj.(*v1alpha1.Datasource)
+ return ds.Spec.SchedulingDomain == c.Config.SchedulingDomain
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ // Watch knowledge changes so that we can reconfigure kpis as needed.
+ bldr, err = bldr.WatchesMulticluster(
+ &v1alpha1.Knowledge{},
+ handler.Funcs{
+ CreateFunc: c.handleKnowledgeCreated,
+ UpdateFunc: c.handleKnowledgeUpdated,
+ DeleteFunc: c.handleKnowledgeDeleted,
+ },
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ // Only react to knowledges matching the scheduling domain.
+ kn := obj.(*v1alpha1.Knowledge)
+ return kn.Spec.SchedulingDomain == c.Config.SchedulingDomain
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.Named("cortex-kpis").
For(
&v1alpha1.KPI{},
builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
diff --git a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go
index 0d8771081..64f3c3bd5 100644
--- a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go
+++ b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go
@@ -159,52 +159,58 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager,
if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil {
return err
}
- return multicluster.BuildController(mcl, mgr).
- // Watch pipeline changes so that we can reconfigure pipelines as needed.
- WatchesMulticluster(
- &v1alpha1.Pipeline{},
- handler.Funcs{
- CreateFunc: c.HandlePipelineCreated,
- UpdateFunc: c.HandlePipelineUpdated,
- DeleteFunc: c.HandlePipelineDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- pipeline := obj.(*v1alpha1.Pipeline)
- // Only react to pipelines matching the scheduling domain.
- if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder {
- return false
- }
- return pipeline.Spec.Type == c.PipelineType()
- }),
- ).
- // Watch knowledge changes so that we can reconfigure pipelines as needed.
- WatchesMulticluster(
- &v1alpha1.Knowledge{},
- handler.Funcs{
- CreateFunc: c.HandleKnowledgeCreated,
- UpdateFunc: c.HandleKnowledgeUpdated,
- DeleteFunc: c.HandleKnowledgeDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- knowledge := obj.(*v1alpha1.Knowledge)
- // Only react to knowledge matching the scheduling domain.
- return knowledge.Spec.SchedulingDomain == v1alpha1.SchedulingDomainCinder
- }),
- ).
- For(
- &v1alpha1.Decision{},
- builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
- decision := obj.(*v1alpha1.Decision)
- if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder {
- return false
- }
- // Ignore already decided schedulings.
- if decision.Status.Result != nil {
- return false
- }
- return true
- })),
- ).
+ bldr := multicluster.BuildController(mcl, mgr)
+ // Watch pipeline changes so that we can reconfigure pipelines as needed.
+ bldr, err := bldr.WatchesMulticluster(
+ &v1alpha1.Pipeline{},
+ handler.Funcs{
+ CreateFunc: c.HandlePipelineCreated,
+ UpdateFunc: c.HandlePipelineUpdated,
+ DeleteFunc: c.HandlePipelineDeleted,
+ },
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ pipeline := obj.(*v1alpha1.Pipeline)
+ // Only react to pipelines matching the scheduling domain.
+ if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder {
+ return false
+ }
+ return pipeline.Spec.Type == c.PipelineType()
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ // Watch knowledge changes so that we can reconfigure pipelines as needed.
+ bldr, err = bldr.WatchesMulticluster(
+ &v1alpha1.Knowledge{},
+ handler.Funcs{
+ CreateFunc: c.HandleKnowledgeCreated,
+ UpdateFunc: c.HandleKnowledgeUpdated,
+ DeleteFunc: c.HandleKnowledgeDeleted,
+ },
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ knowledge := obj.(*v1alpha1.Knowledge)
+ // Only react to knowledge matching the scheduling domain.
+ return knowledge.Spec.SchedulingDomain == v1alpha1.SchedulingDomainCinder
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.For(
+ &v1alpha1.Decision{},
+ builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ decision := obj.(*v1alpha1.Decision)
+ if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder {
+ return false
+ }
+ // Ignore already decided schedulings.
+ if decision.Status.Result != nil {
+ return false
+ }
+ return true
+ })),
+ ).
Named("cortex-cinder-decisions").
Complete(c)
}
diff --git a/internal/scheduling/machines/filter_weigher_pipeline_controller.go b/internal/scheduling/machines/filter_weigher_pipeline_controller.go
index 2b0c44f64..4316e0509 100644
--- a/internal/scheduling/machines/filter_weigher_pipeline_controller.go
+++ b/internal/scheduling/machines/filter_weigher_pipeline_controller.go
@@ -241,42 +241,48 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager,
if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil {
return err
}
- return multicluster.BuildController(mcl, mgr).
- WatchesMulticluster(
- &ironcorev1alpha1.Machine{},
- c.handleMachine(),
- // Only schedule machines that have the custom scheduler set.
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- machine := obj.(*ironcorev1alpha1.Machine)
- if machine.Spec.MachinePoolRef != nil {
- // Skip machines that already have a machine pool assigned.
- return false
- }
- // The machine spec currently doesn't support this field yet.
- // Thus the resource will be deserialized to an empty string.
- // We subscribe to all machines without a scheduler set for now.
- // Otherwise when deployed the machine scheduler won't do anything.
- return machine.Spec.Scheduler == ""
- }),
- ).
- // Watch pipeline changes so that we can reconfigure pipelines as needed.
- WatchesMulticluster(
- &v1alpha1.Pipeline{},
- handler.Funcs{
- CreateFunc: c.HandlePipelineCreated,
- UpdateFunc: c.HandlePipelineUpdated,
- DeleteFunc: c.HandlePipelineDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- pipeline := obj.(*v1alpha1.Pipeline)
- // Only react to pipelines matching the scheduling domain.
- if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainMachines {
- return false
- }
- return pipeline.Spec.Type == c.PipelineType()
- }),
- ).
- Named("cortex-machine-scheduler").
+ bldr := multicluster.BuildController(mcl, mgr)
+ bldr, err := bldr.WatchesMulticluster(
+ &ironcorev1alpha1.Machine{},
+ c.handleMachine(),
+ // Only schedule machines that have the custom scheduler set.
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ machine := obj.(*ironcorev1alpha1.Machine)
+ if machine.Spec.MachinePoolRef != nil {
+ // Skip machines that already have a machine pool assigned.
+ return false
+ }
+ // The machine spec currently doesn't support this field yet.
+ // Thus the resource will be deserialized to an empty string.
+ // We subscribe to all machines without a scheduler set for now.
+ // Otherwise when deployed the machine scheduler won't do anything.
+ return machine.Spec.Scheduler == ""
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ // Watch pipeline changes so that we can reconfigure pipelines as needed.
+ bldr, err = bldr.WatchesMulticluster(
+ &v1alpha1.Pipeline{},
+ handler.Funcs{
+ CreateFunc: c.HandlePipelineCreated,
+ UpdateFunc: c.HandlePipelineUpdated,
+ DeleteFunc: c.HandlePipelineDeleted,
+ },
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ pipeline := obj.(*v1alpha1.Pipeline)
+ // Only react to pipelines matching the scheduling domain.
+ if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainMachines {
+ return false
+ }
+ return pipeline.Spec.Type == c.PipelineType()
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.Named("cortex-machine-scheduler").
For(
&v1alpha1.Decision{},
builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
diff --git a/internal/scheduling/manila/filter_weigher_pipeline_controller.go b/internal/scheduling/manila/filter_weigher_pipeline_controller.go
index 3b63d64e0..b01ad46f8 100644
--- a/internal/scheduling/manila/filter_weigher_pipeline_controller.go
+++ b/internal/scheduling/manila/filter_weigher_pipeline_controller.go
@@ -159,39 +159,45 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager,
if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil {
return err
}
- return multicluster.BuildController(mcl, mgr).
- // Watch pipeline changes so that we can reconfigure pipelines as needed.
- WatchesMulticluster(
- &v1alpha1.Pipeline{},
- handler.Funcs{
- CreateFunc: c.HandlePipelineCreated,
- UpdateFunc: c.HandlePipelineUpdated,
- DeleteFunc: c.HandlePipelineDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- pipeline := obj.(*v1alpha1.Pipeline)
- // Only react to pipelines matching the scheduling domain.
- if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila {
- return false
- }
- return pipeline.Spec.Type == c.PipelineType()
- }),
- ).
- // Watch knowledge changes so that we can reconfigure pipelines as needed.
- WatchesMulticluster(
- &v1alpha1.Knowledge{},
- handler.Funcs{
- CreateFunc: c.HandleKnowledgeCreated,
- UpdateFunc: c.HandleKnowledgeUpdated,
- DeleteFunc: c.HandleKnowledgeDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- knowledge := obj.(*v1alpha1.Knowledge)
- // Only react to knowledge matching the scheduling domain.
- return knowledge.Spec.SchedulingDomain == v1alpha1.SchedulingDomainManila
- }),
- ).
- Named("cortex-manila-decisions").
+ bldr := multicluster.BuildController(mcl, mgr)
+ // Watch pipeline changes so that we can reconfigure pipelines as needed.
+ bldr, err := bldr.WatchesMulticluster(
+ &v1alpha1.Pipeline{},
+ handler.Funcs{
+ CreateFunc: c.HandlePipelineCreated,
+ UpdateFunc: c.HandlePipelineUpdated,
+ DeleteFunc: c.HandlePipelineDeleted,
+ },
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ pipeline := obj.(*v1alpha1.Pipeline)
+ // Only react to pipelines matching the scheduling domain.
+ if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila {
+ return false
+ }
+ return pipeline.Spec.Type == c.PipelineType()
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ // Watch knowledge changes so that we can reconfigure pipelines as needed.
+ bldr, err = bldr.WatchesMulticluster(
+ &v1alpha1.Knowledge{},
+ handler.Funcs{
+ CreateFunc: c.HandleKnowledgeCreated,
+ UpdateFunc: c.HandleKnowledgeUpdated,
+ DeleteFunc: c.HandleKnowledgeDeleted,
+ },
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ knowledge := obj.(*v1alpha1.Knowledge)
+ // Only react to knowledge matching the scheduling domain.
+ return knowledge.Spec.SchedulingDomain == v1alpha1.SchedulingDomainManila
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.Named("cortex-manila-decisions").
For(
&v1alpha1.Decision{},
builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
diff --git a/internal/scheduling/nova/detector_pipeline_controller.go b/internal/scheduling/nova/detector_pipeline_controller.go
index 68d7d1ed5..98aea329f 100644
--- a/internal/scheduling/nova/detector_pipeline_controller.go
+++ b/internal/scheduling/nova/detector_pipeline_controller.go
@@ -135,39 +135,45 @@ func (c *DetectorPipelineController) SetupWithManager(mgr ctrl.Manager, mcl *mul
if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil {
return err
}
- return multicluster.BuildController(mcl, mgr).
- // Watch pipeline changes so that we can reconfigure pipelines as needed.
- WatchesMulticluster(
- &v1alpha1.Pipeline{},
- handler.Funcs{
- CreateFunc: c.HandlePipelineCreated,
- UpdateFunc: c.HandlePipelineUpdated,
- DeleteFunc: c.HandlePipelineDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- pipeline := obj.(*v1alpha1.Pipeline)
- // Only react to pipelines matching the scheduling domain.
- if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova {
- return false
- }
- return pipeline.Spec.Type == c.PipelineType()
- }),
- ).
- // Watch knowledge changes so that we can reconfigure pipelines as needed.
- WatchesMulticluster(
- &v1alpha1.Knowledge{},
- handler.Funcs{
- CreateFunc: c.HandleKnowledgeCreated,
- UpdateFunc: c.HandleKnowledgeUpdated,
- DeleteFunc: c.HandleKnowledgeDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- knowledge := obj.(*v1alpha1.Knowledge)
- // Only react to knowledge matching the scheduling domain.
- return knowledge.Spec.SchedulingDomain == v1alpha1.SchedulingDomainNova
- }),
- ).
- Named("cortex-nova-deschedulings").
+ bldr := multicluster.BuildController(mcl, mgr)
+ // Watch pipeline changes so that we can reconfigure pipelines as needed.
+ bldr, err := bldr.WatchesMulticluster(
+ &v1alpha1.Pipeline{},
+ handler.Funcs{
+ CreateFunc: c.HandlePipelineCreated,
+ UpdateFunc: c.HandlePipelineUpdated,
+ DeleteFunc: c.HandlePipelineDeleted,
+ },
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ pipeline := obj.(*v1alpha1.Pipeline)
+ // Only react to pipelines matching the scheduling domain.
+ if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova {
+ return false
+ }
+ return pipeline.Spec.Type == c.PipelineType()
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ // Watch knowledge changes so that we can reconfigure pipelines as needed.
+ bldr, err = bldr.WatchesMulticluster(
+ &v1alpha1.Knowledge{},
+ handler.Funcs{
+ CreateFunc: c.HandleKnowledgeCreated,
+ UpdateFunc: c.HandleKnowledgeUpdated,
+ DeleteFunc: c.HandleKnowledgeDeleted,
+ },
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ knowledge := obj.(*v1alpha1.Knowledge)
+ // Only react to knowledge matching the scheduling domain.
+ return knowledge.Spec.SchedulingDomain == v1alpha1.SchedulingDomainNova
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.Named("cortex-nova-deschedulings").
For(
&v1alpha1.Descheduling{},
builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
diff --git a/internal/scheduling/nova/filter_weigher_pipeline_controller.go b/internal/scheduling/nova/filter_weigher_pipeline_controller.go
index 301e6076a..02d6ebd97 100644
--- a/internal/scheduling/nova/filter_weigher_pipeline_controller.go
+++ b/internal/scheduling/nova/filter_weigher_pipeline_controller.go
@@ -185,43 +185,55 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager,
if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil {
return err
}
- return multicluster.BuildController(mcl, mgr).
- // Watch pipeline changes so that we can reconfigure pipelines as needed.
- WatchesMulticluster(
- &v1alpha1.Pipeline{},
- handler.Funcs{
- CreateFunc: c.HandlePipelineCreated,
- UpdateFunc: c.HandlePipelineUpdated,
- DeleteFunc: c.HandlePipelineDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- pipeline := obj.(*v1alpha1.Pipeline)
- // Only react to pipelines matching the scheduling domain.
- if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova {
- return false
- }
- return pipeline.Spec.Type == c.PipelineType()
- }),
- ).
- // Watch knowledge changes so that we can reconfigure pipelines as needed.
- WatchesMulticluster(
- &v1alpha1.Knowledge{},
- handler.Funcs{
- CreateFunc: c.HandleKnowledgeCreated,
- UpdateFunc: c.HandleKnowledgeUpdated,
- DeleteFunc: c.HandleKnowledgeDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- knowledge := obj.(*v1alpha1.Knowledge)
- // Only react to knowledge matching the scheduling domain.
- return knowledge.Spec.SchedulingDomain == v1alpha1.SchedulingDomainNova
- }),
- ).
- // Watch hypervisor changes so the cache gets updated.
- WatchesMulticluster(&hv1.Hypervisor{}, handler.Funcs{}).
- // Watch reservation changes so the cache gets updated.
- WatchesMulticluster(&v1alpha1.Reservation{}, handler.Funcs{}).
- Named("cortex-nova-decisions").
+ bldr := multicluster.BuildController(mcl, mgr)
+ // Watch pipeline changes so that we can reconfigure pipelines as needed.
+ bldr, err := bldr.WatchesMulticluster(
+ &v1alpha1.Pipeline{},
+ handler.Funcs{
+ CreateFunc: c.HandlePipelineCreated,
+ UpdateFunc: c.HandlePipelineUpdated,
+ DeleteFunc: c.HandlePipelineDeleted,
+ },
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ pipeline := obj.(*v1alpha1.Pipeline)
+ // Only react to pipelines matching the scheduling domain.
+ if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova {
+ return false
+ }
+ return pipeline.Spec.Type == c.PipelineType()
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ // Watch knowledge changes so that we can reconfigure pipelines as needed.
+ bldr, err = bldr.WatchesMulticluster(
+ &v1alpha1.Knowledge{},
+ handler.Funcs{
+ CreateFunc: c.HandleKnowledgeCreated,
+ UpdateFunc: c.HandleKnowledgeUpdated,
+ DeleteFunc: c.HandleKnowledgeDeleted,
+ },
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ knowledge := obj.(*v1alpha1.Knowledge)
+ // Only react to knowledge matching the scheduling domain.
+ return knowledge.Spec.SchedulingDomain == v1alpha1.SchedulingDomainNova
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ // Watch hypervisor changes so the cache gets updated.
+ bldr, err = bldr.WatchesMulticluster(&hv1.Hypervisor{}, handler.Funcs{})
+ if err != nil {
+ return err
+ }
+ // Watch reservation changes so the cache gets updated.
+ bldr, err = bldr.WatchesMulticluster(&v1alpha1.Reservation{}, handler.Funcs{})
+ if err != nil {
+ return err
+ }
+ return bldr.Named("cortex-nova-decisions").
For(
&v1alpha1.Decision{},
builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
diff --git a/internal/scheduling/pods/filter_weigher_pipeline_controller.go b/internal/scheduling/pods/filter_weigher_pipeline_controller.go
index 28e10ff88..2ca4998e9 100644
--- a/internal/scheduling/pods/filter_weigher_pipeline_controller.go
+++ b/internal/scheduling/pods/filter_weigher_pipeline_controller.go
@@ -252,38 +252,44 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager,
if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil {
return err
}
- return multicluster.BuildController(mcl, mgr).
- WatchesMulticluster(
- &corev1.Pod{},
- c.handlePod(),
- // Only schedule pods that have a custom scheduler set.
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- pod := obj.(*corev1.Pod)
- if pod.Spec.NodeName != "" {
- // Skip pods that already have a node assigned.
- return false
- }
- return pod.Spec.SchedulerName == string(v1alpha1.SchedulingDomainPods)
- }),
- ).
- // Watch pipeline changes so that we can reconfigure pipelines as needed.
- WatchesMulticluster(
- &v1alpha1.Pipeline{},
- handler.Funcs{
- CreateFunc: c.HandlePipelineCreated,
- UpdateFunc: c.HandlePipelineUpdated,
- DeleteFunc: c.HandlePipelineDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- pipeline := obj.(*v1alpha1.Pipeline)
- // Only react to pipelines matching the scheduling domain.
- if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainPods {
- return false
- }
- return pipeline.Spec.Type == v1alpha1.PipelineTypeFilterWeigher
- }),
- ).
- Named("cortex-pod-scheduler").
+ bldr := multicluster.BuildController(mcl, mgr)
+ bldr, err := bldr.WatchesMulticluster(
+ &corev1.Pod{},
+ c.handlePod(),
+ // Only schedule pods that have a custom scheduler set.
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ pod := obj.(*corev1.Pod)
+ if pod.Spec.NodeName != "" {
+ // Skip pods that already have a node assigned.
+ return false
+ }
+ return pod.Spec.SchedulerName == string(v1alpha1.SchedulingDomainPods)
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ // Watch pipeline changes so that we can reconfigure pipelines as needed.
+ bldr, err = bldr.WatchesMulticluster(
+ &v1alpha1.Pipeline{},
+ handler.Funcs{
+ CreateFunc: c.HandlePipelineCreated,
+ UpdateFunc: c.HandlePipelineUpdated,
+ DeleteFunc: c.HandlePipelineDeleted,
+ },
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ pipeline := obj.(*v1alpha1.Pipeline)
+ // Only react to pipelines matching the scheduling domain.
+ if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainPods {
+ return false
+ }
+ return pipeline.Spec.Type == v1alpha1.PipelineTypeFilterWeigher
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.Named("cortex-pod-scheduler").
For(
&v1alpha1.Decision{},
builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
diff --git a/pkg/multicluster/builder.go b/pkg/multicluster/builder.go
index 53342232b..62d9f2fcd 100644
--- a/pkg/multicluster/builder.go
+++ b/pkg/multicluster/builder.go
@@ -4,6 +4,8 @@
package multicluster
import (
+ "fmt"
+
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -33,19 +35,19 @@ type MulticlusterBuilder struct {
// WatchesMulticluster watches a resource across all clusters that serve its GVK.
// If the GVK is served by multiple remote clusters, a watch is set up on each.
-func (b MulticlusterBuilder) WatchesMulticluster(object client.Object, eventHandler handler.TypedEventHandler[client.Object, reconcile.Request], predicates ...predicate.Predicate) MulticlusterBuilder {
+// Returns an error if the GVK is not configured in any cluster.
+func (b MulticlusterBuilder) WatchesMulticluster(object client.Object, eventHandler handler.TypedEventHandler[client.Object, reconcile.Request], predicates ...predicate.Predicate) (MulticlusterBuilder, error) {
gvk, err := b.multiclusterClient.GVKFromHomeScheme(object)
if err != nil {
- // Fall back to home cluster if GVK lookup fails.
- clusterCache := b.multiclusterClient.HomeCluster.GetCache()
- b.Builder = b.WatchesRawSource(source.Kind(clusterCache, object, eventHandler, predicates...))
- return b
+ return b, fmt.Errorf("failed to resolve GVK for %T: %w", object, err)
}
-
- // Add a watch for each remote cluster serving the GVK
- for _, cl := range b.multiclusterClient.ClustersForGVK(gvk) {
+ clusters, err := b.multiclusterClient.ClustersForGVK(gvk)
+ if err != nil {
+ return b, fmt.Errorf("no clusters configured for GVK %s: %w", gvk, err)
+ }
+ for _, cl := range clusters {
clusterCache := cl.GetCache()
b.Builder = b.WatchesRawSource(source.Kind(clusterCache, object, eventHandler, predicates...))
}
- return b
+ return b, nil
}
diff --git a/pkg/multicluster/builder_test.go b/pkg/multicluster/builder_test.go
index 662756484..c1bd2ba0e 100644
--- a/pkg/multicluster/builder_test.go
+++ b/pkg/multicluster/builder_test.go
@@ -32,7 +32,7 @@ func TestMulticlusterBuilder_Fields(t *testing.T) {
}
}
-func TestClient_ClustersForGVK_Integration(t *testing.T) {
+func TestClient_ClustersForGVK_UnknownGVKReturnsError(t *testing.T) {
c := &Client{
HomeCluster: nil,
remoteClusters: nil,
@@ -44,26 +44,29 @@ func TestClient_ClustersForGVK_Integration(t *testing.T) {
Kind: "Deployment",
}
- result := c.ClustersForGVK(gvk)
- // With nil remoteClusters and nil HomeCluster, returns [nil].
- if len(result) != 1 {
- t.Errorf("expected 1 cluster, got %d", len(result))
+ _, err := c.ClustersForGVK(gvk)
+ if err == nil {
+ t.Error("expected error for unknown GVK")
}
}
-func TestClient_ClustersForGVK_LookupOrder(t *testing.T) {
- c := &Client{
- remoteClusters: make(map[schema.GroupVersionKind][]remoteCluster),
- }
-
+func TestClient_ClustersForGVK_HomeGVKReturnsHomeCluster(t *testing.T) {
gvk := schema.GroupVersionKind{
Group: "apps",
Version: "v1",
Kind: "Deployment",
}
- // No remote clusters for this GVK, returns home cluster (nil).
- result := c.ClustersForGVK(gvk)
+ c := &Client{
+ remoteClusters: make(map[schema.GroupVersionKind][]remoteCluster),
+ homeGVKs: map[schema.GroupVersionKind]bool{gvk: true},
+ }
+
+ // GVK is in homeGVKs, returns home cluster (nil).
+ result, err := c.ClustersForGVK(gvk)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
if len(result) != 1 {
t.Errorf("expected 1 cluster, got %d", len(result))
}
diff --git a/pkg/multicluster/client.go b/pkg/multicluster/client.go
index bf956e0ad..1aeb04352 100644
--- a/pkg/multicluster/client.go
+++ b/pkg/multicluster/client.go
@@ -44,29 +44,35 @@ type Client struct {
// Mutex to protect access to remoteClusters.
remoteClustersMu sync.RWMutex
- // GVKs for which a write operation falls back to the home cluster
- // when no remote cluster matches.
- fallbackGVKs map[schema.GroupVersionKind]bool
+ // GVKs explicitly configured for the home cluster.
+ homeGVKs map[schema.GroupVersionKind]bool
}
type ClientConfig struct {
- // Fallback GVKs that are written to the home cluster if no router match is found.
- Fallbacks []FallbackConfig `json:"fallbacks,omitempty"`
- // Apiserver overrides that map GVKs to remote clusters.
- APIServerOverrides []APIServerOverride `json:"apiservers,omitempty"`
+ // Apiserver configuration mapping GVKs to home or remote clusters.
+ // Every GVK used through the multicluster client must be listed
+ // in either Home or Remotes. Unknown GVKs will cause an error.
+ APIServers APIServersConfig `json:"apiservers"`
}
-// FallbackConfig specifies a GVK that falls back to the home cluster for writes
-// when no remote cluster matches.
-type FallbackConfig struct {
-	// The resource GVK formatted as "<group>/<version>/<kind>".
- GVK string `json:"gvk"`
+// APIServersConfig separates resources into home and remote clusters.
+type APIServersConfig struct {
+ // Resources managed in the cluster where cortex is deployed.
+ Home HomeConfig `json:"home"`
+ // Resources managed in remote clusters.
+ Remotes []RemoteConfig `json:"remotes,omitempty"`
}
-// APIServerOverride maps multiple GVKs to a remote kubernetes apiserver with
+// HomeConfig lists GVKs that are managed in the home cluster.
+type HomeConfig struct {
+	// The resource GVKs formatted as "<group>/<version>/<kind>".
+ GVKs []string `json:"gvks"`
+}
+
+// RemoteConfig maps multiple GVKs to a remote kubernetes apiserver with
// routing labels. It is assumed that the remote apiserver accepts the
// serviceaccount tokens issued by the local cluster.
-type APIServerOverride struct {
+type RemoteConfig struct {
// The remote kubernetes apiserver url, e.g. "https://my-apiserver:6443".
Host string `json:"host"`
// The root CA certificate to verify the remote apiserver.
@@ -93,27 +99,27 @@ func (c *Client) InitFromConf(ctx context.Context, mgr ctrl.Manager, conf Client
for gvkStr := range gvksByConfStr {
log.Info("scheme gvk registered", "gvk", gvkStr)
}
- // Parse fallback GVKs.
- c.fallbackGVKs = make(map[schema.GroupVersionKind]bool)
- for _, fb := range conf.Fallbacks {
- gvk, ok := gvksByConfStr[fb.GVK]
+ // Parse home GVKs.
+ c.homeGVKs = make(map[schema.GroupVersionKind]bool)
+ for _, gvkStr := range conf.APIServers.Home.GVKs {
+ gvk, ok := gvksByConfStr[gvkStr]
if !ok {
- return errors.New("no gvk registered for fallback " + fb.GVK)
+ return errors.New("no gvk registered for home " + gvkStr)
}
- log.Info("registering fallback gvk", "gvk", gvk)
- c.fallbackGVKs[gvk] = true
+ log.Info("registering home gvk", "gvk", gvk)
+ c.homeGVKs[gvk] = true
}
- // Parse apiserver overrides.
- for _, override := range conf.APIServerOverrides {
+ // Parse remote apiserver configs.
+ for _, remote := range conf.APIServers.Remotes {
var resolvedGVKs []schema.GroupVersionKind
- for _, gvkStr := range override.GVKs {
+ for _, gvkStr := range remote.GVKs {
gvk, ok := gvksByConfStr[gvkStr]
if !ok {
- return errors.New("no gvk registered for API server override " + gvkStr)
+ return errors.New("no gvk registered for remote apiserver " + gvkStr)
}
resolvedGVKs = append(resolvedGVKs, gvk)
}
- cl, err := c.AddRemote(ctx, override.Host, override.CACert, override.Labels, resolvedGVKs...)
+ cl, err := c.AddRemote(ctx, remote.Host, remote.CACert, remote.Labels, resolvedGVKs...)
if err != nil {
return err
}
@@ -173,44 +179,45 @@ func (c *Client) GVKFromHomeScheme(obj runtime.Object) (gvk schema.GroupVersionK
}
// ClustersForGVK returns all clusters that serve the given GVK.
-// If no remote clusters are configured, only the home cluster is returned.
-// For fallback GVKs with remote clusters, the home cluster is appended
-// because resources might have been written there as a fallback.
-func (c *Client) ClustersForGVK(gvk schema.GroupVersionKind) []cluster.Cluster {
+// The GVK must be explicitly configured in either homeGVKs or remoteClusters.
+// Returns an error if the GVK is unknown.
+func (c *Client) ClustersForGVK(gvk schema.GroupVersionKind) ([]cluster.Cluster, error) {
c.remoteClustersMu.RLock()
defer c.remoteClustersMu.RUnlock()
remotes := c.remoteClusters[gvk]
- if len(remotes) == 0 {
- return []cluster.Cluster{c.HomeCluster}
+ isHome := c.homeGVKs[gvk]
+ if len(remotes) == 0 && !isHome {
+ return nil, fmt.Errorf("GVK %s is not configured in home or any remote cluster", gvk)
}
clusters := make([]cluster.Cluster, 0, len(remotes)+1)
for _, r := range remotes {
clusters = append(clusters, r.cluster)
}
- if c.fallbackGVKs[gvk] {
+ if isHome {
clusters = append(clusters, c.HomeCluster)
}
- return clusters
+ return clusters, nil
}
// clusterForWrite uses a ResourceRouter to determine which remote cluster
// a resource should be written to based on the resource content and cluster labels.
//
-// If no remote clusters are configured for the GVK, the home cluster is returned.
-// If a ResourceRouter is configured and matches a cluster, that cluster is returned.
-// If no match is found and the GVK has a fallback configured, the home cluster is returned.
-// Otherwise an error is returned.
+// The GVK must be explicitly configured. If configured for home, the home cluster
+// is returned. If configured for remotes, the ResourceRouter determines the target.
+// Returns an error if the GVK is unknown or no remote cluster matches.
func (c *Client) clusterForWrite(gvk schema.GroupVersionKind, obj any) (cluster.Cluster, error) {
c.remoteClustersMu.RLock()
defer c.remoteClustersMu.RUnlock()
remotes := c.remoteClusters[gvk]
if len(remotes) == 0 {
- return c.HomeCluster, nil
+ if c.homeGVKs[gvk] {
+ return c.HomeCluster, nil
+ }
+ return nil, fmt.Errorf("GVK %s is not configured in home or any remote cluster", gvk)
}
router, ok := c.ResourceRouters[gvk]
if !ok {
 		// If there is more than one remote cluster and no router, we don't know which one to write to.
- // That's why we need to return an error in that case. If there's only one remote cluster, we can safely assume
if len(remotes) == 1 {
return remotes[0].cluster, nil
}
@@ -225,12 +232,7 @@ func (c *Client) clusterForWrite(gvk schema.GroupVersionKind, obj any) (cluster.
return r.cluster, nil
}
}
-
- // If we can't match any remote cluster but the GVK is configured to fall back to the home cluster, return the home cluster.
- if c.fallbackGVKs[gvk] {
- return c.HomeCluster, nil
- }
- return nil, fmt.Errorf("no cluster matched for GVK %s and no fallback configured", gvk)
+ return nil, fmt.Errorf("no cluster matched for GVK %s", gvk)
}
// Get iterates over all clusters with the GVK and returns the first result found.
@@ -240,7 +242,10 @@ func (c *Client) Get(ctx context.Context, key client.ObjectKey, obj client.Objec
if err != nil {
return err
}
- clusters := c.ClustersForGVK(gvk)
+ clusters, err := c.ClustersForGVK(gvk)
+ if err != nil {
+ return err
+ }
var lastErr error
for _, cl := range clusters {
err := cl.GetClient().Get(ctx, key, obj, opts...)
@@ -261,7 +266,10 @@ func (c *Client) List(ctx context.Context, list client.ObjectList, opts ...clien
if err != nil {
return err
}
- clusters := c.ClustersForGVK(gvk)
+ clusters, err := c.ClustersForGVK(gvk)
+ if err != nil {
+ return err
+ }
var allItems []runtime.Object
for _, cl := range clusters {
@@ -346,7 +354,11 @@ func (c *Client) DeleteAllOf(ctx context.Context, obj client.Object, opts ...cli
if err != nil {
return err
}
- for _, cl := range c.ClustersForGVK(gvk) {
+ clusters, err := c.ClustersForGVK(gvk)
+ if err != nil {
+ return err
+ }
+ for _, cl := range clusters {
if err := cl.GetClient().DeleteAllOf(ctx, obj, opts...); err != nil {
return err
}
@@ -447,7 +459,10 @@ func (c *subResourceClient) Get(ctx context.Context, obj, subResource client.Obj
if err != nil {
return err
}
- clusters := c.multiclusterClient.ClustersForGVK(gvk)
+ clusters, err := c.multiclusterClient.ClustersForGVK(gvk)
+ if err != nil {
+ return err
+ }
var lastErr error
for _, cl := range clusters {
err := cl.GetClient().SubResource(c.subResource).Get(ctx, obj, subResource, opts...)
@@ -511,7 +526,7 @@ func (c *subResourceClient) Apply(ctx context.Context, obj runtime.ApplyConfigur
// Usually, you want to index the same field in both the object and list type,
// as both would be mapped to individual clients based on their GVK.
func (c *Client) IndexField(ctx context.Context, obj client.Object, list client.ObjectList, field string, extractValue client.IndexerFunc) error {
- gvkObj, err := c.GVKFromHomeScheme(obj)
+ gvk, err := c.GVKFromHomeScheme(obj)
if err != nil {
return err
}
@@ -521,7 +536,11 @@ func (c *Client) IndexField(ctx context.Context, obj client.Object, list client.
}
// Collect all unique caches to index.
indexed := make(map[any]bool)
- for _, cl := range c.ClustersForGVK(gvkObj) {
+ clusters, err := c.ClustersForGVK(gvk)
+ if err != nil {
+ return err
+ }
+ for _, cl := range clusters {
ch := cl.GetCache()
if indexed[ch] {
continue
@@ -531,7 +550,11 @@ func (c *Client) IndexField(ctx context.Context, obj client.Object, list client.
return err
}
}
- for _, cl := range c.ClustersForGVK(gvkList) {
+ clustersList, err := c.ClustersForGVK(gvkList)
+ if err != nil {
+ return err
+ }
+ for _, cl := range clustersList {
ch := cl.GetCache()
if indexed[ch] {
continue
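
Taken together, the reworked configuration is strict: a config string unknown to the scheme fails InitFromConf, and a GVK absent from both the home and remote sections fails later in ClustersForGVK or clusterForWrite. A sketch of a complete configuration (host URL illustrative, module path assumed):

package example

import (
	"context"

	ctrl "sigs.k8s.io/controller-runtime"

	// Module path assumed for illustration.
	"github.com/cobaltcore-dev/cortex/pkg/multicluster"
)

// initClient declares Decisions as home-managed and Hypervisors as served by
// a labeled remote. Unknown config strings fail here; GVKs used later without
// a home or remote entry fail in ClustersForGVK.
func initClient(ctx context.Context, mgr ctrl.Manager, c *multicluster.Client) error {
	conf := multicluster.ClientConfig{
		APIServers: multicluster.APIServersConfig{
			Home: multicluster.HomeConfig{GVKs: []string{
				"cortex.cloud/v1alpha1/Decision",
				"cortex.cloud/v1alpha1/DecisionList",
			}},
			Remotes: []multicluster.RemoteConfig{{
				Host: "https://remote-api:6443", // illustrative
				GVKs: []string{
					"kvm.cloud.sap/v1/Hypervisor",
					"kvm.cloud.sap/v1/HypervisorList",
				},
				Labels: map[string]string{"az": "qa-de-1b"},
			}},
		},
	}
	return c.InitFromConf(ctx, mgr, conf)
}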
diff --git a/pkg/multicluster/client_test.go b/pkg/multicluster/client_test.go
index dc2ab2a42..6103f1e0a 100644
--- a/pkg/multicluster/client_test.go
+++ b/pkg/multicluster/client_test.go
@@ -165,15 +165,19 @@ func TestSubResourceClient_Apply(t *testing.T) {
}
}
-func TestClient_ClustersForGVK_NoRemoteClusters(t *testing.T) {
+func TestClient_ClustersForGVK_HomeGVKOnly(t *testing.T) {
scheme := newTestScheme(t)
homeCluster := newFakeCluster(scheme)
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
+ homeGVKs: map[schema.GroupVersionKind]bool{configMapGVK: true},
}
- clusters := c.ClustersForGVK(configMapGVK)
+ clusters, err := c.ClustersForGVK(configMapGVK)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
if len(clusters) != 1 {
t.Fatalf("expected 1 cluster, got %d", len(clusters))
}
@@ -182,6 +186,19 @@ func TestClient_ClustersForGVK_NoRemoteClusters(t *testing.T) {
}
}
+func TestClient_ClustersForGVK_UnknownGVK(t *testing.T) {
+ scheme := newTestScheme(t)
+ c := &Client{
+ HomeCluster: newFakeCluster(scheme),
+ HomeScheme: scheme,
+ }
+
+ _, err := c.ClustersForGVK(configMapGVK)
+ if err == nil {
+ t.Error("expected error for unknown GVK")
+ }
+}
+
func TestClient_ClustersForGVK_SingleRemoteCluster(t *testing.T) {
scheme := newTestScheme(t)
homeCluster := newFakeCluster(scheme)
@@ -194,7 +211,10 @@ func TestClient_ClustersForGVK_SingleRemoteCluster(t *testing.T) {
},
}
- clusters := c.ClustersForGVK(configMapGVK)
+ clusters, err := c.ClustersForGVK(configMapGVK)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
if len(clusters) != 1 {
t.Fatalf("expected 1 cluster, got %d", len(clusters))
}
@@ -219,13 +239,16 @@ func TestClient_ClustersForGVK_MultipleRemoteClusters(t *testing.T) {
},
}
- clusters := c.ClustersForGVK(configMapGVK)
+ clusters, err := c.ClustersForGVK(configMapGVK)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
if len(clusters) != 2 {
t.Fatalf("expected 2 clusters, got %d", len(clusters))
}
}
-func TestClient_ClustersForGVK_FallbackIncludesHome(t *testing.T) {
+func TestClient_ClustersForGVK_HomeAndRemote(t *testing.T) {
scheme := newTestScheme(t)
homeCluster := newFakeCluster(scheme)
remote := newFakeCluster(scheme)
@@ -235,27 +258,31 @@ func TestClient_ClustersForGVK_FallbackIncludesHome(t *testing.T) {
remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
configMapGVK: {{cluster: remote, labels: map[string]string{"az": "az-1"}}},
},
- fallbackGVKs: map[schema.GroupVersionKind]bool{configMapGVK: true},
+ homeGVKs: map[schema.GroupVersionKind]bool{configMapGVK: true},
}
- clusters := c.ClustersForGVK(configMapGVK)
+ clusters, err := c.ClustersForGVK(configMapGVK)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
if len(clusters) != 2 {
- t.Fatalf("expected 2 clusters (remote + home fallback), got %d", len(clusters))
+ t.Fatalf("expected 2 clusters (remote + home), got %d", len(clusters))
}
if clusters[0] != remote {
t.Error("expected remote cluster first")
}
if clusters[1] != homeCluster {
- t.Error("expected home cluster as fallback")
+ t.Error("expected home cluster second")
}
}
-func TestClient_clusterForWrite_NoRemoteClusters(t *testing.T) {
+func TestClient_clusterForWrite_HomeGVK(t *testing.T) {
scheme := newTestScheme(t)
homeCluster := newFakeCluster(scheme)
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
+ homeGVKs: map[schema.GroupVersionKind]bool{configMapGVK: true},
}
cl, err := c.clusterForWrite(configMapGVK, &corev1.ConfigMap{})
@@ -263,7 +290,20 @@ func TestClient_clusterForWrite_NoRemoteClusters(t *testing.T) {
t.Fatalf("unexpected error: %v", err)
}
if cl != homeCluster {
- t.Error("expected home cluster when no remotes configured")
+ t.Error("expected home cluster for home GVK")
+ }
+}
+
+func TestClient_clusterForWrite_UnknownGVK(t *testing.T) {
+ scheme := newTestScheme(t)
+ c := &Client{
+ HomeCluster: newFakeCluster(scheme),
+ HomeScheme: scheme,
+ }
+
+ _, err := c.clusterForWrite(configMapGVK, &corev1.ConfigMap{})
+ if err == nil {
+ t.Error("expected error for unknown GVK")
}
}
@@ -319,40 +359,7 @@ func TestClient_clusterForWrite_RouterMatches(t *testing.T) {
}
}
-func TestClient_clusterForWrite_NoMatchWithFallback(t *testing.T) {
- scheme := newTestScheme(t)
- homeCluster := newFakeCluster(scheme)
- remote1 := newFakeCluster(scheme)
- remote2 := newFakeCluster(scheme)
- c := &Client{
- HomeCluster: homeCluster,
- HomeScheme: scheme,
- ResourceRouters: map[schema.GroupVersionKind]ResourceRouter{
- configMapGVK: testRouter{},
- },
- remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
- configMapGVK: {
- {cluster: remote1, labels: map[string]string{"az": "az-1"}},
- {cluster: remote2, labels: map[string]string{"az": "az-2"}},
- },
- },
- fallbackGVKs: map[schema.GroupVersionKind]bool{configMapGVK: true},
- }
-
- // Object with az-3 doesn't match any remote cluster.
- obj := &corev1.ConfigMap{
- ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"az": "az-3"}},
- }
- cl, err := c.clusterForWrite(configMapGVK, obj)
- if err != nil {
- t.Fatalf("unexpected error: %v", err)
- }
- if cl != homeCluster {
- t.Error("expected home cluster as fallback")
- }
-}
-
-func TestClient_clusterForWrite_NoMatchNoFallback(t *testing.T) {
+func TestClient_clusterForWrite_NoMatch(t *testing.T) {
scheme := newTestScheme(t)
homeCluster := newFakeCluster(scheme)
remote1 := newFakeCluster(scheme)
@@ -375,7 +382,7 @@ func TestClient_clusterForWrite_NoMatchNoFallback(t *testing.T) {
}
_, err := c.clusterForWrite(configMapGVK, obj)
if err == nil {
- t.Error("expected error when no match and no fallback")
+ t.Error("expected error when no remote cluster matches")
}
}
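
The write-path semantics these tests pin down: home GVKs always write to the home cluster, remote GVKs are routed by the configured `ResourceRouter`, and a write that matches no remote now fails instead of falling back. A condensed sketch of `clusterForWrite` consistent with the tests (the body and error messages are reconstructions, not the actual source; imports follow `pkg/multicluster/client.go` plus `fmt`):

```go
func (c *Client) clusterForWrite(gvk schema.GroupVersionKind, obj client.Object) (cluster.Cluster, error) {
	// Home GVKs are always written to the home cluster.
	if c.homeGVKs[gvk] {
		return c.HomeCluster, nil
	}
	router, ok := c.ResourceRouters[gvk]
	if !ok {
		return nil, fmt.Errorf("no resource router configured for %s", gvk)
	}
	// The first remote whose labels match the object wins.
	for _, r := range c.remoteClusters[gvk] {
		match, err := router.Match(obj, r.labels)
		if err != nil {
			return nil, err
		}
		if match {
			return r.cluster, nil
		}
	}
	return nil, fmt.Errorf("no remote cluster matches object for %s", gvk)
}
```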
@@ -517,7 +524,7 @@ func TestClient_Get_MultiCluster_FirstFound(t *testing.T) {
}
func TestClient_Get_MultiCluster_NotFound(t *testing.T) {
- // Iterate all remote clusters and return NotFound if object is not found in any cluster (including home cluster if fallback is enabled).
+ // Iterate all remote clusters and return NotFound if object is not found in any cluster.
// In this test, the object doesn't exist in any cluster, so NotFound should be returned.
scheme := newTestScheme(t)
@@ -555,7 +562,7 @@ func TestClient_Get_UnknownType(t *testing.T) {
}
}
-func TestClient_Get_FallbackCluster(t *testing.T) {
+func TestClient_Get_HomeGVKCluster(t *testing.T) {
scheme := newTestScheme(t)
homeCluster := newFakeCluster(scheme, &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{Name: "home-cm", Namespace: "default"},
@@ -569,7 +576,7 @@ func TestClient_Get_FallbackCluster(t *testing.T) {
remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
configMapGVK: {{cluster: remote}},
},
- fallbackGVKs: map[schema.GroupVersionKind]bool{configMapGVK: true},
+ homeGVKs: map[schema.GroupVersionKind]bool{configMapGVK: true},
}
cm := &corev1.ConfigMap{}
@@ -638,7 +645,7 @@ func TestClient_List_MultipleClusters_CombinesResults(t *testing.T) {
}
}
-func TestClient_List_FallbackIncludesHome(t *testing.T) {
+func TestClient_List_HomeGVKIncludesHome(t *testing.T) {
scheme := newTestScheme(t)
remote := newFakeCluster(scheme,
&corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "remote-cm", Namespace: "default"}},
@@ -653,7 +660,7 @@ func TestClient_List_FallbackIncludesHome(t *testing.T) {
remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
configMapListGVK: {{cluster: remote}},
},
- fallbackGVKs: map[schema.GroupVersionKind]bool{configMapListGVK: true},
+ homeGVKs: map[schema.GroupVersionKind]bool{configMapListGVK: true},
}
cmList := &corev1.ConfigMapList{}
@@ -675,6 +682,7 @@ func TestClient_List_HomeClusterOnly(t *testing.T) {
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
+ homeGVKs: map[schema.GroupVersionKind]bool{configMapListGVK: true},
}
cmList := &corev1.ConfigMapList{}
@@ -760,7 +768,7 @@ func TestClient_Create_RouterMatchesCluster(t *testing.T) {
}
}
-func TestClient_Create_FallbackToHome(t *testing.T) {
+func TestClient_Create_NoMatchReturnsError(t *testing.T) {
scheme := newTestScheme(t)
homeCluster := newFakeCluster(scheme)
@@ -775,27 +783,20 @@ func TestClient_Create_FallbackToHome(t *testing.T) {
{cluster: newFakeCluster(scheme), labels: map[string]string{"az": "az-1"}},
},
},
- fallbackGVKs: map[schema.GroupVersionKind]bool{configMapGVK: true},
}
- // Object with az-99 doesn't match any remote — should fall back to home.
+ // Object with az-99 doesn't match any remote — should error.
cm := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
- Name: "fallback-cm",
+ Name: "no-match-cm",
Namespace: "default",
Labels: map[string]string{"az": "az-99"},
},
}
err := c.Create(context.Background(), cm)
- if err != nil {
- t.Fatalf("unexpected error: %v", err)
- }
-
- result := &corev1.ConfigMap{}
- err = homeCluster.GetClient().Get(context.Background(), client.ObjectKey{Name: "fallback-cm", Namespace: "default"}, result)
- if err != nil {
- t.Fatalf("expected to find in home cluster (fallback): %v", err)
+ if err == nil {
+ t.Error("expected error when no remote cluster matches")
}
}
@@ -982,7 +983,7 @@ func TestStatusClient_Update(t *testing.T) {
Status: corev1.PodStatus{Phase: corev1.PodPending},
}
homeCluster := newFakeCluster(scheme, pod)
- c := &Client{HomeCluster: homeCluster, HomeScheme: scheme}
+ c := &Client{HomeCluster: homeCluster, HomeScheme: scheme, homeGVKs: map[schema.GroupVersionKind]bool{podGVK: true}}
ctx := context.Background()
existingPod := &corev1.Pod{}
@@ -1011,7 +1012,7 @@ func TestStatusClient_Patch(t *testing.T) {
Status: corev1.PodStatus{Phase: corev1.PodPending},
}
homeCluster := newFakeCluster(scheme, pod)
- c := &Client{HomeCluster: homeCluster, HomeScheme: scheme}
+ c := &Client{HomeCluster: homeCluster, HomeScheme: scheme, homeGVKs: map[schema.GroupVersionKind]bool{podGVK: true}}
ctx := context.Background()
existingPod := &corev1.Pod{}
@@ -1204,6 +1205,10 @@ func TestClient_IndexField_HomeClusterOnly(t *testing.T) {
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
+ homeGVKs: map[schema.GroupVersionKind]bool{
+ configMapGVK: true,
+ configMapListGVK: true,
+ },
}
err := c.IndexField(context.Background(), &corev1.ConfigMap{}, &corev1.ConfigMapList{}, "metadata.name", func(obj client.Object) []string {
@@ -1232,7 +1237,7 @@ func TestClient_ConcurrentAddRemoteAndRead(t *testing.T) {
for range 10 {
wg.Go(func() {
for range 100 {
- _ = c.ClustersForGVK(configMapGVK)
+ _, _ = c.ClustersForGVK(configMapGVK)
}
})
}
@@ -1285,7 +1290,7 @@ func TestClient_InitFromConf_EmptyConfig(t *testing.T) {
}
}
-func TestClient_InitFromConf_UnregisteredGVK(t *testing.T) {
+func TestClient_InitFromConf_UnregisteredRemoteGVK(t *testing.T) {
scheme := newTestScheme(t)
c := &Client{
HomeCluster: newFakeCluster(scheme),
@@ -1293,21 +1298,23 @@ func TestClient_InitFromConf_UnregisteredGVK(t *testing.T) {
}
mgr := &fakeManager{}
conf := ClientConfig{
- APIServerOverrides: []APIServerOverride{
- {
- Host: "https://remote-api:6443",
- GVKs: []string{"unregistered.group/v1/UnknownKind"},
+ APIServers: APIServersConfig{
+ Remotes: []RemoteConfig{
+ {
+ Host: "https://remote-api:6443",
+ GVKs: []string{"unregistered.group/v1/UnknownKind"},
+ },
},
},
}
err := c.InitFromConf(context.Background(), mgr, conf)
if err == nil {
- t.Fatal("expected error for unregistered GVK")
+ t.Fatal("expected error for unregistered remote GVK")
}
}
-func TestClient_InitFromConf_UnregisteredFallbackGVK(t *testing.T) {
+func TestClient_InitFromConf_UnregisteredHomeGVK(t *testing.T) {
scheme := newTestScheme(t)
c := &Client{
HomeCluster: newFakeCluster(scheme),
@@ -1315,12 +1322,14 @@ func TestClient_InitFromConf_UnregisteredFallbackGVK(t *testing.T) {
}
mgr := &fakeManager{}
conf := ClientConfig{
- Fallbacks: []FallbackConfig{{GVK: "unregistered.group/v1/UnknownKind"}},
+ APIServers: APIServersConfig{
+ Home: HomeConfig{GVKs: []string{"unregistered.group/v1/UnknownKind"}},
+ },
}
err := c.InitFromConf(context.Background(), mgr, conf)
if err == nil {
- t.Fatal("expected error for unregistered fallback GVK")
+ t.Fatal("expected error for unregistered home GVK")
}
}
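
For reference, here is the reshaped configuration surface from this commit, gathered into one hedged example. Field names follow the tests above and the import path matches the repository; the GVK strings, context, and manager wiring are illustrative assumptions:

```go
package main

import (
	"context"

	"github.com/cobaltcore-dev/cortex/pkg/multicluster"
	"sigs.k8s.io/controller-runtime/pkg/manager"
)

// initMulticluster sketches how the new ClientConfig might be populated:
// home GVKs stay in the home cluster, remote GVKs are served by the
// listed API servers (matched for writes via per-remote labels).
func initMulticluster(ctx context.Context, c *multicluster.Client, mgr manager.Manager) error {
	conf := multicluster.ClientConfig{
		APIServers: multicluster.APIServersConfig{
			Home: multicluster.HomeConfig{
				GVKs: []string{"cortex.cloud/v1alpha1/Decision"},
			},
			Remotes: []multicluster.RemoteConfig{{
				Host: "https://remote-api:6443",
				GVKs: []string{"kvm.cloud.sap/v1/Hypervisor"},
			}},
		},
	}
	return c.InitFromConf(ctx, mgr, conf)
}
```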
From 0f75530d132db255c6bc323ab336173224155f53 Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Fri, 13 Mar 2026 15:52:07 +0100
Subject: [PATCH 07/24] Lint fix
---
pkg/multicluster/client_test.go | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/pkg/multicluster/client_test.go b/pkg/multicluster/client_test.go
index 6103f1e0a..601da8ac4 100644
--- a/pkg/multicluster/client_test.go
+++ b/pkg/multicluster/client_test.go
@@ -1237,7 +1237,9 @@ func TestClient_ConcurrentAddRemoteAndRead(t *testing.T) {
for range 10 {
wg.Go(func() {
for range 100 {
- _, _ = c.ClustersForGVK(configMapGVK)
+ if _, err := c.ClustersForGVK(configMapGVK); err != nil {
+ t.Errorf("unexpected error: %v", err)
+ }
}
})
}
From eb4b7936d811a432a6847a38fdb27c37f53311cc Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Tue, 17 Mar 2026 13:04:07 +0100
Subject: [PATCH 08/24] Fix copy paste error
---
internal/knowledge/kpis/controller.go | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/internal/knowledge/kpis/controller.go b/internal/knowledge/kpis/controller.go
index 03e4c34d4..92ca00728 100644
--- a/internal/knowledge/kpis/controller.go
+++ b/internal/knowledge/kpis/controller.go
@@ -482,9 +482,9 @@ func (c *Controller) SetupWithManager(mgr manager.Manager, mcl *multicluster.Cli
For(
&v1alpha1.KPI{},
builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
- // Only react to datasources matching the scheduling domain.
- ds := obj.(*v1alpha1.KPI)
- return ds.Spec.SchedulingDomain == c.Config.SchedulingDomain
+ // Only react to kpis matching the scheduling domain.
+ kpi := obj.(*v1alpha1.KPI)
+ return kpi.Spec.SchedulingDomain == c.Config.SchedulingDomain
})),
).
Complete(c)
From a1763a5295e50b2f41dfa985adec2b1916cc5aad Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Tue, 17 Mar 2026 13:13:14 +0100
Subject: [PATCH 09/24] Initial draft of multi-az multicluster guide [skip ci]
---
...ex-remote.yaml => cortex-remote-az-a.yaml} | 2 +-
.../multicluster/cortex-remote-az-b.yaml | 27 ++++++
.../guides/multicluster/hypervisors-az-a.yaml | 13 +++
.../guides/multicluster/hypervisors-az-b.yaml | 13 +++
docs/guides/multicluster/readme.md | 92 ++++++++++++-------
5 files changed, 112 insertions(+), 35 deletions(-)
rename docs/guides/multicluster/{cortex-remote.yaml => cortex-remote-az-a.yaml} (97%)
create mode 100644 docs/guides/multicluster/cortex-remote-az-b.yaml
create mode 100644 docs/guides/multicluster/hypervisors-az-a.yaml
create mode 100644 docs/guides/multicluster/hypervisors-az-b.yaml
diff --git a/docs/guides/multicluster/cortex-remote.yaml b/docs/guides/multicluster/cortex-remote-az-a.yaml
similarity index 97%
rename from docs/guides/multicluster/cortex-remote.yaml
rename to docs/guides/multicluster/cortex-remote-az-a.yaml
index 675a14063..61fa24631 100644
--- a/docs/guides/multicluster/cortex-remote.yaml
+++ b/docs/guides/multicluster/cortex-remote-az-a.yaml
@@ -1,6 +1,6 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
-name: cortex-remote
+name: cortex-remote-az-a
nodes:
- role: control-plane
extraPortMappings:
diff --git a/docs/guides/multicluster/cortex-remote-az-b.yaml b/docs/guides/multicluster/cortex-remote-az-b.yaml
new file mode 100644
index 000000000..47848e749
--- /dev/null
+++ b/docs/guides/multicluster/cortex-remote-az-b.yaml
@@ -0,0 +1,27 @@
+kind: Cluster
+apiVersion: kind.x-k8s.io/v1alpha4
+name: cortex-remote-az-b
+nodes:
+ - role: control-plane
+ extraPortMappings:
+ - containerPort: 6443
+ hostPort: 8445
+ extraMounts:
+ - hostPath: /tmp/root-ca-home.pem
+ containerPath: /etc/ca-certificates/root-ca.pem
+ kubeadmConfigPatches:
+ - |
+ kind: ClusterConfiguration
+ apiServer:
+ extraArgs:
+ oidc-client-id: "https://host.docker.internal:8443" # = audience
+ oidc-issuer-url: "https://host.docker.internal:8443"
+ oidc-username-claim: sub
+ oidc-ca-file: /etc/ca-certificates/root-ca.pem
+ certSANs:
+ - api-proxy
+ - api-proxy.default.svc
+ - api-proxy.default.svc.cluster.local
+ - localhost
+ - 127.0.0.1
+ - host.docker.internal
\ No newline at end of file
diff --git a/docs/guides/multicluster/hypervisors-az-a.yaml b/docs/guides/multicluster/hypervisors-az-a.yaml
new file mode 100644
index 000000000..f6417afb3
--- /dev/null
+++ b/docs/guides/multicluster/hypervisors-az-a.yaml
@@ -0,0 +1,13 @@
+apiVersion: kvm.cloud.sap/v1
+kind: Hypervisor
+metadata:
+ name: hypervisor-1-az-a
+ labels:
+ topology.kubernetes.io/zone: cortex-remote-az-a
+---
+apiVersion: kvm.cloud.sap/v1
+kind: Hypervisor
+metadata:
+ name: hypervisor-2-az-a
+ labels:
+ topology.kubernetes.io/zone: cortex-remote-az-a
\ No newline at end of file
diff --git a/docs/guides/multicluster/hypervisors-az-b.yaml b/docs/guides/multicluster/hypervisors-az-b.yaml
new file mode 100644
index 000000000..19c636c16
--- /dev/null
+++ b/docs/guides/multicluster/hypervisors-az-b.yaml
@@ -0,0 +1,13 @@
+apiVersion: kvm.cloud.sap/v1
+kind: Hypervisor
+metadata:
+ name: hypervisor-1-az-b
+ labels:
+ topology.kubernetes.io/zone: cortex-remote-az-b
+---
+apiVersion: kvm.cloud.sap/v1
+kind: Hypervisor
+metadata:
+ name: hypervisor-2-az-b
+ labels:
+ topology.kubernetes.io/zone: cortex-remote-az-b
\ No newline at end of file
diff --git a/docs/guides/multicluster/readme.md b/docs/guides/multicluster/readme.md
index 2b421b443..bac318fb2 100644
--- a/docs/guides/multicluster/readme.md
+++ b/docs/guides/multicluster/readme.md
@@ -1,38 +1,42 @@
# Cortex Multi-Cluster Testing
-Cortex provides support for multi-cluster deployments, where a "home" cluster hosts the cortex pods and one or more "remote" clusters are used to persist CRDs. A typical use case for this would be to offload the etcd storage for Cortex CRDs to a remote cluster, reducing the resource usage on the home cluster.
+Cortex provides support for multi-cluster deployments, where a "home" cluster hosts the cortex pods and one or more "remote" clusters are used to persist CRDs. A typical use case is to offload the etcd storage for Cortex CRDs to a remote cluster, reducing the resource usage on the home cluster. Another use case is to have multiple remote clusters that maintain all the compute workloads and expose resources that Cortex needs to access, such as the `Hypervisor` resource.
-This guide will walk you through setting up a multi-cluster Cortex deployment using [kind](https://kind.sigs.k8s.io/). We will create two kind clusters: `cortex-home` and `cortex-remote`. The `cortex-home` cluster will host the Cortex control plane, while the `cortex-remote` cluster will be used to store CRDs.
+This guide will walk you through setting up a multi-cluster Cortex deployment using [kind](https://kind.sigs.k8s.io/). We will create three kind clusters: `cortex-home`, `cortex-remote-az-a`, and `cortex-remote-az-b`. The `cortex-home` cluster will host the Cortex control plane, while the `cortex-remote-az-a` and `cortex-remote-az-b` clusters will be used to store hypervisor CRDs.
-To store its CRDs in the `cortex-remote` cluster, the `cortex-home` cluster needs to be able to authenticate to the `cortex-remote` cluster's API server. We will achieve this by configuring the `cortex-remote` cluster to trust the service account tokens issued by the `cortex-home` cluster. In this way, no external OIDC provider is needed, because the `cortex-home` cluster's own OIDC issuer for service accounts acts as the identity provider.
+To store its CRDs in the `cortex-remote-*` clusters, the `cortex-home` cluster needs to be able to authenticate to the `cortex-remote-*` clusters' API servers. We will achieve this by configuring the `cortex-remote-*` clusters to trust the service account tokens issued by the `cortex-home` cluster. In this way, no external OIDC provider is needed, because the `cortex-home` cluster's own OIDC issuer for service accounts acts as the identity provider.
Here is a diagram illustrating the authentication flow:
```mermaid
sequenceDiagram
participant Home as cortex-home
- participant Remote as cortex-remote
+ participant RemoteA as cortex-remote-az-a
+ participant RemoteB as cortex-remote-az-b
Home->>Home: Service Account Token Issued
- Home->>Remote: API Request with Token
- Remote->>Remote: Token Verified Against Home's OIDC Issuer
- Remote->>Home: API Response
+ Home->>RemoteA: API Request with Token
+ RemoteA->>RemoteA: Token Verified Against Home's OIDC Issuer
+ RemoteA->>Home: API Response
+ Home->>RemoteB: API Request with Token
+ RemoteB->>RemoteB: Token Verified Against Home's OIDC Issuer
+ RemoteB->>Home: API Response
```
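
To make the flow concrete, here is a hedged sketch of how a pod in `cortex-home` could build a REST config for a remote API server: it presents its projected home-cluster service account token as a bearer token, which the remote validates against the home cluster's OIDC issuer. The helper, file paths, and CA file name are illustrative assumptions, not code from this repository:

```go
package main

import (
	"os"

	"k8s.io/client-go/rest"
)

// remoteRESTConfig (hypothetical): authenticate to a remote API server
// with the pod's home-cluster service account token.
func remoteRESTConfig(host string) (*rest.Config, error) {
	// Root CA of the remote API server, mounted into the pod.
	caCert, err := os.ReadFile("/etc/ca-certificates/root-ca-remote.pem")
	if err != nil {
		return nil, err
	}
	return &rest.Config{
		Host: host,
		// Projected service account token issued by cortex-home.
		BearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token",
		TLSClientConfig: rest.TLSClientConfig{CAData: caCert},
	}, nil
}
```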
## Home Cluster Setup
-First we set up the `cortex-home` cluster. The provided kind configuration file `cortex-home.yaml` sets up the cluster with the necessary port mappings to allow communication between the two clusters. `cortex-home` will expose its API server on port `8443`, which `cortex-remote` will use to verify service account tokens through `https://host.docker.internal:8443`.
+First we set up the `cortex-home` cluster. The provided kind configuration file `cortex-home.yaml` sets up the cluster with the necessary port mappings to allow communication between the three clusters. `cortex-home` will expose its API server on port `8443`, which `cortex-remote-az-a` and `cortex-remote-az-b` will use to verify service account tokens through `https://host.docker.internal:8443`.
```bash
kind create cluster --config docs/guides/multicluster/cortex-home.yaml
```
-Next, we need to expose the OIDC issuer endpoint of the `cortex-home` cluster's API server to the `cortex-remote` cluster. We do this by creating a `ClusterRoleBinding` that grants the `system:service-account-issuer-discovery` role to the `kube-system` service account in the `cortex-home` cluster.
+Next, we need to expose the OIDC issuer endpoint of the `cortex-home` cluster's API server to the `cortex-remote-*` clusters. We do this by creating a `ClusterRoleBinding` that grants the `system:service-account-issuer-discovery` role to the `kube-system` service account in the `cortex-home` cluster.
```bash
kubectl --context kind-cortex-home apply -f docs/guides/multicluster/cortex-home-crb.yaml
```
-To talk back to the `cortex-home` cluster's OIDC endpoint, the `cortex-remote` cluster needs to trust the root CA certificate used by the `cortex-home` cluster's API server. We can extract this certificate from the `extension-apiserver-authentication` config map in the `kube-system` namespace, and save it to a temporary file for later use.
+To talk back to the `cortex-home` cluster's OIDC endpoint, the `cortex-remote-*` clusters need to trust the root CA certificate used by the `cortex-home` cluster's API server. We can extract this certificate from the `extension-apiserver-authentication` config map in the `kube-system` namespace, and save it to a temporary file for later use.
```bash
kubectl --context kind-cortex-home --namespace kube-system \
@@ -42,67 +46,87 @@ kubectl --context kind-cortex-home --namespace kube-system \
## Remote Cluster Setup
-With all the prerequisites in place, we can now set up the `cortex-remote` cluster. We create the cluster using the provided kind configuration file `cortex-remote.yaml`. This configuration will tell the `cortex-remote` cluster to trust the `cortex-home` cluster's API server as OIDC issuer for service account token verification. Also, the `cortex-remote` cluster will trust the root CA certificate we extracted earlier. The `cortex-remote` apiserver will be accessible at `https://host.docker.internal:8444`.
+With all the prerequisites in place, we can now set up the `cortex-remote-*` clusters. We create the clusters using the provided kind configuration files `cortex-remote-az-a.yaml` and `cortex-remote-az-b.yaml`. These configurations tell the `cortex-remote-*` clusters to trust the `cortex-home` cluster's API server as the OIDC issuer for service account token verification. The `cortex-remote-*` clusters will also trust the root CA certificate we extracted earlier. The `cortex-remote-*` API servers will be accessible at `https://host.docker.internal:8444` and `https://host.docker.internal:8445`, respectively.
```bash
-kind create cluster --config docs/guides/multicluster/cortex-remote.yaml
+kind create cluster --config docs/guides/multicluster/cortex-remote-az-a.yaml
+kind create cluster --config docs/guides/multicluster/cortex-remote-az-b.yaml
```
-Next, we need to create a `ClusterRoleBinding` in the `cortex-remote` cluster that grants service accounts coming from the `cortex-home` cluster access to the appropriate resources. We do this by applying the provided `cortex-remote-crb.yaml` file.
+Next, we need to create a `ClusterRoleBinding` in the `cortex-remote-*` clusters that grants service accounts coming from the `cortex-home` cluster access to the appropriate resources. We do this by applying the provided `cortex-remote-crb.yaml` file.
```bash
-kubectl --context kind-cortex-remote apply -f docs/guides/multicluster/cortex-remote-crb.yaml
+kubectl --context kind-cortex-remote-az-a apply -f docs/guides/multicluster/cortex-remote-crb.yaml
+kubectl --context kind-cortex-remote-az-b apply -f docs/guides/multicluster/cortex-remote-crb.yaml
```
## Deploying Cortex
-Before we launch cortex make sure that the CRDs are installed in the `cortex-remote` cluster.
+Before we launch cortex, make sure that the CRDs are installed in the `cortex-remote-*` clusters.
```bash
-kubectl config use-context kind-cortex-remote
+kubectl config use-context kind-cortex-remote-az-a
+helm install helm/bundles/cortex-crds --generate-name
+kubectl config use-context kind-cortex-remote-az-b
helm install helm/bundles/cortex-crds --generate-name
```
-Also, we need to extract the root CA certificate used by the `cortex-remote` cluster's API server, so that we can configure the cortex pods in the `cortex-home` cluster to trust it.
+Also, we need to extract the root CA certificate used by the `cortex-remote-*` clusters' API servers, so that we can configure the cortex pods in the `cortex-home` cluster to trust them.
```bash
-kubectl --context kind-cortex-remote --namespace kube-system \
+kubectl --context kind-cortex-remote-az-a --namespace kube-system \
+ get configmap extension-apiserver-authentication \
+ -o jsonpath="{.data['client-ca-file']}" > /tmp/root-ca-remote-az-a.pem
+kubectl --context kind-cortex-remote-az-b --namespace kube-system \
get configmap extension-apiserver-authentication \
- -o jsonpath="{.data['client-ca-file']}" > /tmp/root-ca-remote.pem
+ -o jsonpath="{.data['client-ca-file']}" > /tmp/root-ca-remote-az-b.pem
```
-Now we can deploy cortex to the `cortex-home` cluster, configuring it to use the `cortex-remote` cluster for CRD storage. We create a temporary Helm values override file that specifies the API server URL and root CA certificate for the `cortex-remote` cluster. In this example, we are configuring the `decisions.cortex.cloud/v1alpha1` resource to be stored in the `cortex-remote` cluster.
+Now we can deploy cortex to the `cortex-home` cluster, configuring it to use the `cortex-remote-*` clusters for CRD storage. We create a temporary Helm values override file that specifies the API server URLs and root CA certificate for the `cortex-remote-*` clusters. In this example, we are configuring the `kvm.cloud.sap/v1/Hypervisor` resource to be stored in the `cortex-remote-*` clusters.
```bash
export TILT_OVERRIDES_PATH=/tmp/cortex-values.yaml
tee $TILT_OVERRIDES_PATH <<EOF
Date: Tue, 17 Mar 2026 13:08:01 +0100
Subject: [PATCH 10/24] CodeRabbit feedback
---
pkg/multicluster/builder_test.go | 29 ++++++++++++++++++++++++++---
pkg/multicluster/client.go | 2 +-
2 files changed, 27 insertions(+), 4 deletions(-)
diff --git a/pkg/multicluster/builder_test.go b/pkg/multicluster/builder_test.go
index c1bd2ba0e..56458ba53 100644
--- a/pkg/multicluster/builder_test.go
+++ b/pkg/multicluster/builder_test.go
@@ -50,6 +50,28 @@ func TestClient_ClustersForGVK_UnknownGVKReturnsError(t *testing.T) {
}
}
+func TestClient_ClustersForGVK_HomeGVKNilHomeClusterReturnsEmpty(t *testing.T) {
+ gvk := schema.GroupVersionKind{
+ Group: "apps",
+ Version: "v1",
+ Kind: "Deployment",
+ }
+
+ c := &Client{
+ remoteClusters: make(map[schema.GroupVersionKind][]remoteCluster),
+ homeGVKs: map[schema.GroupVersionKind]bool{gvk: true},
+ }
+
+ // GVK is in homeGVKs but HomeCluster is nil, so no clusters are returned.
+ result, err := c.ClustersForGVK(gvk)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(result) != 0 {
+ t.Errorf("expected 0 clusters when HomeCluster is nil, got %d", len(result))
+ }
+}
+
func TestClient_ClustersForGVK_HomeGVKReturnsHomeCluster(t *testing.T) {
gvk := schema.GroupVersionKind{
Group: "apps",
@@ -57,12 +79,13 @@ func TestClient_ClustersForGVK_HomeGVKReturnsHomeCluster(t *testing.T) {
Kind: "Deployment",
}
+ home := &fakeCluster{}
c := &Client{
+ HomeCluster: home,
remoteClusters: make(map[schema.GroupVersionKind][]remoteCluster),
homeGVKs: map[schema.GroupVersionKind]bool{gvk: true},
}
- // GVK is in homeGVKs, returns home cluster (nil).
result, err := c.ClustersForGVK(gvk)
if err != nil {
t.Fatalf("unexpected error: %v", err)
@@ -70,7 +93,7 @@ func TestClient_ClustersForGVK_HomeGVKReturnsHomeCluster(t *testing.T) {
if len(result) != 1 {
t.Errorf("expected 1 cluster, got %d", len(result))
}
- if result[0] != nil {
- t.Error("expected nil HomeCluster")
+ if result[0] != home {
+ t.Error("expected HomeCluster to be returned")
}
}
diff --git a/pkg/multicluster/client.go b/pkg/multicluster/client.go
index 1aeb04352..228bd5641 100644
--- a/pkg/multicluster/client.go
+++ b/pkg/multicluster/client.go
@@ -193,7 +193,7 @@ func (c *Client) ClustersForGVK(gvk schema.GroupVersionKind) ([]cluster.Cluster,
for _, r := range remotes {
clusters = append(clusters, r.cluster)
}
- if isHome {
+ if isHome && c.HomeCluster != nil {
clusters = append(clusters, c.HomeCluster)
}
return clusters, nil
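
Pieced together from this hunk and the tests, the full resolution logic reads roughly as follows (a reconstruction: the lock, the slice preallocation, and the error message are assumptions; the tail mirrors the hunk above):

```go
package multicluster

import (
	"fmt"

	"k8s.io/apimachinery/pkg/runtime/schema"
	"sigs.k8s.io/controller-runtime/pkg/cluster"
)

// ClustersForGVK returns remote clusters first, then the home cluster
// if the GVK is marked as a home GVK. Unknown GVKs error out.
func (c *Client) ClustersForGVK(gvk schema.GroupVersionKind) ([]cluster.Cluster, error) {
	c.mu.RLock() // lock name assumed; concurrent access is covered by the tests
	defer c.mu.RUnlock()
	remotes, hasRemotes := c.remoteClusters[gvk]
	isHome := c.homeGVKs[gvk]
	if !hasRemotes && !isHome {
		return nil, fmt.Errorf("no cluster configured for GVK %s", gvk)
	}
	clusters := make([]cluster.Cluster, 0, len(remotes)+1)
	for _, r := range remotes {
		clusters = append(clusters, r.cluster)
	}
	if isHome && c.HomeCluster != nil {
		clusters = append(clusters, c.HomeCluster)
	}
	return clusters, nil
}
```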
From 1b6d05cead54468ca6c31194f3d76a25c46ead64 Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Tue, 17 Mar 2026 13:10:27 +0100
Subject: [PATCH 11/24] Feedback
---
pkg/multicluster/client_test.go | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/pkg/multicluster/client_test.go b/pkg/multicluster/client_test.go
index 601da8ac4..7e29265dc 100644
--- a/pkg/multicluster/client_test.go
+++ b/pkg/multicluster/client_test.go
@@ -1226,9 +1226,11 @@ func TestClient_IndexField_HomeClusterOnly(t *testing.T) {
func TestClient_ConcurrentAddRemoteAndRead(t *testing.T) {
scheme := newTestScheme(t)
c := &Client{
- HomeCluster: newFakeCluster(scheme),
- HomeScheme: scheme,
- remoteClusters: map[schema.GroupVersionKind][]remoteCluster{},
+ HomeCluster: newFakeCluster(scheme),
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {{}},
+ },
}
var wg sync.WaitGroup
From 2c0caa3a0a70ad70c121ab36d7425a1ead40dce1 Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Tue, 17 Mar 2026 13:12:37 +0100
Subject: [PATCH 12/24] Feedback
---
pkg/multicluster/routers.go | 9 +++++++--
pkg/multicluster/routers_test.go | 10 ++++++++++
2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/pkg/multicluster/routers.go b/pkg/multicluster/routers.go
index 791ca4880..22b7d1a6e 100644
--- a/pkg/multicluster/routers.go
+++ b/pkg/multicluster/routers.go
@@ -19,8 +19,13 @@ type ResourceRouter interface {
type HypervisorResourceRouter struct{}
func (h HypervisorResourceRouter) Match(obj any, labels map[string]string) (bool, error) {
- hv, ok := obj.(hv1.Hypervisor)
- if !ok {
+ var hv hv1.Hypervisor
+ switch v := obj.(type) {
+ case *hv1.Hypervisor:
+ hv = *v
+ case hv1.Hypervisor:
+ hv = v
+ default:
return false, errors.New("object is not a Hypervisor")
}
az, ok := labels["az"]
diff --git a/pkg/multicluster/routers_test.go b/pkg/multicluster/routers_test.go
index 179b84818..d9598b767 100644
--- a/pkg/multicluster/routers_test.go
+++ b/pkg/multicluster/routers_test.go
@@ -30,6 +30,16 @@ func TestHypervisorResourceRouter_Match(t *testing.T) {
labels: map[string]string{"az": "qa-de-1a"},
wantMatch: true,
},
+ {
+ name: "matching AZ pointer",
+ obj: &hv1.Hypervisor{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: map[string]string{"topology.kubernetes.io/zone": "qa-de-1a"},
+ },
+ },
+ labels: map[string]string{"az": "qa-de-1a"},
+ wantMatch: true,
+ },
{
name: "non-matching AZ",
obj: hv1.Hypervisor{
From b10b092d1334d83452ad59b327b6a4e46e9a207a Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Tue, 17 Mar 2026 13:33:54 +0100
Subject: [PATCH 13/24] Replace .For with .MultiCluster
---
.../plugins/openstack/controller.go | 35 +++++++++-------
.../plugins/prometheus/controller.go | 35 +++++++++-------
internal/knowledge/extractor/controller.go | 27 +++++++-----
internal/knowledge/kpis/controller.go | 22 ++++++----
.../filter_weigher_pipeline_controller.go | 16 ++++---
internal/scheduling/explanation/controller.go | 25 ++++++-----
.../filter_weigher_pipeline_controller.go | 34 ++++++++-------
.../filter_weigher_pipeline_controller.go | 34 ++++++++-------
.../scheduling/nova/deschedulings_cleanup.go | 20 +++++----
.../scheduling/nova/deschedulings_executor.go | 42 ++++++++++---------
.../nova/detector_pipeline_controller.go | 18 ++++----
.../filter_weigher_pipeline_controller.go | 34 ++++++++-------
.../filter_weigher_pipeline_controller.go | 34 ++++++++-------
.../reservations/controller/controller.go | 14 +++++--
14 files changed, 228 insertions(+), 162 deletions(-)
diff --git a/internal/knowledge/datasources/plugins/openstack/controller.go b/internal/knowledge/datasources/plugins/openstack/controller.go
index 06767b6ed..c25b69ab8 100644
--- a/internal/knowledge/datasources/plugins/openstack/controller.go
+++ b/internal/knowledge/datasources/plugins/openstack/controller.go
@@ -21,8 +21,8 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
- "sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/handler"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"
@@ -237,19 +237,24 @@ func (r *OpenStackDatasourceReconciler) Reconcile(ctx context.Context, req ctrl.
}
func (r *OpenStackDatasourceReconciler) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error {
- return multicluster.BuildController(mcl, mgr).
- Named("cortex-openstack-datasource").
- For(
- &v1alpha1.Datasource{},
- builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
- // Only react to datasources matching the operator.
- ds := obj.(*v1alpha1.Datasource)
- if ds.Spec.SchedulingDomain != r.Conf.SchedulingDomain {
- return false
- }
- // Only react to openstack datasources.
- return ds.Spec.Type == v1alpha1.DatasourceTypeOpenStack
- })),
- ).
+ bldr := multicluster.BuildController(mcl, mgr)
+ // Watch datasource changes across all clusters.
+ bldr, err := bldr.WatchesMulticluster(
+ &v1alpha1.Datasource{},
+ &handler.EnqueueRequestForObject{},
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ // Only react to datasources matching the operator.
+ ds := obj.(*v1alpha1.Datasource)
+ if ds.Spec.SchedulingDomain != r.Conf.SchedulingDomain {
+ return false
+ }
+ // Only react to openstack datasources.
+ return ds.Spec.Type == v1alpha1.DatasourceTypeOpenStack
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.Named("cortex-openstack-datasource").
Complete(r)
}
diff --git a/internal/knowledge/datasources/plugins/prometheus/controller.go b/internal/knowledge/datasources/plugins/prometheus/controller.go
index 25862a8ef..0dc0dd63b 100644
--- a/internal/knowledge/datasources/plugins/prometheus/controller.go
+++ b/internal/knowledge/datasources/plugins/prometheus/controller.go
@@ -19,8 +19,8 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
- "sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/handler"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"
@@ -199,19 +199,24 @@ func (r *PrometheusDatasourceReconciler) Reconcile(ctx context.Context, req ctrl
}
func (r *PrometheusDatasourceReconciler) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error {
- return multicluster.BuildController(mcl, mgr).
- Named("cortex-prometheus-datasource").
- For(
- &v1alpha1.Datasource{},
- builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
- // Only react to datasources matching the operator.
- ds := obj.(*v1alpha1.Datasource)
- if ds.Spec.SchedulingDomain != r.Conf.SchedulingDomain {
- return false
- }
- // Only react to prometheus datasources.
- return ds.Spec.Type == v1alpha1.DatasourceTypePrometheus
- })),
- ).
+ bldr := multicluster.BuildController(mcl, mgr)
+ // Watch datasource changes across all clusters.
+ bldr, err := bldr.WatchesMulticluster(
+ &v1alpha1.Datasource{},
+ &handler.EnqueueRequestForObject{},
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ // Only react to datasources matching the operator.
+ ds := obj.(*v1alpha1.Datasource)
+ if ds.Spec.SchedulingDomain != r.Conf.SchedulingDomain {
+ return false
+ }
+ // Only react to prometheus datasources.
+ return ds.Spec.Type == v1alpha1.DatasourceTypePrometheus
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.Named("cortex-prometheus-datasource").
Complete(r)
}
diff --git a/internal/knowledge/extractor/controller.go b/internal/knowledge/extractor/controller.go
index cd4f63972..59da22cea 100644
--- a/internal/knowledge/extractor/controller.go
+++ b/internal/knowledge/extractor/controller.go
@@ -16,8 +16,8 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
- "sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/handler"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"
@@ -215,15 +215,20 @@ func (r *KnowledgeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
}
func (r *KnowledgeReconciler) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error {
- return multicluster.BuildController(mcl, mgr).
- Named("cortex-knowledge").
- For(
- &v1alpha1.Knowledge{},
- builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
- // Only react to datasources matching the operator.
- ds := obj.(*v1alpha1.Knowledge)
- return ds.Spec.SchedulingDomain == r.Conf.SchedulingDomain
- })),
- ).
+ bldr := multicluster.BuildController(mcl, mgr)
+ // Watch knowledge changes across all clusters.
+ bldr, err := bldr.WatchesMulticluster(
+ &v1alpha1.Knowledge{},
+ &handler.EnqueueRequestForObject{},
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ // Only react to datasources matching the operator.
+ ds := obj.(*v1alpha1.Knowledge)
+ return ds.Spec.SchedulingDomain == r.Conf.SchedulingDomain
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.Named("cortex-knowledge").
Complete(r)
}
diff --git a/internal/knowledge/kpis/controller.go b/internal/knowledge/kpis/controller.go
index 92ca00728..c35c714d9 100644
--- a/internal/knowledge/kpis/controller.go
+++ b/internal/knowledge/kpis/controller.go
@@ -18,7 +18,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/util/workqueue"
ctrl "sigs.k8s.io/controller-runtime"
- "sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
@@ -478,14 +477,19 @@ func (c *Controller) SetupWithManager(mgr manager.Manager, mcl *multicluster.Cli
if err != nil {
return err
}
+ // Watch kpi changes across all clusters.
+ bldr, err = bldr.WatchesMulticluster(
+ &v1alpha1.KPI{},
+ &handler.EnqueueRequestForObject{},
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ // Only react to kpis matching the scheduling domain.
+ kpi := obj.(*v1alpha1.KPI)
+ return kpi.Spec.SchedulingDomain == c.Config.SchedulingDomain
+ }),
+ )
+ if err != nil {
+ return err
+ }
return bldr.Named("cortex-kpis").
- For(
- &v1alpha1.KPI{},
- builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
- // Only react to kpis matching the scheduling domain.
- kpi := obj.(*v1alpha1.KPI)
- return kpi.Spec.SchedulingDomain == c.Config.SchedulingDomain
- })),
- ).
Complete(c)
}
diff --git a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go
index 64f3c3bd5..6deabaf8d 100644
--- a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go
+++ b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go
@@ -21,7 +21,6 @@ import (
"github.com/cobaltcore-dev/cortex/internal/scheduling/lib"
"github.com/cobaltcore-dev/cortex/pkg/multicluster"
ctrl "sigs.k8s.io/controller-runtime"
- "sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/manager"
@@ -197,9 +196,11 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager,
if err != nil {
return err
}
- return bldr.For(
+ // Watch decision changes across all clusters.
+ bldr, err = bldr.WatchesMulticluster(
&v1alpha1.Decision{},
- builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ &handler.EnqueueRequestForObject{},
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
decision := obj.(*v1alpha1.Decision)
if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder {
return false
@@ -209,8 +210,11 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager,
return false
}
return true
- })),
- ).
- Named("cortex-cinder-decisions").
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.Named("cortex-cinder-decisions").
Complete(c)
}
diff --git a/internal/scheduling/explanation/controller.go b/internal/scheduling/explanation/controller.go
index 7b71e0c4a..edb8ab3f6 100644
--- a/internal/scheduling/explanation/controller.go
+++ b/internal/scheduling/explanation/controller.go
@@ -11,8 +11,8 @@ import (
"github.com/cobaltcore-dev/cortex/pkg/multicluster"
corev1 "k8s.io/api/core/v1"
ctrl "sigs.k8s.io/controller-runtime"
- "sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"
)
@@ -208,14 +208,19 @@ func (c *Controller) SetupWithManager(mgr manager.Manager, mcl *multicluster.Cli
if err := mgr.Add(manager.RunnableFunc(c.StartupCallback)); err != nil {
return err
}
- return multicluster.BuildController(mcl, mgr).
- Named("explanation-controller").
- For(
- &v1alpha1.Decision{},
- builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
- decision := obj.(*v1alpha1.Decision)
- return c.shouldReconcileDecision(decision)
- })),
- ).
+ bldr := multicluster.BuildController(mcl, mgr)
+ // Watch decision changes across all clusters.
+ bldr, err := bldr.WatchesMulticluster(
+ &v1alpha1.Decision{},
+ &handler.EnqueueRequestForObject{},
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ decision := obj.(*v1alpha1.Decision)
+ return c.shouldReconcileDecision(decision)
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.Named("explanation-controller").
Complete(c)
}
diff --git a/internal/scheduling/machines/filter_weigher_pipeline_controller.go b/internal/scheduling/machines/filter_weigher_pipeline_controller.go
index 4316e0509..5dceb4d3a 100644
--- a/internal/scheduling/machines/filter_weigher_pipeline_controller.go
+++ b/internal/scheduling/machines/filter_weigher_pipeline_controller.go
@@ -23,7 +23,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/util/workqueue"
ctrl "sigs.k8s.io/controller-runtime"
- "sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
@@ -282,20 +281,25 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager,
if err != nil {
return err
}
+ // Watch decision changes across all clusters.
+ bldr, err = bldr.WatchesMulticluster(
+ &v1alpha1.Decision{},
+ &handler.EnqueueRequestForObject{},
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ decision := obj.(*v1alpha1.Decision)
+ if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainMachines {
+ return false
+ }
+ // Ignore already decided schedulings.
+ if decision.Status.Result != nil {
+ return false
+ }
+ return true
+ }),
+ )
+ if err != nil {
+ return err
+ }
return bldr.Named("cortex-machine-scheduler").
- For(
- &v1alpha1.Decision{},
- builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
- decision := obj.(*v1alpha1.Decision)
- if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainMachines {
- return false
- }
- // Ignore already decided schedulings.
- if decision.Status.Result != nil {
- return false
- }
- return true
- })),
- ).
Complete(c)
}
diff --git a/internal/scheduling/manila/filter_weigher_pipeline_controller.go b/internal/scheduling/manila/filter_weigher_pipeline_controller.go
index b01ad46f8..faa13b121 100644
--- a/internal/scheduling/manila/filter_weigher_pipeline_controller.go
+++ b/internal/scheduling/manila/filter_weigher_pipeline_controller.go
@@ -21,7 +21,6 @@ import (
"github.com/cobaltcore-dev/cortex/internal/scheduling/manila/plugins/weighers"
"github.com/cobaltcore-dev/cortex/pkg/multicluster"
ctrl "sigs.k8s.io/controller-runtime"
- "sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/manager"
@@ -197,20 +196,25 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager,
if err != nil {
return err
}
+ // Watch decision changes across all clusters.
+ bldr, err = bldr.WatchesMulticluster(
+ &v1alpha1.Decision{},
+ &handler.EnqueueRequestForObject{},
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ decision := obj.(*v1alpha1.Decision)
+ if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila {
+ return false
+ }
+ // Ignore already decided schedulings.
+ if decision.Status.Result != nil {
+ return false
+ }
+ return true
+ }),
+ )
+ if err != nil {
+ return err
+ }
return bldr.Named("cortex-manila-decisions").
- For(
- &v1alpha1.Decision{},
- builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
- decision := obj.(*v1alpha1.Decision)
- if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila {
- return false
- }
- // Ignore already decided schedulings.
- if decision.Status.Result != nil {
- return false
- }
- return true
- })),
- ).
Complete(c)
}
diff --git a/internal/scheduling/nova/deschedulings_cleanup.go b/internal/scheduling/nova/deschedulings_cleanup.go
index bc8d09016..e80d89697 100644
--- a/internal/scheduling/nova/deschedulings_cleanup.go
+++ b/internal/scheduling/nova/deschedulings_cleanup.go
@@ -13,6 +13,7 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
+ "sigs.k8s.io/controller-runtime/pkg/handler"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/predicate"
)
@@ -95,15 +96,20 @@ func (r *DeschedulingsCleanup) SetupWithManager(mgr ctrl.Manager, mcl *multiclus
if err := mgr.Add(&DeschedulingsCleanupOnStartup{r}); err != nil {
return err
}
- return multicluster.BuildController(mcl, mgr).
- For(&v1alpha1.Descheduling{}).
- Named("descheduler-cleanup").
+ bldr := multicluster.BuildController(mcl, mgr)
+ // Watch descheduling changes across all clusters.
+ bldr, err := bldr.WatchesMulticluster(
+ &v1alpha1.Descheduling{},
+ &handler.EnqueueRequestForObject{},
+ // Watch for spec changes (when decisions are added/modified)
+ predicate.GenerationChangedPredicate{},
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.Named("descheduler-cleanup").
WithOptions(controller.Options{
MaxConcurrentReconciles: 10,
}).
- WithEventFilter(
- // Watch for spec changes (when decisions are added/modified)
- predicate.GenerationChangedPredicate{},
- ).
Complete(r)
}
diff --git a/internal/scheduling/nova/deschedulings_executor.go b/internal/scheduling/nova/deschedulings_executor.go
index 207dc0138..d8b7ab88d 100644
--- a/internal/scheduling/nova/deschedulings_executor.go
+++ b/internal/scheduling/nova/deschedulings_executor.go
@@ -16,8 +16,8 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
- "sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/handler"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"
@@ -254,23 +254,27 @@ func (e *DeschedulingsExecutor) Reconcile(ctx context.Context, req ctrl.Request)
}
func (s *DeschedulingsExecutor) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error {
- return multicluster.BuildController(mcl, mgr).
- Named("cortex-nova-deschedulings-executor").
- For(
- &v1alpha1.Descheduling{},
- // Only schedule machines that have the custom scheduler set.
- builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
- deschedulings := obj.(*v1alpha1.Descheduling)
- // We only care about deschedulings that are not completed yet.
- if meta.IsStatusConditionTrue(deschedulings.Status.Conditions, v1alpha1.DeschedulingConditionInProgress) {
- return false
- }
- // We don't care about deschedulings that failed.
- if meta.IsStatusConditionFalse(deschedulings.Status.Conditions, v1alpha1.DeschedulingConditionReady) {
- return false
- }
- return true
- })),
- ).
+ bldr := multicluster.BuildController(mcl, mgr)
+ // Watch descheduling changes across all clusters.
+ bldr, err := bldr.WatchesMulticluster(
+ &v1alpha1.Descheduling{},
+ &handler.EnqueueRequestForObject{},
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ deschedulings := obj.(*v1alpha1.Descheduling)
+ // We only care about deschedulings that are not completed yet.
+ if meta.IsStatusConditionTrue(deschedulings.Status.Conditions, v1alpha1.DeschedulingConditionInProgress) {
+ return false
+ }
+ // We don't care about deschedulings that failed.
+ if meta.IsStatusConditionFalse(deschedulings.Status.Conditions, v1alpha1.DeschedulingConditionReady) {
+ return false
+ }
+ return true
+ }),
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.Named("cortex-nova-deschedulings-executor").
Complete(s)
}
diff --git a/internal/scheduling/nova/detector_pipeline_controller.go b/internal/scheduling/nova/detector_pipeline_controller.go
index 98aea329f..dad1a6bf8 100644
--- a/internal/scheduling/nova/detector_pipeline_controller.go
+++ b/internal/scheduling/nova/detector_pipeline_controller.go
@@ -16,7 +16,6 @@ import (
"github.com/cobaltcore-dev/cortex/pkg/multicluster"
"github.com/sapcc/go-bits/jobloop"
ctrl "sigs.k8s.io/controller-runtime"
- "sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/manager"
@@ -173,12 +172,17 @@ func (c *DetectorPipelineController) SetupWithManager(mgr ctrl.Manager, mcl *mul
if err != nil {
return err
}
+ // Watch descheduling changes across all clusters.
+ bldr, err = bldr.WatchesMulticluster(
+ &v1alpha1.Descheduling{},
+ &handler.EnqueueRequestForObject{},
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ return false // This controller does not reconcile Descheduling resources directly.
+ }),
+ )
+ if err != nil {
+ return err
+ }
return bldr.Named("cortex-nova-deschedulings").
- For(
- &v1alpha1.Descheduling{},
- builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
- return false // This controller does not reconcile Descheduling resources directly.
- })),
- ).
Complete(c)
}
diff --git a/internal/scheduling/nova/filter_weigher_pipeline_controller.go b/internal/scheduling/nova/filter_weigher_pipeline_controller.go
index 02d6ebd97..deb126250 100644
--- a/internal/scheduling/nova/filter_weigher_pipeline_controller.go
+++ b/internal/scheduling/nova/filter_weigher_pipeline_controller.go
@@ -22,7 +22,6 @@ import (
"github.com/cobaltcore-dev/cortex/pkg/multicluster"
hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
ctrl "sigs.k8s.io/controller-runtime"
- "sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/manager"
@@ -233,20 +232,25 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager,
if err != nil {
return err
}
+ // Watch decision changes across all clusters.
+ bldr, err = bldr.WatchesMulticluster(
+ &v1alpha1.Decision{},
+ &handler.EnqueueRequestForObject{},
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ decision := obj.(*v1alpha1.Decision)
+ if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova {
+ return false
+ }
+ // Ignore already decided schedulings.
+ if decision.Status.Result != nil {
+ return false
+ }
+ return true
+ }),
+ )
+ if err != nil {
+ return err
+ }
return bldr.Named("cortex-nova-decisions").
- For(
- &v1alpha1.Decision{},
- builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
- decision := obj.(*v1alpha1.Decision)
- if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova {
- return false
- }
- // Ignore already decided schedulings.
- if decision.Status.Result != nil {
- return false
- }
- return true
- })),
- ).
Complete(c)
}
diff --git a/internal/scheduling/pods/filter_weigher_pipeline_controller.go b/internal/scheduling/pods/filter_weigher_pipeline_controller.go
index 2ca4998e9..42b6d550b 100644
--- a/internal/scheduling/pods/filter_weigher_pipeline_controller.go
+++ b/internal/scheduling/pods/filter_weigher_pipeline_controller.go
@@ -22,7 +22,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/util/workqueue"
ctrl "sigs.k8s.io/controller-runtime"
- "sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
@@ -289,20 +288,25 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager,
if err != nil {
return err
}
+ // Watch decision changes across all clusters.
+ bldr, err = bldr.WatchesMulticluster(
+ &v1alpha1.Decision{},
+ &handler.EnqueueRequestForObject{},
+ predicate.NewPredicateFuncs(func(obj client.Object) bool {
+ decision := obj.(*v1alpha1.Decision)
+ if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainPods {
+ return false
+ }
+ // Ignore already decided schedulings.
+ if decision.Status.Result != nil {
+ return false
+ }
+ return true
+ }),
+ )
+ if err != nil {
+ return err
+ }
return bldr.Named("cortex-pod-scheduler").
- For(
- &v1alpha1.Decision{},
- builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool {
- decision := obj.(*v1alpha1.Decision)
- if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainPods {
- return false
- }
- // Ignore already decided schedulings.
- if decision.Status.Result != nil {
- return false
- }
- return true
- })),
- ).
Complete(c)
}
diff --git a/internal/scheduling/reservations/controller/controller.go b/internal/scheduling/reservations/controller/controller.go
index 4eae4cfc2..ccad943a6 100644
--- a/internal/scheduling/reservations/controller/controller.go
+++ b/internal/scheduling/reservations/controller/controller.go
@@ -25,6 +25,7 @@ import (
"github.com/cobaltcore-dev/cortex/api/v1alpha1"
"github.com/cobaltcore-dev/cortex/pkg/multicluster"
corev1 "k8s.io/api/core/v1"
+ "sigs.k8s.io/controller-runtime/pkg/handler"
)
// Endpoints for the reservations operator.
@@ -242,9 +243,16 @@ func (r *ReservationReconciler) SetupWithManager(mgr ctrl.Manager, mcl *multiclu
})); err != nil {
return err
}
- return multicluster.BuildController(mcl, mgr).
- For(&v1alpha1.Reservation{}).
- Named("reservation").
+ bldr := multicluster.BuildController(mcl, mgr)
+ // Watch reservation changes across all clusters.
+ bldr, err := bldr.WatchesMulticluster(
+ &v1alpha1.Reservation{},
+ &handler.EnqueueRequestForObject{},
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.Named("reservation").
WithOptions(controller.Options{
// We want to process reservations one at a time to avoid overbooking.
MaxConcurrentReconciles: 1,
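
All fourteen controllers in this commit apply the same mechanical transformation. Distilled into one hedged sketch (reconciler, predicate, and controller name are placeholders; the two-value return of `WatchesMulticluster` matches the diffs above):

```go
package controllers

import (
	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
	"github.com/cobaltcore-dev/cortex/pkg/multicluster"
	"sigs.k8s.io/controller-runtime/pkg/handler"
	"sigs.k8s.io/controller-runtime/pkg/manager"
	"sigs.k8s.io/controller-runtime/pkg/predicate"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

// Before: builder-style single-cluster watch via .For(...).
// After: an explicit multi-cluster watch. WatchesMulticluster can fail,
// e.g. for a GVK that no configured cluster serves, so the error is
// propagated instead of being swallowed by the builder chain.
func setupExample(mgr manager.Manager, mcl *multicluster.Client, r reconcile.Reconciler, pred predicate.Predicate) error {
	bldr := multicluster.BuildController(mcl, mgr)
	bldr, err := bldr.WatchesMulticluster(
		&v1alpha1.Decision{},
		&handler.EnqueueRequestForObject{},
		pred,
	)
	if err != nil {
		return err
	}
	return bldr.Named("example").Complete(r)
}
```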
From 11b7dfc31085cda2bec163c0a03c17ff0ef06e81 Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Tue, 17 Mar 2026 13:42:30 +0100
Subject: [PATCH 14/24] Fix hypervisor overcommit manager
---
.../nova/hypervisor_overcommit_controller.go | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/internal/scheduling/nova/hypervisor_overcommit_controller.go b/internal/scheduling/nova/hypervisor_overcommit_controller.go
index 8d54475f0..2b9034670 100644
--- a/internal/scheduling/nova/hypervisor_overcommit_controller.go
+++ b/internal/scheduling/nova/hypervisor_overcommit_controller.go
@@ -230,13 +230,15 @@ func (c *HypervisorOvercommitController) SetupWithManager(mgr ctrl.Manager) (err
if !ok {
return errors.New("provided client must be a multicluster client")
}
- return multicluster.
- BuildController(mcl, mgr).
- // The hypervisor crd may be distributed across multiple remote clusters.
- WatchesMulticluster(&hv1.Hypervisor{},
- c.handleRemoteHypervisor(),
- c.predicateRemoteHypervisor(),
- ).
- Named("hypervisor-overcommit-controller").
+ bldr := multicluster.BuildController(mcl, mgr)
+ // The hypervisor crd may be distributed across multiple remote clusters.
+ bldr, err = bldr.WatchesMulticluster(&hv1.Hypervisor{},
+ c.handleRemoteHypervisor(),
+ c.predicateRemoteHypervisor(),
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.Named("hypervisor-overcommit-controller").
Complete(c)
}
From fd4aef5f91a902e6b17edf9ac351b46cb823ec12 Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Tue, 17 Mar 2026 14:16:13 +0100
Subject: [PATCH 15/24] Advance guide [skip ci]
---
Tiltfile | 2 +-
docs/guides/multicluster/cleanup.sh | 17 +++++++
docs/guides/multicluster/readme.md | 19 +++++--
docs/guides/multicluster/run.sh | 79 +++++++++++++++++++++++++++++
4 files changed, 112 insertions(+), 5 deletions(-)
create mode 100755 docs/guides/multicluster/cleanup.sh
create mode 100755 docs/guides/multicluster/run.sh
diff --git a/Tiltfile b/Tiltfile
index 2de697c32..a42fe43f4 100644
--- a/Tiltfile
+++ b/Tiltfile
@@ -75,7 +75,7 @@ local('kubectl wait --namespace cert-manager --for=condition=available deploymen
########### Dependency CRDs
# Make sure the local cluster is running if you are running into startup issues here.
-url = 'https://raw.githubusercontent.com/cobaltcore-dev/openstack-hypervisor-operator/refs/heads/main/charts/openstack-hypervisor-operator/crds/hypervisor-crd.yaml'
+url = 'https://raw.githubusercontent.com/cobaltcore-dev/openstack-hypervisor-operator/refs/heads/main/charts/openstack-hypervisor-operator/crds/kvm.cloud.sap_hypervisors.yaml'
local('curl -L ' + url + ' | kubectl apply -f -')
########### Cortex Operator & CRDs
diff --git a/docs/guides/multicluster/cleanup.sh b/docs/guides/multicluster/cleanup.sh
new file mode 100755
index 000000000..cfa166036
--- /dev/null
+++ b/docs/guides/multicluster/cleanup.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+set -e
+
+echo "Deleting home cluster"
+kind delete cluster --name cortex-home
+
+echo "Deleting az-a and az-b clusters"
+kind delete cluster --name cortex-remote-az-a
+kind delete cluster --name cortex-remote-az-b
+
+echo "Cleaning up temporary files"
+rm -f /tmp/root-ca-home.pem \
+ /tmp/root-ca-remote-az-a.pem \
+ /tmp/root-ca-remote-az-b.pem \
+ /tmp/cortex-values.yaml \
+ /tmp/hypervisor-crd.yaml
\ No newline at end of file
diff --git a/docs/guides/multicluster/readme.md b/docs/guides/multicluster/readme.md
index bac318fb2..3050d6c51 100644
--- a/docs/guides/multicluster/readme.md
+++ b/docs/guides/multicluster/readme.md
@@ -1,5 +1,8 @@
# Cortex Multi-Cluster Testing
+> [!NOTE]
+> If you want to skip the reading part, there are `run.sh` and `cleanup.sh` scripts in this directory that will set up and tear down the multi-cluster environment for you.
+
Cortex provides support for multi-cluster deployments, where a "home" cluster hosts the cortex pods and one or more "remote" clusters are used to persist CRDs. A typical use case for this would be to offload the etcd storage for Cortex CRDs to a remote cluster, reducing the resource usage on the home cluster. Similarly, another use case is to have multiple remote clusters that maintain all the compute workloads and expose resources that Cortex needs to access, such as the `Hypervisor` resource.
This guide will walk you through setting up a multi-cluster Cortex deployment using [kind](https://kind.sigs.k8s.io/). We will create three kind clusters: `cortex-home`, `cortex-remote-az-a`, and `cortex-remote-az-b`. The `cortex-home` cluster will host the Cortex control plane, while the `cortex-remote-az-a` and `cortex-remote-az-b` clusters will be used to store hypervisor CRDs.
@@ -71,6 +74,14 @@ kubectl config use-context kind-cortex-remote-az-b
helm install helm/bundles/cortex-crds --generate-name
```
+Let's also install the hypervisor CRD in all three clusters, since it's needed as an external dependency for this example:
+```bash
+curl -L https://raw.githubusercontent.com/cobaltcore-dev/openstack-hypervisor-operator/refs/heads/main/charts/openstack-hypervisor-operator/crds/kvm.cloud.sap_hypervisors.yaml > /tmp/hypervisor-crd.yaml
+kubectl --context kind-cortex-home apply -f /tmp/hypervisor-crd.yaml
+kubectl --context kind-cortex-remote-az-a apply -f /tmp/hypervisor-crd.yaml
+kubectl --context kind-cortex-remote-az-b apply -f /tmp/hypervisor-crd.yaml
+```
+
Also, we need to extract the root CA certificate used by the `cortex-remote-*` clusters' API servers, so that we can configure the cortex pods in the `cortex-home` cluster to trust them.
```bash
@@ -91,22 +102,22 @@ global:
conf:
apiservers:
remotes:
- - host: https://host.docker.internal:8443
+ - host: https://host.docker.internal:8444
gvks:
- kvm.cloud.sap/v1/Hypervisor
- kvm.cloud.sap/v1/HypervisorList
labels:
az: cortex-remote-az-a
caCert: |
- $(cat /tmp/root-ca-home.pem | sed 's/^/ /')
- - host: https://host.docker.internal:8444
+$(cat /tmp/root-ca-home.pem | sed 's/^/ /')
+ - host: https://host.docker.internal:8445
gvks:
- kvm.cloud.sap/v1/Hypervisor
- kvm.cloud.sap/v1/HypervisorList
labels:
az: cortex-remote-az-b
caCert: |
- $(cat /tmp/root-ca-remote-az-b.pem | sed 's/^/ /')
+$(cat /tmp/root-ca-remote-az-b.pem | sed 's/^/ /')
EOF
```
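
The `remotes` entries above map onto a small config shape on the Go side. A sketch of what one deserialized entry could look like (type and field names are assumptions for illustration; the real config types may differ):

```go
package multicluster

// RemoteAPIServer sketches one entry under conf.apiservers.remotes.
type RemoteAPIServer struct {
	// Host of the remote kube-apiserver, e.g. https://host.docker.internal:8444.
	Host string `yaml:"host"`
	// GVKs served by this remote; used to route List/Get/Watch operations.
	GVKs []string `yaml:"gvks"`
	// Labels matched by resource routers to route Create/Update/Delete.
	Labels map[string]string `yaml:"labels"`
	// PEM-encoded root CA certificate of the remote apiserver.
	CACert string `yaml:"caCert"`
}
```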
diff --git a/docs/guides/multicluster/run.sh b/docs/guides/multicluster/run.sh
new file mode 100755
index 000000000..6403fdf29
--- /dev/null
+++ b/docs/guides/multicluster/run.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+set -e
+
+echo "Creating home cluster"
+kind create cluster --config docs/guides/multicluster/cortex-home.yaml
+
+echo "Applying cluster role binding for oidc endpoint access"
+kubectl --context kind-cortex-home apply -f docs/guides/multicluster/cortex-home-crb.yaml
+
+echo "Installing hypervisor crd in home cluster"
+
+echo "Storing home cluster cert under /tmp/root-ca-home.pem"
+kubectl --context kind-cortex-home --namespace kube-system \
+ get configmap extension-apiserver-authentication \
+ -o jsonpath="{.data['client-ca-file']}" > /tmp/root-ca-home.pem
+
+echo "Creating az-a and az-b clusters"
+kind create cluster --config docs/guides/multicluster/cortex-remote-az-a.yaml
+kind create cluster --config docs/guides/multicluster/cortex-remote-az-b.yaml
+
+echo "Granting cortex-home sa tokens access to az-a and az-b clusters"
+kubectl --context kind-cortex-remote-az-a apply -f docs/guides/multicluster/cortex-remote-crb.yaml
+kubectl --context kind-cortex-remote-az-b apply -f docs/guides/multicluster/cortex-remote-crb.yaml
+
+echo "Installing cortex crds in az-a and az-b clusters"
+kubectl config use-context kind-cortex-remote-az-a
+helm install helm/bundles/cortex-crds --generate-name
+kubectl config use-context kind-cortex-remote-az-b
+helm install helm/bundles/cortex-crds --generate-name
+
+echo "Installing hypervisor crd as external dependency to all three clusters"
+curl -L https://raw.githubusercontent.com/cobaltcore-dev/openstack-hypervisor-operator/refs/heads/main/charts/openstack-hypervisor-operator/crds/kvm.cloud.sap_hypervisors.yaml > /tmp/hypervisor-crd.yaml
+kubectl --context kind-cortex-home apply -f /tmp/hypervisor-crd.yaml
+kubectl --context kind-cortex-remote-az-a apply -f /tmp/hypervisor-crd.yaml
+kubectl --context kind-cortex-remote-az-b apply -f /tmp/hypervisor-crd.yaml
+
+echo "Storing az-a and az-b cluster certs under /tmp/root-ca-az-a.pem and /tmp/root-ca-az-b.pem"
+kubectl --context kind-cortex-remote-az-a --namespace kube-system \
+ get configmap extension-apiserver-authentication \
+ -o jsonpath="{.data['client-ca-file']}" > /tmp/root-ca-remote-az-a.pem
+kubectl --context kind-cortex-remote-az-b --namespace kube-system \
+ get configmap extension-apiserver-authentication \
+ -o jsonpath="{.data['client-ca-file']}" > /tmp/root-ca-remote-az-b.pem
+
+echo "Setting up tilt overrides for cortex values"
+export TILT_OVERRIDES_PATH=/tmp/cortex-values.yaml
+tee $TILT_OVERRIDES_PATH <<EOF
Date: Tue, 17 Mar 2026 14:44:48 +0100
Subject: [PATCH 16/24] PR feedback
---
docs/guides/multicluster/readme.md | 2 +-
docs/guides/multicluster/run.sh | 6 ++----
2 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/docs/guides/multicluster/readme.md b/docs/guides/multicluster/readme.md
index 3050d6c51..cd75cf0aa 100644
--- a/docs/guides/multicluster/readme.md
+++ b/docs/guides/multicluster/readme.md
@@ -109,7 +109,7 @@ global:
labels:
az: cortex-remote-az-a
caCert: |
-$(cat /tmp/root-ca-home.pem | sed 's/^/ /')
+$(cat /tmp/root-ca-remote-az-a.pem | sed 's/^/ /')
- host: https://host.docker.internal:8445
gvks:
- kvm.cloud.sap/v1/Hypervisor
diff --git a/docs/guides/multicluster/run.sh b/docs/guides/multicluster/run.sh
index 6403fdf29..063de8ab6 100755
--- a/docs/guides/multicluster/run.sh
+++ b/docs/guides/multicluster/run.sh
@@ -8,8 +8,6 @@ kind create cluster --config docs/guides/multicluster/cortex-home.yaml
echo "Applying cluster role binding for oidc endpoint access"
kubectl --context kind-cortex-home apply -f docs/guides/multicluster/cortex-home-crb.yaml
-echo "Installing hypervisor crd in home cluster"
-
echo "Storing home cluster cert under /tmp/root-ca-home.pem"
kubectl --context kind-cortex-home --namespace kube-system \
get configmap extension-apiserver-authentication \
@@ -35,7 +33,7 @@ kubectl --context kind-cortex-home apply -f /tmp/hypervisor-crd.yaml
kubectl --context kind-cortex-remote-az-a apply -f /tmp/hypervisor-crd.yaml
kubectl --context kind-cortex-remote-az-b apply -f /tmp/hypervisor-crd.yaml
-echo "Storing az-a and az-b cluster certs under /tmp/root-ca-az-a.pem and /tmp/root-ca-az-b.pem"
+echo "Storing az-a and az-b cluster certs under /tmp/root-ca-remote-az-a.pem and /tmp/root-ca-remote-az-b.pem"
kubectl --context kind-cortex-remote-az-a --namespace kube-system \
get configmap extension-apiserver-authentication \
-o jsonpath="{.data['client-ca-file']}" > /tmp/root-ca-remote-az-a.pem
@@ -57,7 +55,7 @@ global:
labels:
az: cortex-remote-az-a
caCert: |
-$(cat /tmp/root-ca-home.pem | sed 's/^/ /')
+$(cat /tmp/root-ca-remote-az-a.pem | sed 's/^/ /')
- host: https://host.docker.internal:8445
gvks:
- kvm.cloud.sap/v1/Hypervisor
From 0691d4ea97a6ca710d2aa488b58d3342d3baec1a Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Tue, 17 Mar 2026 14:58:08 +0100
Subject: [PATCH 17/24] Add outcome
---
docs/guides/multicluster/readme.md | 22 +++++++++++++++++++---
1 file changed, 19 insertions(+), 3 deletions(-)
diff --git a/docs/guides/multicluster/readme.md b/docs/guides/multicluster/readme.md
index cd75cf0aa..f65d19486 100644
--- a/docs/guides/multicluster/readme.md
+++ b/docs/guides/multicluster/readme.md
@@ -136,8 +136,24 @@ export ACTIVE_DEPLOYMENTS="nova" && tilt up
## Outcome
-With Cortex running in the `cortex-home` cluster and configured to use the `cortex-remote-*` clusters for hypervisors, we can verify that everything is working as expected with the following command:
+With Cortex running in the `cortex-home` cluster and configured to use the `cortex-remote-*` clusters for hypervisors, you can see it processing your resources across multiple remote clusters:
-```bash
-TODO
+```
+2026-03-17T13:55:48Z INFO adding remote cluster for resource {"gvk": "kvm.cloud.sap/v1, Kind=Hypervisor", "host": "https://host.docker.internal:8444", "labels": {"az":"cortex-remote-az-a"}}
+2026-03-17T13:55:48Z INFO adding remote cluster for resource {"gvk": "kvm.cloud.sap/v1, Kind=HypervisorList", "host": "https://host.docker.internal:8444", "labels": {"az":"cortex-remote-az-a"}}
+2026-03-17T13:55:48Z INFO adding remote cluster for resource {"gvk": "kvm.cloud.sap/v1, Kind=Hypervisor", "host": "https://host.docker.internal:8445", "labels": {"az":"cortex-remote-az-b"}}
+2026-03-17T13:55:48Z INFO adding remote cluster for resource {"gvk": "kvm.cloud.sap/v1, Kind=HypervisorList", "host": "https://host.docker.internal:8445", "labels": {"az":"cortex-remote-az-b"}}
+...
+2026-03-17T13:56:06Z INFO Reconciling resource {"controller": "hypervisor-overcommit-controller", "namespace": "", "name": "hypervisor-1-az-b", "reconcileID": "283342c3-5efe-4afc-a906-67d4c17dcba9"}
+2026-03-17T13:56:06Z INFO Desired overcommit ratios based on traits {"controller": "hypervisor-overcommit-controller", "namespace": "", "name": "hypervisor-1-az-b", "reconcileID": "283342c3-5efe-4afc-a906-67d4c17dcba9", "desiredOvercommit": {}}
+2026-03-17T13:56:06Z INFO Overcommit ratios are up to date, no update needed {"controller": "hypervisor-overcommit-controller", "namespace": "", "name": "hypervisor-1-az-b", "reconcileID": "283342c3-5efe-4afc-a906-67d4c17dcba9"}
+2026-03-17T13:56:06Z INFO Reconciling resource {"controller": "hypervisor-overcommit-controller", "namespace": "", "name": "hypervisor-2-az-b", "reconcileID": "87d2937c-e0b0-415d-8fbf-c31d60a39370"}
+2026-03-17T13:56:06Z INFO Desired overcommit ratios based on traits {"controller": "hypervisor-overcommit-controller", "namespace": "", "name": "hypervisor-2-az-b", "reconcileID": "87d2937c-e0b0-415d-8fbf-c31d60a39370", "desiredOvercommit": {}}
+2026-03-17T13:56:06Z INFO Overcommit ratios are up to date, no update needed {"controller": "hypervisor-overcommit-controller", "namespace": "", "name": "hypervisor-2-az-b", "reconcileID": "87d2937c-e0b0-415d-8fbf-c31d60a39370"}
+2026-03-17T13:56:06Z INFO Reconciling resource {"controller": "hypervisor-overcommit-controller", "namespace": "", "name": "hypervisor-1-az-a", "reconcileID": "d44c7d72-d994-46f3-8f32-04b0aa550ac3"}
+2026-03-17T13:56:06Z INFO Desired overcommit ratios based on traits {"controller": "hypervisor-overcommit-controller", "namespace": "", "name": "hypervisor-1-az-a", "reconcileID": "d44c7d72-d994-46f3-8f32-04b0aa550ac3", "desiredOvercommit": {}}
+2026-03-17T13:56:06Z INFO Overcommit ratios are up to date, no update needed {"controller": "hypervisor-overcommit-controller", "namespace": "", "name": "hypervisor-1-az-a", "reconcileID": "d44c7d72-d994-46f3-8f32-04b0aa550ac3"}
+2026-03-17T13:56:06Z INFO Reconciling resource {"controller": "hypervisor-overcommit-controller", "namespace": "", "name": "hypervisor-2-az-a", "reconcileID": "10cc780b-a44c-4a59-8cc5-518a88146088"}
+2026-03-17T13:56:06Z INFO Desired overcommit ratios based on traits {"controller": "hypervisor-overcommit-controller", "namespace": "", "name": "hypervisor-2-az-a", "reconcileID": "10cc780b-a44c-4a59-8cc5-518a88146088", "desiredOvercommit": {}}
+2026-03-17T13:56:06Z INFO Overcommit ratios are up to date, no update needed {"controller": "hypervisor-overcommit-controller", "namespace": "", "name": "hypervisor-2-az-a", "reconcileID": "10cc780b-a44c-4a59-8cc5-518a88146088"}
```
From 6dda0604b148182266accb5e13071dc4a599ea16 Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Tue, 17 Mar 2026 16:20:39 +0100
Subject: [PATCH 18/24] Refactor clusterForWrite logic and enhance tests for
router matching
---
pkg/multicluster/client.go | 37 ++++++++---------
pkg/multicluster/client_test.go | 74 ++++++++++++++++++++++++++++++++-
2 files changed, 91 insertions(+), 20 deletions(-)
diff --git a/pkg/multicluster/client.go b/pkg/multicluster/client.go
index 228bd5641..59c1e790c 100644
--- a/pkg/multicluster/client.go
+++ b/pkg/multicluster/client.go
@@ -208,29 +208,28 @@ func (c *Client) ClustersForGVK(gvk schema.GroupVersionKind) ([]cluster.Cluster,
func (c *Client) clusterForWrite(gvk schema.GroupVersionKind, obj any) (cluster.Cluster, error) {
c.remoteClustersMu.RLock()
defer c.remoteClustersMu.RUnlock()
+
remotes := c.remoteClusters[gvk]
- if len(remotes) == 0 {
- if c.homeGVKs[gvk] {
- return c.HomeCluster, nil
+
+ if len(remotes) > 0 {
+ router, ok := c.ResourceRouters[gvk]
+ if !ok {
+ return nil, fmt.Errorf("no ResourceRouter configured for GVK %s with %d remote clusters", gvk, len(remotes))
}
- return nil, fmt.Errorf("GVK %s is not configured in home or any remote cluster", gvk)
- }
- router, ok := c.ResourceRouters[gvk]
- if !ok {
- // If there are more than one remote cluster and no router, we don't know which one to write to.
- if len(remotes) == 1 {
- return remotes[0].cluster, nil
+ for _, r := range remotes {
+ match, err := router.Match(obj, r.labels)
+ if err != nil {
+ return nil, fmt.Errorf("ResourceRouter match error for GVK %s: %w", gvk, err)
+ }
+ if match {
+ return r.cluster, nil
+ }
}
- return nil, fmt.Errorf("no ResourceRouter configured for GVK %s with %d clusters", gvk, len(remotes))
}
- for _, r := range remotes {
- match, err := router.Match(obj, r.labels)
- if err != nil {
- return nil, fmt.Errorf("ResourceRouter match error for GVK %s: %w", gvk, err)
- }
- if match {
- return r.cluster, nil
- }
+
+ // If we couldn't find a matching remote cluster (not configured or not found) but the GVK is configured for home, return the home cluster.
+ if c.homeGVKs[gvk] {
+ return c.HomeCluster, nil
}
return nil, fmt.Errorf("no cluster matched for GVK %s", gvk)
}
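
With this refactoring, a router is mandatory whenever remotes are configured for a GVK, and the home cluster is the explicit fallback when no remote matches. For resource types that simply carry their target availability zone as a label, a generic router would suffice. A sketch, assuming the object exposes standard Kubernetes labels (the `LabelAZRouter` name is illustrative, not an existing type):

```go
package multicluster

import "errors"

// LabelAZRouter routes an object by comparing its own "az" label against
// the "az" label configured for each remote cluster. Illustrative sketch.
type LabelAZRouter struct{}

func (LabelAZRouter) Match(obj any, labels map[string]string) (bool, error) {
	o, ok := obj.(interface{ GetLabels() map[string]string })
	if !ok {
		return false, errors.New("object does not expose labels")
	}
	want, ok := labels["az"]
	if !ok {
		// This cluster has no az label, so it can never match.
		return false, nil
	}
	return o.GetLabels()["az"] == want, nil
}
```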
diff --git a/pkg/multicluster/client_test.go b/pkg/multicluster/client_test.go
index 7e29265dc..6e1cca0fd 100644
--- a/pkg/multicluster/client_test.go
+++ b/pkg/multicluster/client_test.go
@@ -115,6 +115,13 @@ func newTestScheme(t *testing.T) *runtime.Scheme {
// testRouter is a simple ResourceRouter for testing.
type testRouter struct{}
+// alwaysMatchRouter matches any object to any cluster.
+type alwaysMatchRouter struct{}
+
+func (r alwaysMatchRouter) Match(any, map[string]string) (bool, error) {
+ return true, nil
+}
+
func (r testRouter) Match(obj any, labels map[string]string) (bool, error) {
cm, ok := obj.(*corev1.ConfigMap)
if !ok {
@@ -314,12 +321,17 @@ func TestClient_clusterForWrite_SingleRemoteCluster(t *testing.T) {
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
+ ResourceRouters: map[schema.GroupVersionKind]ResourceRouter{
+ configMapGVK: testRouter{},
+ },
remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
configMapGVK: {{cluster: remote, labels: map[string]string{"az": "az-1"}}},
},
}
- cl, err := c.clusterForWrite(configMapGVK, &corev1.ConfigMap{})
+ cl, err := c.clusterForWrite(configMapGVK, &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"az": "az-1"}},
+ })
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
@@ -328,6 +340,48 @@ func TestClient_clusterForWrite_SingleRemoteCluster(t *testing.T) {
}
}
+func TestClient_clusterForWrite_NoRouterSingleRemote(t *testing.T) {
+ scheme := newTestScheme(t)
+ c := &Client{
+ HomeCluster: newFakeCluster(scheme),
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {{cluster: newFakeCluster(scheme)}},
+ },
+ }
+
+ _, err := c.clusterForWrite(configMapGVK, &corev1.ConfigMap{})
+ if err == nil {
+ t.Error("expected error when no router configured for remote cluster")
+ }
+}
+
+func TestClient_clusterForWrite_NoMatchFallsBackToHome(t *testing.T) {
+ scheme := newTestScheme(t)
+ homeCluster := newFakeCluster(scheme)
+ c := &Client{
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ ResourceRouters: map[schema.GroupVersionKind]ResourceRouter{
+ configMapGVK: testRouter{},
+ },
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {{cluster: newFakeCluster(scheme), labels: map[string]string{"az": "az-1"}}},
+ },
+ homeGVKs: map[schema.GroupVersionKind]bool{configMapGVK: true},
+ }
+
+ cl, err := c.clusterForWrite(configMapGVK, &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"az": "az-99"}},
+ })
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if cl != homeCluster {
+ t.Error("expected home cluster as fallback when no remote matches")
+ }
+}
+
func TestClient_clusterForWrite_RouterMatches(t *testing.T) {
scheme := newTestScheme(t)
homeCluster := newFakeCluster(scheme)
@@ -703,6 +757,9 @@ func TestClient_Create_SingleRemoteCluster(t *testing.T) {
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
+ ResourceRouters: map[schema.GroupVersionKind]ResourceRouter{
+ configMapGVK: alwaysMatchRouter{},
+ },
remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
configMapGVK: {{cluster: remote}},
},
@@ -811,6 +868,9 @@ func TestClient_Delete_SingleRemoteCluster(t *testing.T) {
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
+ ResourceRouters: map[schema.GroupVersionKind]ResourceRouter{
+ configMapGVK: alwaysMatchRouter{},
+ },
remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
configMapGVK: {{cluster: remote}},
},
@@ -841,6 +901,9 @@ func TestClient_Update_SingleRemoteCluster(t *testing.T) {
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
+ ResourceRouters: map[schema.GroupVersionKind]ResourceRouter{
+ configMapGVK: alwaysMatchRouter{},
+ },
remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
configMapGVK: {{cluster: remote}},
},
@@ -881,6 +944,9 @@ func TestClient_Patch_SingleRemoteCluster(t *testing.T) {
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
+ ResourceRouters: map[schema.GroupVersionKind]ResourceRouter{
+ configMapGVK: alwaysMatchRouter{},
+ },
remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
configMapGVK: {{cluster: remote}},
},
@@ -1047,6 +1113,9 @@ func TestStatusClient_RoutesToRemoteCluster(t *testing.T) {
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
+ ResourceRouters: map[schema.GroupVersionKind]ResourceRouter{
+ podGVK: alwaysMatchRouter{},
+ },
remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
podGVK: {{cluster: remote}},
},
@@ -1102,6 +1171,9 @@ func TestSubResourceClient_RoutesToRemoteCluster(t *testing.T) {
c := &Client{
HomeCluster: homeCluster,
HomeScheme: scheme,
+ ResourceRouters: map[schema.GroupVersionKind]ResourceRouter{
+ podGVK: alwaysMatchRouter{},
+ },
remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
podGVK: {{cluster: remote}},
},
From 8450a16c8a54998164c8bb7ddcb5d38a251e3ec4 Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Wed, 18 Mar 2026 09:37:04 +0100
Subject: [PATCH 19/24] Fix error message casing in clusterForWrite function
---
pkg/multicluster/client.go | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pkg/multicluster/client.go b/pkg/multicluster/client.go
index 59c1e790c..4052b53a3 100644
--- a/pkg/multicluster/client.go
+++ b/pkg/multicluster/client.go
@@ -219,7 +219,7 @@ func (c *Client) clusterForWrite(gvk schema.GroupVersionKind, obj any) (cluster.
for _, r := range remotes {
match, err := router.Match(obj, r.labels)
if err != nil {
- return nil, fmt.Errorf("ResourceRouter match error for GVK %s: %w", gvk, err)
+ return nil, fmt.Errorf("resource router match error for GVK %s: %w", gvk, err)
}
if match {
return r.cluster, nil
From 34fe33d3bbe654761d4bba9e04d60ec5db7353ed Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Wed, 18 Mar 2026 09:52:16 +0100
Subject: [PATCH 20/24] Enhance error handling for duplicate resources in
multi-cluster Get and List methods
---
pkg/multicluster/client.go | 76 ++++++++++++++++---
pkg/multicluster/client_test.go | 128 +++++++++++++++++++++++++++++++-
2 files changed, 192 insertions(+), 12 deletions(-)
diff --git a/pkg/multicluster/client.go b/pkg/multicluster/client.go
index 4052b53a3..58eaec5f9 100644
--- a/pkg/multicluster/client.go
+++ b/pkg/multicluster/client.go
@@ -234,7 +234,8 @@ func (c *Client) clusterForWrite(gvk schema.GroupVersionKind, obj any) (cluster.
return nil, fmt.Errorf("no cluster matched for GVK %s", gvk)
}
-// Get iterates over all clusters with the GVK and returns the first result found.
+// Get iterates over all clusters with the GVK and returns the result.
+// Returns an error if the resource is found in multiple clusters (duplicate).
// If no cluster has the resource, the last NotFound error is returned.
func (c *Client) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error {
gvk, err := c.GVKFromHomeScheme(obj)
@@ -245,21 +246,38 @@ func (c *Client) Get(ctx context.Context, key client.ObjectKey, obj client.Objec
if err != nil {
return err
}
- var lastErr error
+ found := false
for _, cl := range clusters {
+ // If we already found the resource in a previous cluster, we want to check if it also exists in this cluster to detect duplicates.
+ if found {
+ candidate := obj.DeepCopyObject().(client.Object)
+ err := cl.GetClient().Get(ctx, key, candidate, opts...)
+ if err == nil {
+ return fmt.Errorf("duplicate resource found: %s %s/%s exists in multiple clusters", gvk, key.Namespace, key.Name)
+ }
+ if !apierrors.IsNotFound(err) {
+ return err
+ }
+ continue
+ }
+
err := cl.GetClient().Get(ctx, key, obj, opts...)
if err == nil {
- return nil
+ found = true
+ continue
}
if !apierrors.IsNotFound(err) {
return err
}
- lastErr = err
}
- return lastErr
+ if !found {
+ return apierrors.NewNotFound(schema.GroupResource{Group: gvk.Group, Resource: gvk.Kind}, key.Name)
+ }
+ return nil
}
// List iterates over all clusters with the GVK and returns a combined list.
+// Returns an error if any resources share the same namespace/name across clusters.
func (c *Client) List(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error {
gvk, err := c.GVKFromHomeScheme(list)
if err != nil {
@@ -282,6 +300,26 @@ func (c *Client) List(ctx context.Context, list client.ObjectList, opts ...clien
}
allItems = append(allItems, items...)
}
+
+ // Check for duplicate namespace/name pairs across clusters.
+ seen := make(map[string]bool, len(allItems))
+ var duplicates []string
+ for _, item := range allItems {
+ accessor, err := meta.Accessor(item)
+ if err != nil {
+ return fmt.Errorf("failed to access object metadata: %w", err)
+ }
+ key := accessor.GetNamespace() + "/" + accessor.GetName()
+ if _, exists := seen[key]; exists {
+ duplicates = append(duplicates, key)
+ continue
+ }
+ seen[key] = true
+ }
+ if len(duplicates) > 0 {
+ return fmt.Errorf("duplicate resources found in multiple clusters for %s: %v", gvk, duplicates)
+ }
+
return meta.SetList(list, allItems)
}
@@ -452,7 +490,8 @@ type subResourceClient struct {
subResource string
}
-// Get iterates over all clusters with the GVK and returns the first result found.
+// Get iterates over all clusters with the GVK and returns the result.
+// Returns an error if the resource is found in multiple clusters (duplicate).
func (c *subResourceClient) Get(ctx context.Context, obj, subResource client.Object, opts ...client.SubResourceGetOption) error {
gvk, err := c.multiclusterClient.GVKFromHomeScheme(obj)
if err != nil {
@@ -462,18 +501,35 @@ func (c *subResourceClient) Get(ctx context.Context, obj, subResource client.Obj
if err != nil {
return err
}
- var lastErr error
+
+ found := false
for _, cl := range clusters {
+ if found {
+ candidateObj := obj.DeepCopyObject().(client.Object)
+ candidateSub := subResource.DeepCopyObject().(client.Object)
+ err := cl.GetClient().SubResource(c.subResource).Get(ctx, candidateObj, candidateSub, opts...)
+ if err == nil {
+ return fmt.Errorf("duplicate sub-resource found: %s %s/%s exists in multiple clusters", gvk, obj.GetNamespace(), obj.GetName())
+ }
+ if !apierrors.IsNotFound(err) {
+ return err
+ }
+ continue
+ }
+
err := cl.GetClient().SubResource(c.subResource).Get(ctx, obj, subResource, opts...)
if err == nil {
- return nil
+ found = true
+ continue
}
if !apierrors.IsNotFound(err) {
return err
}
- lastErr = err
}
- return lastErr
+ if !found {
+ return apierrors.NewNotFound(schema.GroupResource{Group: gvk.Group, Resource: gvk.Kind}, obj.GetName())
+ }
+ return nil
}
// Create routes the subresource create to the matching cluster using the ResourceRouter.
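
For callers, duplicate detection means a Get can now fail in two distinct ways: the usual NotFound, and a duplicate error when the same namespace/name exists in more than one cluster. Since the duplicate error is a plain fmt.Errorf with no typed sentinel, a string check is the only way to recognize it today. A sketch of a caller distinguishing the cases (illustrative only):

```go
package example

import (
	"context"
	"strings"

	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// getOne reads a ConfigMap through the multicluster client and classifies
// the possible outcomes. Illustrative sketch.
func getOne(ctx context.Context, c client.Client, key client.ObjectKey) error {
	cm := &corev1.ConfigMap{}
	err := c.Get(ctx, key, cm)
	switch {
	case err == nil:
		return nil // exactly one cluster had the object
	case apierrors.IsNotFound(err):
		return nil // no cluster had it; treat as absent
	case strings.Contains(err.Error(), "duplicate resource found"):
		// The object exists in multiple clusters: a data consistency
		// problem that needs operator attention rather than a retry.
		return err
	default:
		return err
	}
}
```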
diff --git a/pkg/multicluster/client_test.go b/pkg/multicluster/client_test.go
index 6e1cca0fd..64b0ae94c 100644
--- a/pkg/multicluster/client_test.go
+++ b/pkg/multicluster/client_test.go
@@ -5,6 +5,7 @@ package multicluster
import (
"context"
+ "strings"
"sync"
"testing"
@@ -545,8 +546,8 @@ func TestClient_Get_SingleRemoteCluster(t *testing.T) {
}
}
-func TestClient_Get_MultiCluster_FirstFound(t *testing.T) {
- // Iterate all remote clusters and return the first found object. In this test, only remote2 has the object, so it should be returned.
+func TestClient_Get_MultiCluster_SingleResult(t *testing.T) {
+ // Iterate all remote clusters and return the object. In this test, only remote2 has the object, so it should be returned.
scheme := newTestScheme(t)
existingCM := &corev1.ConfigMap{
@@ -643,6 +644,66 @@ func TestClient_Get_HomeGVKCluster(t *testing.T) {
}
}
+func TestClient_Get_MultiCluster_DuplicateError(t *testing.T) {
+ // If the same resource exists in multiple remote clusters, Get should return a duplicate error.
+ scheme := newTestScheme(t)
+ cm := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{Name: "shared-cm", Namespace: "default"},
+ Data: map[string]string{"key": "value"},
+ }
+ remote1 := newFakeCluster(scheme, cm.DeepCopy())
+ remote2 := newFakeCluster(scheme, cm.DeepCopy())
+
+ c := &Client{
+ HomeCluster: newFakeCluster(scheme),
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {
+ {cluster: remote1, labels: map[string]string{"az": "az-1"}},
+ {cluster: remote2, labels: map[string]string{"az": "az-2"}},
+ },
+ },
+ }
+
+ result := &corev1.ConfigMap{}
+ err := c.Get(context.Background(), client.ObjectKey{Name: "shared-cm", Namespace: "default"}, result)
+ if err == nil {
+ t.Fatal("expected duplicate error, got nil")
+ }
+ if !strings.Contains(err.Error(), "duplicate") {
+ t.Errorf("expected duplicate error, got: %v", err)
+ }
+}
+
+func TestClient_Get_HomeAndRemote_DuplicateError(t *testing.T) {
+ // If the same resource exists in both home and a remote cluster, Get should return a duplicate error.
+ scheme := newTestScheme(t)
+ cm := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{Name: "shared-cm", Namespace: "default"},
+ Data: map[string]string{"key": "value"},
+ }
+ homeCluster := newFakeCluster(scheme, cm.DeepCopy())
+ remote := newFakeCluster(scheme, cm.DeepCopy())
+
+ c := &Client{
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapGVK: {{cluster: remote, labels: map[string]string{"az": "az-1"}}},
+ },
+ homeGVKs: map[schema.GroupVersionKind]bool{configMapGVK: true},
+ }
+
+ result := &corev1.ConfigMap{}
+ err := c.Get(context.Background(), client.ObjectKey{Name: "shared-cm", Namespace: "default"}, result)
+ if err == nil {
+ t.Fatal("expected duplicate error, got nil")
+ }
+ if !strings.Contains(err.Error(), "duplicate") {
+ t.Errorf("expected duplicate error, got: %v", err)
+ }
+}
+
func TestClient_List_SingleRemoteCluster(t *testing.T) {
scheme := newTestScheme(t)
cm1 := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "cm1", Namespace: "default"}}
@@ -749,6 +810,69 @@ func TestClient_List_HomeClusterOnly(t *testing.T) {
}
}
+func TestClient_List_MultipleClusters_DuplicateError(t *testing.T) {
+ // If the same namespace/name exists in multiple remote clusters, List should return a duplicate error.
+ scheme := newTestScheme(t)
+ remote1 := newFakeCluster(scheme,
+ &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "shared-cm", Namespace: "default"}},
+ )
+ remote2 := newFakeCluster(scheme,
+ &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "shared-cm", Namespace: "default"}},
+ )
+
+ c := &Client{
+ HomeCluster: newFakeCluster(scheme),
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapListGVK: {
+ {cluster: remote1, labels: map[string]string{"az": "az-1"}},
+ {cluster: remote2, labels: map[string]string{"az": "az-2"}},
+ },
+ },
+ }
+
+ cmList := &corev1.ConfigMapList{}
+ err := c.List(context.Background(), cmList, client.InNamespace("default"))
+ if err == nil {
+ t.Fatal("expected duplicate error, got nil")
+ }
+ if !strings.Contains(err.Error(), "duplicate") {
+ t.Errorf("expected duplicate error, got: %v", err)
+ }
+ if !strings.Contains(err.Error(), "default/shared-cm") {
+ t.Errorf("expected error to contain duplicated resource name, got: %v", err)
+ }
+}
+
+func TestClient_List_HomeAndRemote_DuplicateError(t *testing.T) {
+ // If the same namespace/name exists in both home and remote, List should return a duplicate error.
+ scheme := newTestScheme(t)
+ homeCluster := newFakeCluster(scheme,
+ &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "shared-cm", Namespace: "default"}},
+ )
+ remote := newFakeCluster(scheme,
+ &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "shared-cm", Namespace: "default"}},
+ )
+
+ c := &Client{
+ HomeCluster: homeCluster,
+ HomeScheme: scheme,
+ remoteClusters: map[schema.GroupVersionKind][]remoteCluster{
+ configMapListGVK: {{cluster: remote, labels: map[string]string{"az": "az-1"}}},
+ },
+ homeGVKs: map[schema.GroupVersionKind]bool{configMapListGVK: true},
+ }
+
+ cmList := &corev1.ConfigMapList{}
+ err := c.List(context.Background(), cmList, client.InNamespace("default"))
+ if err == nil {
+ t.Fatal("expected duplicate error, got nil")
+ }
+ if !strings.Contains(err.Error(), "duplicate") {
+ t.Errorf("expected duplicate error, got: %v", err)
+ }
+}
+
func TestClient_Create_SingleRemoteCluster(t *testing.T) {
scheme := newTestScheme(t)
homeCluster := newFakeCluster(scheme)
From f93f8cb5d0a0eaf88141b5a827835205f82929de Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Wed, 18 Mar 2026 10:34:36 +0100
Subject: [PATCH 21/24] Implement soft fail if cluster is not available
---
pkg/multicluster/client.go | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/pkg/multicluster/client.go b/pkg/multicluster/client.go
index 58eaec5f9..a875edc23 100644
--- a/pkg/multicluster/client.go
+++ b/pkg/multicluster/client.go
@@ -238,6 +238,7 @@ func (c *Client) clusterForWrite(gvk schema.GroupVersionKind, obj any) (cluster.
// Returns an error if the resource is found in multiple clusters (duplicate).
// If no cluster has the resource, the last NotFound error is returned.
func (c *Client) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error {
+ log := ctrl.LoggerFrom(ctx)
gvk, err := c.GVKFromHomeScheme(obj)
if err != nil {
return err
@@ -256,7 +257,7 @@ func (c *Client) Get(ctx context.Context, key client.ObjectKey, obj client.Objec
return fmt.Errorf("duplicate resource found: %s %s/%s exists in multiple clusters", gvk, key.Namespace, key.Name)
}
if !apierrors.IsNotFound(err) {
- return err
+ log.Error(err, "error checking for duplicate resource in cluster", "gvk", gvk, "namespace", key.Namespace, "name", key.Name)
}
continue
}
@@ -267,7 +268,7 @@ func (c *Client) Get(ctx context.Context, key client.ObjectKey, obj client.Objec
continue
}
if !apierrors.IsNotFound(err) {
- return err
+ log.Error(err, "error getting resource from cluster", "gvk", gvk, "namespace", key.Namespace, "name", key.Name)
}
}
if !found {
@@ -493,6 +494,8 @@ type subResourceClient struct {
// Get iterates over all clusters with the GVK and returns the result.
// Returns an error if the resource is found in multiple clusters (duplicate).
func (c *subResourceClient) Get(ctx context.Context, obj, subResource client.Object, opts ...client.SubResourceGetOption) error {
+ log := ctrl.LoggerFrom(ctx)
+
gvk, err := c.multiclusterClient.GVKFromHomeScheme(obj)
if err != nil {
return err
@@ -512,7 +515,7 @@ func (c *subResourceClient) Get(ctx context.Context, obj, subResource client.Obj
return fmt.Errorf("duplicate sub-resource found: %s %s/%s exists in multiple clusters", gvk, obj.GetNamespace(), obj.GetName())
}
if !apierrors.IsNotFound(err) {
- return err
+ log.Error(err, "error checking for duplicate sub-resource in cluster", "gvk", gvk, "namespace", obj.GetNamespace(), "name", obj.GetName(), "subresource", c.subResource)
}
continue
}
@@ -523,7 +526,7 @@ func (c *subResourceClient) Get(ctx context.Context, obj, subResource client.Obj
continue
}
if !apierrors.IsNotFound(err) {
- return err
+ log.Error(err, "error getting sub-resource from cluster", "gvk", gvk, "namespace", obj.GetNamespace(), "name", obj.GetName(), "subresource", c.subResource)
}
}
if !found {
From b46676a54abcee12fbe80ae19f81f5e041f94a1d Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Wed, 18 Mar 2026 10:41:22 +0100
Subject: [PATCH 22/24] Enhance Get and List methods to log non-NotFound errors
and prevent single cluster failures from blocking reads
---
pkg/multicluster/client.go | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/pkg/multicluster/client.go b/pkg/multicluster/client.go
index a875edc23..bed938286 100644
--- a/pkg/multicluster/client.go
+++ b/pkg/multicluster/client.go
@@ -236,7 +236,9 @@ func (c *Client) clusterForWrite(gvk schema.GroupVersionKind, obj any) (cluster.
// Get iterates over all clusters with the GVK and returns the result.
// Returns an error if the resource is found in multiple clusters (duplicate).
-// If no cluster has the resource, the last NotFound error is returned.
+// If no cluster has the resource, a NotFound error is returned.
+// Non-NotFound errors from individual clusters are logged and silently skipped
+// so that a single unavailable cluster does not block the entire read path.
func (c *Client) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error {
log := ctrl.LoggerFrom(ctx)
gvk, err := c.GVKFromHomeScheme(obj)
@@ -279,7 +281,10 @@ func (c *Client) Get(ctx context.Context, key client.ObjectKey, obj client.Objec
// List iterates over all clusters with the GVK and returns a combined list.
// Returns an error if any resources share the same namespace/name across clusters.
+// Errors from individual clusters are logged and silently skipped so that a
+// single unavailable cluster does not block the entire read path.
func (c *Client) List(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error {
+ log := ctrl.LoggerFrom(ctx)
gvk, err := c.GVKFromHomeScheme(list)
if err != nil {
return err
@@ -293,7 +298,8 @@ func (c *Client) List(ctx context.Context, list client.ObjectList, opts ...clien
for _, cl := range clusters {
listCopy := list.DeepCopyObject().(client.ObjectList)
if err := cl.GetClient().List(ctx, listCopy, opts...); err != nil {
- return err
+ log.Error(err, "error listing resources from cluster", "gvk", gvk)
+ continue
}
items, err := meta.ExtractList(listCopy)
if err != nil {
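
Taken together, the last two patches turn multicluster reads into a best-effort fan-out: an unreachable cluster is logged and skipped instead of failing the whole operation, so List may return a partial view while a remote is down. Callers that do destructive cleanup based on "not in the list" should account for this. A sketch of a conservative caller, gating pruning behind an externally supplied health signal (the flag is an assumption; the client does not report which clusters were skipped):

```go
package example

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// pruneOrphans shows why best-effort reads matter to callers: a partial
// List must not be mistaken for deletions. Illustrative sketch only.
func pruneOrphans(ctx context.Context, c client.Client, allRemotesHealthy bool) error {
	list := &corev1.ConfigMapList{}
	if err := c.List(ctx, list); err != nil {
		return err
	}
	if !allRemotesHealthy {
		// The list may be partial; do not treat missing items as deleted.
		return nil
	}
	// ... compare list.Items against the desired state and delete orphans ...
	return nil
}
```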
From fa723fe5a04b52bc25e03add3c5e7dc9b53d9e9e Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Wed, 18 Mar 2026 11:30:39 +0100
Subject: [PATCH 23/24] Use corev1.LabelTopologyZone
---
pkg/multicluster/routers.go | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/pkg/multicluster/routers.go b/pkg/multicluster/routers.go
index 22b7d1a6e..1e0496791 100644
--- a/pkg/multicluster/routers.go
+++ b/pkg/multicluster/routers.go
@@ -7,6 +7,7 @@ import (
"errors"
hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
+ corev1 "k8s.io/api/core/v1"
)
// ResourceRouter determines which remote cluster a resource should be written to
@@ -32,7 +33,7 @@ func (h HypervisorResourceRouter) Match(obj any, labels map[string]string) (bool
if !ok {
return false, errors.New("cluster does not have availability zone label")
}
- hvAZ, ok := hv.Labels["topology.kubernetes.io/zone"]
+ hvAZ, ok := hv.Labels[corev1.LabelTopologyZone]
if !ok {
return false, errors.New("hypervisor does not have availability zone label")
}
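
`corev1.LabelTopologyZone` is the well-known constant for `topology.kubernetes.io/zone` from `k8s.io/api/core/v1`, so the router no longer hard-codes the label string. The same constant is handy wherever hypervisors are labeled, for example in test fixtures (sketch; `newTestHypervisor` is not an existing helper, and fields beyond ObjectMeta are left at zero values):

```go
package example

import (
	hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// newTestHypervisor builds a Hypervisor labeled with its availability zone
// using the well-known topology label constant.
func newTestHypervisor(name, az string) *hv1.Hypervisor {
	return &hv1.Hypervisor{
		ObjectMeta: metav1.ObjectMeta{
			Name:   name,
			Labels: map[string]string{corev1.LabelTopologyZone: az},
		},
	}
}
```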
From 0c24a21fd3db36bccb2aed5a055b55f30b2aaf57 Mon Sep 17 00:00:00 2001
From: Markus Wieland
Date: Thu, 19 Mar 2026 09:24:49 +0100
Subject: [PATCH 24/24] Improve formatting and readability in SetupWithManager
function
---
.../reservations/controller/controller.go | 32 +++++++++----------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/internal/scheduling/reservations/controller/controller.go b/internal/scheduling/reservations/controller/controller.go
index af027acec..20580bdff 100644
--- a/internal/scheduling/reservations/controller/controller.go
+++ b/internal/scheduling/reservations/controller/controller.go
@@ -517,20 +517,20 @@ func (r *ReservationReconciler) SetupWithManager(mgr ctrl.Manager, mcl *multiclu
})); err != nil {
return err
}
- bldr := multicluster.BuildController(mcl, mgr)
- // Watch reservation changes across all clusters.
- bldr, err := bldr.WatchesMulticluster(
- &v1alpha1.Reservation{},
- &handler.EnqueueRequestForObject{},
- notFailoverReservationPredicate,
- )
- if err != nil {
- return err
- }
- return bldr.Named("reservation").
- WithOptions(controller.Options{
- // We want to process reservations one at a time to avoid overbooking.
- MaxConcurrentReconciles: 1,
- }).
- Complete(r)
+ bldr := multicluster.BuildController(mcl, mgr)
+ // Watch reservation changes across all clusters.
+ bldr, err := bldr.WatchesMulticluster(
+ &v1alpha1.Reservation{},
+ &handler.EnqueueRequestForObject{},
+ notFailoverReservationPredicate,
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.Named("reservation").
+ WithOptions(controller.Options{
+ // We want to process reservations one at a time to avoid overbooking.
+ MaxConcurrentReconciles: 1,
+ }).
+ Complete(r)
}