From 33bad70da2f64a2c86f8b14e87dc58cc4094320a Mon Sep 17 00:00:00 2001 From: Cameron Meissner Date: Mon, 11 May 2026 15:56:46 -0700 Subject: [PATCH 1/5] fix(client): add explicit grpc.ConnectParams to avoid conflicts retry/backoff strategies --- client/internal/bootstrap/grpc.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/client/internal/bootstrap/grpc.go b/client/internal/bootstrap/grpc.go index b3e65bc..87139a6 100644 --- a/client/internal/bootstrap/grpc.go +++ b/client/internal/bootstrap/grpc.go @@ -19,6 +19,7 @@ import ( "go.uber.org/zap" "golang.org/x/oauth2" "google.golang.org/grpc" + "google.golang.org/grpc/backoff" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials" "google.golang.org/grpc/credentials/oauth" @@ -55,6 +56,15 @@ func getServiceClient(token string, cfg *Config) (v1.SecureTLSBootstrapServiceCl AccessToken: token, }), }), + grpc.WithConnectParams(grpc.ConnectParams{ + Backoff: backoff.Config{ + BaseDelay: 1 * time.Second, + Multiplier: 1.6, + Jitter: 0.2, + MaxDelay: 5 * time.Second, + }, + MinConnectTimeout: 10 * time.Second, + }), grpc.WithUnaryInterceptor(retry.UnaryClientInterceptor( retry.WithOnRetryCallback(getGRPCOnRetryCallbackFunc()), retry.WithBackoff(retry.BackoffLinearWithJitter(2*time.Second, 0.25)), From 71bef3d2dc6bc0b8fa0b5a55f1283a371bcf9be1 Mon Sep 17 00:00:00 2001 From: Cameron Meissner Date: Mon, 11 May 2026 16:16:55 -0700 Subject: [PATCH 2/5] chore: cleanup --- client/internal/bootstrap/grpc.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/client/internal/bootstrap/grpc.go b/client/internal/bootstrap/grpc.go index 87139a6..9d1c25e 100644 --- a/client/internal/bootstrap/grpc.go +++ b/client/internal/bootstrap/grpc.go @@ -47,6 +47,14 @@ func getServiceClient(token string, cfg *Config) (v1.SecureTLSBootstrapServiceCl return nil, nil, fmt.Errorf("failed to get TLS config: %w", err) } + // override max delay to 3s (default is 120s) - this ensures the gRPC 
subchannel + // re-attempts a real TCP+TLS connection at least every 3s, which aligns with + // the ~2s RPC-level retry cadence. Without this cap, the subchannel exponential + // backoff grows to 120s, causing the retry interceptor to receive cached errors + // from the last real attempt rather than triggering new connection attempts. + grpcConnectionBackoffConfig := backoff.DefaultConfig + grpcConnectionBackoffConfig.MaxDelay = 3 * time.Second + conn, err := grpc.NewClient( fmt.Sprintf("%s:443", cfg.APIServerFQDN), grpc.WithUserAgent(internalhttp.GetUserAgent()), @@ -56,15 +64,12 @@ func getServiceClient(token string, cfg *Config) (v1.SecureTLSBootstrapServiceCl AccessToken: token, }), }), + // transport/connection-level retry config grpc.WithConnectParams(grpc.ConnectParams{ - Backoff: backoff.Config{ - BaseDelay: 1 * time.Second, - Multiplier: 1.6, - Jitter: 0.2, - MaxDelay: 5 * time.Second, - }, + Backoff: grpcConnectionBackoffConfig, MinConnectTimeout: 10 * time.Second, }), + // RPC-level retry config grpc.WithUnaryInterceptor(retry.UnaryClientInterceptor( retry.WithOnRetryCallback(getGRPCOnRetryCallbackFunc()), retry.WithBackoff(retry.BackoffLinearWithJitter(2*time.Second, 0.25)), From a31046aa8312c0409b977b9bc2f3276dcc0ab256 Mon Sep 17 00:00:00 2001 From: Cameron Meissner Date: Mon, 11 May 2026 16:23:10 -0700 Subject: [PATCH 3/5] chore: comments --- client/internal/bootstrap/grpc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/internal/bootstrap/grpc.go b/client/internal/bootstrap/grpc.go index 9d1c25e..da491c1 100644 --- a/client/internal/bootstrap/grpc.go +++ b/client/internal/bootstrap/grpc.go @@ -64,7 +64,7 @@ func getServiceClient(token string, cfg *Config) (v1.SecureTLSBootstrapServiceCl AccessToken: token, }), }), - // transport/connection-level retry config + // transport/connection-level config grpc.WithConnectParams(grpc.ConnectParams{ Backoff: grpcConnectionBackoffConfig, MinConnectTimeout: 10 * time.Second, From 
595ba4860440e572dd572d2544e30acb603a9d65 Mon Sep 17 00:00:00 2001 From: Cameron Meissner Date: Wed, 13 May 2026 15:12:48 -0700 Subject: [PATCH 4/5] chore: update MinConnectTimeout --- client/internal/bootstrap/grpc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/internal/bootstrap/grpc.go b/client/internal/bootstrap/grpc.go index da491c1..3967c34 100644 --- a/client/internal/bootstrap/grpc.go +++ b/client/internal/bootstrap/grpc.go @@ -67,7 +67,7 @@ func getServiceClient(token string, cfg *Config) (v1.SecureTLSBootstrapServiceCl // transport/connection-level config grpc.WithConnectParams(grpc.ConnectParams{ Backoff: grpcConnectionBackoffConfig, - MinConnectTimeout: 10 * time.Second, + MinConnectTimeout: 7 * time.Second, }), // RPC-level retry config grpc.WithUnaryInterceptor(retry.UnaryClientInterceptor( From 148c2def31643520a339721e0c53f4af8c38b7f8 Mon Sep 17 00:00:00 2001 From: Cameron Meissner Date: Wed, 13 May 2026 15:17:13 -0700 Subject: [PATCH 5/5] chore: changes --- client/internal/bootstrap/grpc.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/client/internal/bootstrap/grpc.go b/client/internal/bootstrap/grpc.go index 3967c34..dc611e5 100644 --- a/client/internal/bootstrap/grpc.go +++ b/client/internal/bootstrap/grpc.go @@ -66,8 +66,11 @@ func getServiceClient(token string, cfg *Config) (v1.SecureTLSBootstrapServiceCl }), // transport/connection-level config grpc.WithConnectParams(grpc.ConnectParams{ - Backoff: grpcConnectionBackoffConfig, - MinConnectTimeout: 7 * time.Second, + Backoff: grpcConnectionBackoffConfig, + // MinConnectTimeout is the minimum time each connection attempt is given to complete (default: 20s). + // 5s balances fast retry cycles (~8s/cycle) against headroom for first-connection + // latency through new LB paths — healthy intra-Azure TCP+TLS 1.3 handshakes complete in <1s. + MinConnectTimeout: 5 * time.Second, }), // RPC-level retry config grpc.WithUnaryInterceptor(retry.UnaryClientInterceptor(