From 780c2662fd3d55213f65f60c56d91b3d4fb90905 Mon Sep 17 00:00:00 2001 From: Dan Mullineux Date: Thu, 25 Jun 2026 14:59:42 +0100 Subject: [PATCH 1/2] Retry sidecar API 429s with Retry-After budget MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds RetryOn429Budget to httpcl.Config. When set, the client retries HTTP 429 responses honouring the Retry-After header (integer seconds or HTTP date). Retries stop and a RateLimitError is returned when the cumulative elapsed+wait time would exceed the budget, or when a single Retry-After value exceeds it. Non-429 retry behaviour (5xx, network errors) is unchanged — a per-call counter caps those at the original 3-retry limit even though RetryMax is raised to 30 for 429 headroom. Also fixes a resp.Body leak when retryablehttp returns both a response and a CheckRetry error. The CircleCI API client uses a 30s budget, covering all sidecar calls. Co-Authored-By: Claude Sonnet 4.6 --- internal/circleci/client.go | 10 +-- internal/httpcl/client.go | 116 ++++++++++++++++++++++++++++----- internal/httpcl/client_test.go | 108 ++++++++++++++++++++++++++++++ internal/httpcl/error.go | 24 +++++++ 4 files changed, 237 insertions(+), 21 deletions(-) diff --git a/internal/circleci/client.go b/internal/circleci/client.go index c1fac6db..1cbc5490 100644 --- a/internal/circleci/client.go +++ b/internal/circleci/client.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "net/http" + "time" hc "github.com/CircleCI-Public/chunk-cli/internal/httpcl" "github.com/CircleCI-Public/chunk-cli/internal/version" @@ -33,10 +34,11 @@ func NewClient(cfg Config) (*Client, error) { return nil, ErrTokenNotFound } cl := hc.New(hc.Config{ - BaseURL: cfg.BaseURL, - AuthToken: cfg.Token, - AuthHeader: "Circle-Token", - UserAgent: version.UserAgent(), + BaseURL: cfg.BaseURL, + AuthToken: cfg.Token, + AuthHeader: "Circle-Token", + UserAgent: version.UserAgent(), + RetryOn429Budget: 30 * time.Second, }) return &Client{cl: cl}, nil } diff --git a/internal/httpcl/client.go b/internal/httpcl/client.go index ce093062..c889bf52 100644 --- a/internal/httpcl/client.go +++ b/internal/httpcl/client.go @@ -11,11 +11,22 @@ import ( "io" "net/http" "net/url" + "strconv" "time" "github.com/hashicorp/go-retryablehttp" ) +// retryCtxKey is the context key for the per-call retry state. +type retryCtxKey struct{} + +// retryState tracks per-call retry counters stored in the request context. +// Using a pointer allows mutation across CheckRetry invocations for the same call. +type retryState struct { + start time.Time + nonRateLimitAttempts int +} + const jsonContentType = "application/json; charset=utf-8" // Config configures a Client. @@ -34,18 +45,24 @@ type Config struct { // DisableRetries disables automatic retries. By default requests are // retried up to 3 times with exponential backoff. DisableRetries bool + // RetryOn429Budget, when non-zero, enables retrying HTTP 429 responses by + // honouring the Retry-After response header. Retries stop when the + // cumulative wait time would exceed this budget, or when a single + // Retry-After value exceeds it, and a RateLimitError is returned. + RetryOn429Budget time.Duration // Transport overrides the HTTP transport (useful for testing). Transport http.RoundTripper } // Client is a simple HTTP client with JSON defaults and automatic retries. type Client struct { - baseURL string - authToken string - authHeader string - userAgent string - timeout time.Duration - http *retryablehttp.Client + baseURL string + authToken string + authHeader string + userAgent string + timeout time.Duration + retryOn429Budget time.Duration + http *retryablehttp.Client } // New creates a Client from the given config. @@ -64,20 +81,75 @@ func New(cfg Config) *Client { rc.RetryWaitMax = 2 * time.Second rc.Logger = nil // suppress default log output + if cfg.RetryOn429Budget > 0 { + // Raise RetryMax to accommodate 429 retries. The CheckRetry below caps + // non-429 retries at the original limit so 5xx behaviour is unchanged. + rc.RetryMax = 30 + budget := cfg.RetryOn429Budget + origMax := 3 + if cfg.DisableRetries { + origMax = 0 + } + rc.CheckRetry = func(ctx context.Context, resp *http.Response, err error) (bool, error) { + state, _ := ctx.Value(retryCtxKey{}).(*retryState) + if resp != nil && resp.StatusCode == http.StatusTooManyRequests { + retryAfter := parseRetryAfter(resp) + elapsed := time.Duration(0) + if state != nil { + elapsed = time.Since(state.start) + } + if elapsed+retryAfter > budget { + return false, &RateLimitError{RetryAfter: retryAfter, Budget: budget} + } + return true, nil + } + // Cap non-429 retries at the original limit. + if state != nil { + state.nonRateLimitAttempts++ + if state.nonRateLimitAttempts > origMax { + return false, nil + } + } + return retryablehttp.DefaultRetryPolicy(ctx, resp, err) + } + // DefaultBackoff already honours Retry-After; keep it. + } + if cfg.Transport != nil { rc.HTTPClient.Transport = cfg.Transport } return &Client{ - baseURL: cfg.BaseURL, - authToken: cfg.AuthToken, - authHeader: cfg.AuthHeader, - userAgent: cfg.UserAgent, - timeout: timeout, - http: rc, + baseURL: cfg.BaseURL, + authToken: cfg.AuthToken, + authHeader: cfg.AuthHeader, + userAgent: cfg.UserAgent, + timeout: timeout, + retryOn429Budget: cfg.RetryOn429Budget, + http: rc, } } +// parseRetryAfter parses the Retry-After header as seconds or an HTTP date. +func parseRetryAfter(resp *http.Response) time.Duration { + ra := resp.Header.Get("Retry-After") + if ra == "" { + return 0 + } + if secs, err := strconv.ParseInt(ra, 10, 64); err == nil { + if secs > 0 { + return time.Duration(secs) * time.Second + } + return 0 + } + if t, err := http.ParseTime(ra); err == nil { + if d := time.Until(t); d > 0 { + return d + } + } + return 0 +} + // Call executes the request and returns the HTTP status code. // Non-2xx responses return an *HTTPError. If a decoder is set and the // response is 2xx, the response body is decoded. @@ -99,7 +171,15 @@ func (c *Client) Call(ctx context.Context, r Request) (int, error) { bodyReader = bytes.NewReader(b) } - ctx, cancel := context.WithTimeout(ctx, c.timeout) + if c.retryOn429Budget > 0 { + ctx = context.WithValue(ctx, retryCtxKey{}, &retryState{start: time.Now()}) + } + // When 429 retry is enabled, extend the deadline to accommodate retry waits. + ctxTimeout := c.timeout + if c.retryOn429Budget > 0 { + ctxTimeout = c.retryOn429Budget + c.timeout + } + ctx, cancel := context.WithTimeout(ctx, ctxTimeout) defer cancel() req, err := retryablehttp.NewRequestWithContext(ctx, r.method, u.String(), bodyReader) @@ -131,13 +211,15 @@ func (c *Client) Call(ctx context.Context, r Request) (int, error) { } resp, err := c.http.Do(req) + if resp != nil { + defer func() { + _, _ = io.Copy(io.Discard, resp.Body) + _ = resp.Body.Close() + }() + } if err != nil { return 0, err } - defer func() { - _, _ = io.Copy(io.Discard, resp.Body) - _ = resp.Body.Close() - }() status := resp.StatusCode diff --git a/internal/httpcl/client_test.go b/internal/httpcl/client_test.go index 3032ab8f..0787eca2 100644 --- a/internal/httpcl/client_test.go +++ b/internal/httpcl/client_test.go @@ -5,8 +5,10 @@ import ( "encoding/json" "net/http" "net/http/httptest" + "strings" "sync/atomic" "testing" + "time" hc "github.com/CircleCI-Public/chunk-cli/internal/httpcl" ) @@ -146,6 +148,112 @@ func TestRouteParams(t *testing.T) { } } +func TestRetryOn429_RetriesWithinBudget(t *testing.T) { + var attempts atomic.Int32 + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + n := attempts.Add(1) + if n == 1 { + w.Header().Set("Retry-After", "1") + w.WriteHeader(http.StatusTooManyRequests) + return + } + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + c := hc.New(hc.Config{ + BaseURL: srv.URL, + RetryOn429Budget: 10 * time.Second, + }) + + status, err := c.Call(context.Background(), hc.NewRequest("GET", "/")) + if err != nil { + t.Fatalf("expected success after retry, got: %v", err) + } + if status != 200 { + t.Fatalf("expected 200, got %d", status) + } + if n := attempts.Load(); n != 2 { + t.Fatalf("expected 2 attempts, got %d", n) + } +} + +func TestRetryOn429_BailsWhenRetryAfterExceedsBudget(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Retry-After", "60") + w.WriteHeader(http.StatusTooManyRequests) + })) + defer srv.Close() + + c := hc.New(hc.Config{ + BaseURL: srv.URL, + RetryOn429Budget: 30 * time.Second, + }) + + _, err := c.Call(context.Background(), hc.NewRequest("GET", "/")) + if err == nil { + t.Fatal("expected error, got nil") + } + if !hc.IsRateLimitError(err) { + t.Fatalf("expected RateLimitError, got: %v", err) + } +} + +func TestRetryOn429_MessageContainsBackoffHint(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Retry-After", "45") + w.WriteHeader(http.StatusTooManyRequests) + })) + defer srv.Close() + + c := hc.New(hc.Config{ + BaseURL: srv.URL, + RetryOn429Budget: 30 * time.Second, + }) + + _, err := c.Call(context.Background(), hc.NewRequest("GET", "/")) + if err == nil { + t.Fatal("expected error, got nil") + } + msg := err.Error() + if !strings.Contains(msg, "rate limited") { + t.Errorf("error message should mention rate limiting: %q", msg) + } + if !strings.Contains(msg, "try again later") { + t.Errorf("error message should hint to retry later: %q", msg) + } +} + +func TestRetryOn429_DisabledByDefault(t *testing.T) { + var attempts atomic.Int32 + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts.Add(1) + w.Header().Set("Retry-After", "1") + w.WriteHeader(http.StatusTooManyRequests) + })) + defer srv.Close() + + // No RetryOn429Budget — falls through to DefaultRetryPolicy which + // retries 429 up to RetryMax times with normal backoff (not a budget error). + c := hc.New(hc.Config{ + BaseURL: srv.URL, + DisableRetries: true, + }) + + _, err := c.Call(context.Background(), hc.NewRequest("GET", "/")) + if err == nil { + t.Fatal("expected error for 429") + } + if hc.IsRateLimitError(err) { + t.Fatal("expected plain HTTPError (no budget configured), got RateLimitError") + } + if n := attempts.Load(); n != 1 { + t.Fatalf("expected 1 attempt with retries disabled, got %d", n) + } +} + func TestRouteParamsMultiple(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path != "/api/v2/agents/org/org-1/project/proj-2/runs" { diff --git a/internal/httpcl/error.go b/internal/httpcl/error.go index d3592683..b7d03ef2 100644 --- a/internal/httpcl/error.go +++ b/internal/httpcl/error.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "net/http" + "time" ) // HTTPError represents a non-2xx HTTP response. @@ -45,3 +46,26 @@ func HasStatusCode(err error, codes ...int) bool { } return false } + +// RateLimitError is returned when the server sends a 429 with a Retry-After +// value that, combined with elapsed retry time, exceeds the configured budget. +type RateLimitError struct { + RetryAfter time.Duration + Budget time.Duration +} + +func (e *RateLimitError) Error() string { + if e.RetryAfter > 0 { + return fmt.Sprintf( + "rate limited: server requests %s back-off, retry budget of %s exhausted — try again later", + e.RetryAfter.Round(time.Second), e.Budget.Round(time.Second), + ) + } + return fmt.Sprintf("rate limited: retry budget of %s exhausted — try again later", e.Budget.Round(time.Second)) +} + +// IsRateLimitError reports whether err is a *RateLimitError. +func IsRateLimitError(err error) bool { + var rle *RateLimitError + return errors.As(err, &rle) +} From 0fe16b12925a904f309c903c4b5f58ce4e2db75e Mon Sep 17 00:00:00 2001 From: Dan Mullineux Date: Thu, 25 Jun 2026 15:25:58 +0100 Subject: [PATCH 2/2] Fix RetryMax coupling and add missing test coverage - Derive RetryMax from budget (budget/s + origMax) instead of hardcoding 30; prevents premature cap on budgets > ~30s - Merge duplicate if-retryOn429Budget blocks in Call() - Add TestRetryOn429_5xxStillCapsAtThreeWithBudgetSet: verifies non-429 retry cap is preserved when budget is configured - Add client_internal_test.go covering all parseRetryAfter branches including HTTP-date format, past date, missing header, invalid value Co-Authored-By: Claude Opus 4.8 --- internal/httpcl/client.go | 13 +++--- internal/httpcl/client_internal_test.go | 56 +++++++++++++++++++++++++ internal/httpcl/client_test.go | 26 ++++++++++++ 3 files changed, 87 insertions(+), 8 deletions(-) create mode 100644 internal/httpcl/client_internal_test.go diff --git a/internal/httpcl/client.go b/internal/httpcl/client.go index c889bf52..6ea7722d 100644 --- a/internal/httpcl/client.go +++ b/internal/httpcl/client.go @@ -82,14 +82,14 @@ func New(cfg Config) *Client { rc.Logger = nil // suppress default log output if cfg.RetryOn429Budget > 0 { - // Raise RetryMax to accommodate 429 retries. The CheckRetry below caps - // non-429 retries at the original limit so 5xx behaviour is unchanged. - rc.RetryMax = 30 budget := cfg.RetryOn429Budget origMax := 3 if cfg.DisableRetries { origMax = 0 } + // Raise RetryMax so it never binds before the budget does. + // Each 429 retry consumes ≥1s (Retry-After floor), so budget/s + origMax is sufficient. + rc.RetryMax = int(budget/time.Second) + origMax rc.CheckRetry = func(ctx context.Context, resp *http.Response, err error) (bool, error) { state, _ := ctx.Value(retryCtxKey{}).(*retryState) if resp != nil && resp.StatusCode == http.StatusTooManyRequests { @@ -171,13 +171,10 @@ func (c *Client) Call(ctx context.Context, r Request) (int, error) { bodyReader = bytes.NewReader(b) } - if c.retryOn429Budget > 0 { - ctx = context.WithValue(ctx, retryCtxKey{}, &retryState{start: time.Now()}) - } - // When 429 retry is enabled, extend the deadline to accommodate retry waits. ctxTimeout := c.timeout if c.retryOn429Budget > 0 { - ctxTimeout = c.retryOn429Budget + c.timeout + ctx = context.WithValue(ctx, retryCtxKey{}, &retryState{start: time.Now()}) + ctxTimeout = c.retryOn429Budget + c.timeout // extend deadline to cover retry waits } ctx, cancel := context.WithTimeout(ctx, ctxTimeout) defer cancel() diff --git a/internal/httpcl/client_internal_test.go b/internal/httpcl/client_internal_test.go new file mode 100644 index 00000000..68baf09a --- /dev/null +++ b/internal/httpcl/client_internal_test.go @@ -0,0 +1,56 @@ +package httpcl + +import ( + "net/http" + "testing" + "time" +) + +func TestParseRetryAfter_Seconds(t *testing.T) { + resp := &http.Response{Header: http.Header{"Retry-After": []string{"30"}}} + if d := parseRetryAfter(resp); d != 30*time.Second { + t.Fatalf("expected 30s, got %v", d) + } +} + +func TestParseRetryAfter_ZeroSeconds(t *testing.T) { + resp := &http.Response{Header: http.Header{"Retry-After": []string{"0"}}} + if d := parseRetryAfter(resp); d != 0 { + t.Fatalf("expected 0, got %v", d) + } +} + +func TestParseRetryAfter_HTTPDate_Future(t *testing.T) { + future := time.Now().Add(5 * time.Second) + resp := &http.Response{ + Header: http.Header{"Retry-After": []string{future.UTC().Format(http.TimeFormat)}}, + } + d := parseRetryAfter(resp) + if d < 3*time.Second || d > 6*time.Second { + t.Fatalf("expected ~5s from HTTP-date, got %v", d) + } +} + +func TestParseRetryAfter_HTTPDate_Past(t *testing.T) { + past := time.Now().Add(-5 * time.Second) + resp := &http.Response{ + Header: http.Header{"Retry-After": []string{past.UTC().Format(http.TimeFormat)}}, + } + if d := parseRetryAfter(resp); d != 0 { + t.Fatalf("expected 0 for past date, got %v", d) + } +} + +func TestParseRetryAfter_Missing(t *testing.T) { + resp := &http.Response{Header: http.Header{}} + if d := parseRetryAfter(resp); d != 0 { + t.Fatalf("expected 0 for missing header, got %v", d) + } +} + +func TestParseRetryAfter_Invalid(t *testing.T) { + resp := &http.Response{Header: http.Header{"Retry-After": []string{"garbage"}}} + if d := parseRetryAfter(resp); d != 0 { + t.Fatalf("expected 0 for invalid value, got %v", d) + } +} diff --git a/internal/httpcl/client_test.go b/internal/httpcl/client_test.go index 0787eca2..3f35bbec 100644 --- a/internal/httpcl/client_test.go +++ b/internal/httpcl/client_test.go @@ -254,6 +254,32 @@ func TestRetryOn429_DisabledByDefault(t *testing.T) { } } +func TestRetryOn429_5xxStillCapsAtThreeWithBudgetSet(t *testing.T) { + var attempts atomic.Int32 + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts.Add(1) + w.WriteHeader(http.StatusInternalServerError) + })) + defer srv.Close() + + c := hc.New(hc.Config{ + BaseURL: srv.URL, + RetryOn429Budget: 30 * time.Second, + }) + + _, err := c.Call(context.Background(), hc.NewRequest("GET", "/")) + if err == nil { + t.Fatal("expected error for 500") + } + if hc.IsRateLimitError(err) { + t.Fatalf("expected plain HTTPError for 500, got RateLimitError") + } + if n := attempts.Load(); n != 4 { + t.Fatalf("expected 4 attempts (1 + 3 retries), got %d", n) + } +} + func TestRouteParamsMultiple(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path != "/api/v2/agents/org/org-1/project/proj-2/runs" {