Skip to content

Commit f95c3f6

Browse files
vbediSinghVikram97
authored andcommitted
exponential backoff for reconciler requests
1 parent caa123e commit f95c3f6

16 files changed

+110
-90
lines changed

pkg/controllers/accesslogpolicy_controller.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,13 @@ package controllers
1818

1919
import (
2020
"context"
21+
"errors"
2122
"fmt"
2223
"reflect"
2324

2425
"golang.org/x/exp/slices"
2526
corev1 "k8s.io/api/core/v1"
26-
"k8s.io/apimachinery/pkg/api/errors"
27+
apierrors "k8s.io/apimachinery/pkg/api/errors"
2728
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2829
"k8s.io/apimachinery/pkg/runtime"
2930
"k8s.io/apimachinery/pkg/types"
@@ -116,8 +117,8 @@ func (r *accessLogPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Requ
116117
res, retryErr := lattice_runtime.HandleReconcileError(recErr)
117118
if res.RequeueAfter != 0 {
118119
r.log.Infow(ctx, "requeue request", "name", req.Name, "requeueAfter", res.RequeueAfter)
119-
} else if res.RequeueAfter == 0 && retryErr != nil {
120-
r.log.Infow(ctx, "requeue request", "name", req.Name)
120+
} else if retryErr != nil && !errors.Is(retryErr, reconcile.TerminalError(nil)) {
121+
r.log.Infow(ctx, "requeue request using exponential backoff", "name", req.Name)
121122
} else if retryErr == nil {
122123
r.log.Infow(ctx, "reconciled", "name", req.Name)
123124
}
@@ -250,7 +251,7 @@ func (r *accessLogPolicyReconciler) targetRefExists(ctx context.Context, alp *an
250251
return false, fmt.Errorf("access Log Policy targetRef is for unsupported Kind: %s", alp.Spec.TargetRef.Kind)
251252
}
252253

253-
if err != nil && !errors.IsNotFound(err) {
254+
if err != nil && !apierrors.IsNotFound(err) {
254255
return false, err
255256
}
256257

pkg/controllers/gateway_controller.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package controllers
1818

1919
import (
2020
"context"
21+
"errors"
2122
"fmt"
2223

2324
anv1alpha1 "github.com/aws/aws-application-networking-k8s/pkg/apis/applicationnetworking/v1alpha1"
@@ -32,13 +33,14 @@ import (
3233
"github.com/aws/aws-application-networking-k8s/pkg/utils"
3334
"github.com/aws/aws-application-networking-k8s/pkg/utils/gwlog"
3435

35-
"github.com/pkg/errors"
36+
pkgerrors "github.com/pkg/errors"
3637
corev1 "k8s.io/api/core/v1"
3738
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3839
"k8s.io/apimachinery/pkg/runtime"
3940
"k8s.io/client-go/tools/record"
4041
ctrl "sigs.k8s.io/controller-runtime"
4142
"sigs.k8s.io/controller-runtime/pkg/client"
43+
"sigs.k8s.io/controller-runtime/pkg/reconcile"
4244
gwv1 "sigs.k8s.io/gateway-api/apis/v1"
4345

4446
deploy "github.com/aws/aws-application-networking-k8s/pkg/deploy/lattice"
@@ -130,8 +132,8 @@ func (r *gatewayReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
130132
res, retryErr := lattice_runtime.HandleReconcileError(recErr)
131133
if res.RequeueAfter != 0 {
132134
r.log.Infow(ctx, "requeue request", "name", req.Name, "requeueAfter", res.RequeueAfter)
133-
} else if res.RequeueAfter == 0 && retryErr != nil {
134-
r.log.Infow(ctx, "requeue request", "name", req.Name)
135+
} else if retryErr != nil && !errors.Is(retryErr, reconcile.TerminalError(nil)) {
136+
r.log.Infow(ctx, "requeue request using exponential backoff", "name", req.Name)
135137
} else if retryErr == nil {
136138
r.log.Infow(ctx, "reconciled", "name", req.Name)
137139
}
@@ -194,7 +196,7 @@ func (r *gatewayReconciler) reconcileUpsert(ctx context.Context, gw *gwv1.Gatewa
194196
if err != nil {
195197
err2 := r.updateGatewayAcceptStatus(ctx, gw, false)
196198
if err2 != nil {
197-
return errors.Wrap(err2, err.Error())
199+
return pkgerrors.Wrap(err2, err.Error())
198200
}
199201
}
200202

@@ -420,7 +422,7 @@ func UpdateGWListenerStatus(ctx context.Context, k8sClient client.Client, gw *gw
420422
}
421423

422424
if err := k8sClient.Status().Patch(ctx, gw, client.MergeFrom(gwOld)); err != nil {
423-
return errors.Wrapf(err, "listener update failed")
425+
return pkgerrors.Wrapf(err, "listener update failed")
424426
}
425427

426428
if hasValidListener {

pkg/controllers/route_controller.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ import (
4646
"github.com/aws/aws-application-networking-k8s/pkg/config"
4747
"github.com/aws/aws-application-networking-k8s/pkg/controllers/eventhandlers"
4848
"github.com/aws/aws-application-networking-k8s/pkg/deploy"
49-
"github.com/aws/aws-application-networking-k8s/pkg/deploy/lattice"
5049
"github.com/aws/aws-application-networking-k8s/pkg/gateway"
5150
"github.com/aws/aws-application-networking-k8s/pkg/k8s"
5251
"github.com/aws/aws-application-networking-k8s/pkg/model/core"
@@ -261,7 +260,8 @@ func (r *routeReconciler) buildAndDeployModel(
261260
r.log.Debugf(ctx, "stack: %s", json)
262261

263262
if err := r.stackDeployer.Deploy(ctx, stack); err != nil {
264-
if errors.As(err, &lattice.RetryErr) {
263+
var requeueNeededAfter *lattice_runtime.RequeueNeededAfter
264+
if errors.As(err, &requeueNeededAfter) {
265265
r.eventRecorder.Event(route.K8sObject(), corev1.EventTypeNormal,
266266
k8s.RouteEventReasonRetryReconcile, "retry reconcile...")
267267
} else {
@@ -366,7 +366,7 @@ func (r *routeReconciler) reconcileUpsert(ctx context.Context, req ctrl.Request,
366366

367367
if svc == nil || svc.DnsEntry == nil || svc.DnsEntry.DomainName == nil {
368368
r.log.Infof(ctx, "Either service, dns entry, or domain name is not available. Will Retry")
369-
return errors.New(lattice.LATTICE_RETRY)
369+
return lattice_runtime.NewRetryError()
370370
}
371371

372372
if err := r.updateRouteAnnotation(ctx, *svc.DnsEntry.DomainName, route); err != nil {

pkg/deploy/lattice/error.go

Lines changed: 0 additions & 9 deletions
This file was deleted.

pkg/deploy/lattice/service_manager.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"fmt"
66

77
"github.com/aws/aws-application-networking-k8s/pkg/aws/services"
8+
lattice_runtime "github.com/aws/aws-application-networking-k8s/pkg/runtime"
89
"github.com/aws/aws-application-networking-k8s/pkg/utils/gwlog"
910

1011
"github.com/aws/aws-sdk-go/aws"
@@ -244,12 +245,12 @@ func (m *defaultServiceManager) updateAssociations(ctx context.Context, svc *Ser
244245
return nil
245246
}
246247

247-
// returns RetryErr on all non-active Sn-Svc association responses
248+
// returns RetryError on all non-active Sn-Svc association responses
248249
func handleCreateAssociationResp(resp *CreateSnSvcAssocResp) error {
249250
status := aws.StringValue(resp.Status)
250251
if status != vpclattice.ServiceNetworkServiceAssociationStatusActive {
251252
return fmt.Errorf("%w: sn-service-association-id: %s, non-active status: %s",
252-
RetryErr, aws.StringValue(resp.Id), status)
253+
lattice_runtime.NewRetryError(), aws.StringValue(resp.Id), status)
253254
}
254255
return nil
255256
}
@@ -281,7 +282,7 @@ func associationsDiff(svc *Service, curAssocs []*SnSvcAssocSummary) ([]string, [
281282
// TODO: we should have something more lightweight, retrying full reconciliation looks to heavy
282283
if aws.StringValue(oldSn.Status) == vpclattice.ServiceNetworkServiceAssociationStatusDeleteInProgress {
283284
return nil, nil, fmt.Errorf("%w: want to associate sn: %s to svc: %s, but status is: %s",
284-
RetryErr, newSn, svc.LatticeServiceName(), *oldSn.Status)
285+
lattice_runtime.NewRetryError(), newSn, svc.LatticeServiceName(), *oldSn.Status)
285286
}
286287
// TODO: if assoc in failed state, may be we should try to re-create?
287288
}

pkg/deploy/lattice/service_manager_test.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
mocks "github.com/aws/aws-application-networking-k8s/pkg/aws/services"
1010
"github.com/aws/aws-application-networking-k8s/pkg/model/core"
1111
model "github.com/aws/aws-application-networking-k8s/pkg/model/lattice"
12+
lattice_runtime "github.com/aws/aws-application-networking-k8s/pkg/runtime"
1213
"github.com/aws/aws-application-networking-k8s/pkg/utils/gwlog"
1314
"github.com/aws/aws-sdk-go/aws"
1415
"github.com/aws/aws-sdk-go/service/vpclattice"
@@ -398,7 +399,8 @@ func TestHandleSnSvcAssocResp(t *testing.T) {
398399
Status: aws.String(vpclattice.ServiceNetworkServiceAssociationStatusCreateInProgress),
399400
}
400401
err := handleCreateAssociationResp(resp)
401-
assert.True(t, errors.Is(err, RetryErr))
402+
var requeueNeededAfter *lattice_runtime.RequeueNeededAfter
403+
assert.True(t, errors.As(err, &requeueNeededAfter))
402404
})
403405

404406
}
@@ -459,7 +461,8 @@ func TestSnSvcAssocsDiff(t *testing.T) {
459461
Status: aws.String(vpclattice.ServiceNetworkServiceAssociationStatusDeleteInProgress),
460462
}}
461463
_, _, err := associationsDiff(svc, assocs)
462-
assert.True(t, errors.Is(err, RetryErr))
464+
var requeueNeededAfter *lattice_runtime.RequeueNeededAfter
465+
assert.True(t, errors.As(err, &requeueNeededAfter))
463466
})
464467

465468
}

pkg/deploy/lattice/service_network_manager.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@ package lattice
22

33
import (
44
"context"
5-
"errors"
65
"fmt"
76

87
"golang.org/x/exp/slices"
98

109
"github.com/aws/aws-application-networking-k8s/pkg/aws/services"
10+
lattice_runtime "github.com/aws/aws-application-networking-k8s/pkg/runtime"
1111
"github.com/aws/aws-application-networking-k8s/pkg/utils/gwlog"
1212

1313
"github.com/aws/aws-sdk-go/aws"
@@ -80,7 +80,7 @@ func (m *defaultServiceNetworkManager) UpsertVpcAssociation(ctx context.Context,
8080
case vpclattice.ServiceNetworkVpcAssociationStatusActive:
8181
return *resp.Arn, nil
8282
default:
83-
return *resp.Arn, fmt.Errorf("%w, vpc association status in %s", RetryErr, status)
83+
return *resp.Arn, fmt.Errorf("%w, vpc association status in %s", lattice_runtime.NewRetryError(), status)
8484
}
8585
}
8686
}
@@ -125,7 +125,7 @@ func (m *defaultServiceNetworkManager) DeleteVpcAssociation(ctx context.Context,
125125
if err != nil {
126126
m.log.Infof(ctx, "Failed to delete association %s for %s, with response %s and err %s", *snva.Arn, snName, resp, err.Error())
127127
}
128-
return errors.New(LATTICE_RETRY)
128+
return lattice_runtime.NewRetryError()
129129
}
130130
return nil
131131
}
@@ -163,7 +163,7 @@ func (m *defaultServiceNetworkManager) getActiveVpcAssociation(ctx context.Conte
163163
return nil, nil
164164
default:
165165
// a mutation is in progress, try later
166-
return nil, errors.New(LATTICE_RETRY)
166+
return nil, lattice_runtime.NewRetryError()
167167
}
168168
}
169169

@@ -253,7 +253,7 @@ func (m *defaultServiceNetworkManager) updateServiceNetworkVpcAssociation(ctx co
253253
SnvaSecurityGroupIds: updateSnvaResp.SecurityGroupIds,
254254
}, nil
255255
} else {
256-
return model.ServiceNetworkStatus{}, fmt.Errorf("%w, update snva status: %s", RetryErr, *updateSnvaResp.Status)
256+
return model.ServiceNetworkStatus{}, fmt.Errorf("%w, update snva status: %s", lattice_runtime.NewRetryError(), *updateSnvaResp.Status)
257257
}
258258
}
259259

pkg/deploy/lattice/service_network_manager_test.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"github.com/stretchr/testify/assert"
1212

1313
"github.com/aws/aws-application-networking-k8s/pkg/config"
14+
lattice_runtime "github.com/aws/aws-application-networking-k8s/pkg/runtime"
1415
"github.com/aws/aws-application-networking-k8s/pkg/utils/gwlog"
1516

1617
pkg_aws "github.com/aws/aws-application-networking-k8s/pkg/aws"
@@ -147,7 +148,8 @@ func Test_CreateOrUpdateServiceNetwork_SnAlreadyExist_ServiceNetworkVpcAssociati
147148
resp, err := snMgr.CreateOrUpdate(ctx, &snCreateInput)
148149

149150
assert.NotNil(t, err)
150-
assert.Equal(t, err, errors.New(LATTICE_RETRY))
151+
var requeueNeededAfter *lattice_runtime.RequeueNeededAfter
152+
assert.True(t, errors.As(err, &requeueNeededAfter))
151153
assert.Equal(t, resp.ServiceNetworkARN, "")
152154
assert.Equal(t, resp.ServiceNetworkID, "")
153155
}
@@ -199,7 +201,8 @@ func Test_CreateOrUpdateServiceNetwork_SnAlreadyExist_ServiceNetworkVpcAssociati
199201
resp, err := snMgr.CreateOrUpdate(ctx, &snCreateInput)
200202

201203
assert.NotNil(t, err)
202-
assert.Equal(t, err, errors.New(LATTICE_RETRY))
204+
var requeueNeededAfter *lattice_runtime.RequeueNeededAfter
205+
assert.True(t, errors.As(err, &requeueNeededAfter))
203206
assert.Equal(t, resp.ServiceNetworkARN, "")
204207
assert.Equal(t, resp.ServiceNetworkID, "")
205208
}
@@ -629,7 +632,8 @@ func Test_defaultServiceNetworkManager_CreateOrUpdate_SnExists_SnvaCreateInProgr
629632
snMgr := NewDefaultServiceNetworkManager(gwlog.FallbackLogger, cloud)
630633
_, err := snMgr.UpsertVpcAssociation(ctx, name, securityGroupIds)
631634

632-
assert.Equal(t, err, errors.New(LATTICE_RETRY))
635+
var requeueNeededAfter *lattice_runtime.RequeueNeededAfter
636+
assert.True(t, errors.As(err, &requeueNeededAfter))
633637
}
634638

635639
func Test_defaultServiceNetworkManager_CreateOrUpdate_SnExists_SnvaExists_CannotUpdateSecurityGroupsFromNonemptyToEmpty(t *testing.T) {

pkg/deploy/lattice/target_group_manager.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717

1818
pkg_aws "github.com/aws/aws-application-networking-k8s/pkg/aws"
1919
model "github.com/aws/aws-application-networking-k8s/pkg/model/lattice"
20+
lattice_runtime "github.com/aws/aws-application-networking-k8s/pkg/runtime"
2021
"github.com/aws/aws-application-networking-k8s/pkg/utils/gwlog"
2122
)
2223

@@ -115,7 +116,7 @@ func (s *defaultTargetGroupManager) create(ctx context.Context, modelTg *model.T
115116
latticeTgStatus != vpclattice.TargetGroupStatusCreateInProgress {
116117

117118
s.log.Infof(ctx, "Target group is not in the desired state. State is %s, will retry", latticeTgStatus)
118-
return model.TargetGroupStatus{}, errors.New(LATTICE_RETRY)
119+
return model.TargetGroupStatus{}, lattice_runtime.NewRetryError()
119120
}
120121

121122
// create-in-progress is considered success
@@ -341,7 +342,7 @@ func (s *defaultTargetGroupManager) findTargetGroup(
341342
if match {
342343
switch status {
343344
case vpclattice.TargetGroupStatusCreateInProgress, vpclattice.TargetGroupStatusDeleteInProgress:
344-
return nil, errors.New(LATTICE_RETRY)
345+
return nil, lattice_runtime.NewRetryError()
345346
case vpclattice.TargetGroupStatusDeleteFailed, vpclattice.TargetGroupStatusActive:
346347
return latticeTg, nil
347348
}

pkg/deploy/lattice/target_group_manager_test.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"github.com/aws/aws-application-networking-k8s/pkg/config"
2525
"github.com/aws/aws-application-networking-k8s/pkg/model/core"
2626
model "github.com/aws/aws-application-networking-k8s/pkg/model/lattice"
27+
lattice_runtime "github.com/aws/aws-application-networking-k8s/pkg/runtime"
2728
"github.com/aws/aws-application-networking-k8s/pkg/utils/gwlog"
2829
)
2930

@@ -363,7 +364,8 @@ func Test_CreateTargetGroup_ExistingTG_Status_Retry(t *testing.T) {
363364
tgManager := NewTargetGroupManager(gwlog.FallbackLogger, cloud, nil)
364365
_, err := tgManager.Upsert(ctx, &tgCreateInput)
365366

366-
assert.Equal(t, errors.New(LATTICE_RETRY), err)
367+
var requeueNeededAfter *lattice_runtime.RequeueNeededAfter
368+
assert.True(t, errors.As(err, &requeueNeededAfter))
367369
})
368370
}
369371
}
@@ -412,7 +414,8 @@ func Test_CreateTargetGroup_NewTG_RetryStatus(t *testing.T) {
412414
tgManager := NewTargetGroupManager(gwlog.FallbackLogger, cloud, nil)
413415
_, err := tgManager.Upsert(ctx, &tgCreateInput)
414416

415-
assert.Equal(t, errors.New(LATTICE_RETRY), err)
417+
var requeueNeededAfter *lattice_runtime.RequeueNeededAfter
418+
assert.True(t, errors.As(err, &requeueNeededAfter))
416419
})
417420
}
418421
}

0 commit comments

Comments
 (0)