diff --git a/errors.go b/errors.go index 66bdcca..95e95c0 100644 --- a/errors.go +++ b/errors.go @@ -10,9 +10,11 @@ var ( ErrInvalidTransition = errors.New("invalid transition") ) -// ErrorCounter defines an interface for counting occurrences of errors with optional labels. +// ErrorCounter defines an interface for counting occurrences of errors keyed by stable labels. +// At least one label is required — labels should identify the process and run (e.g. processName, runID). +// The error value is not used for keying because error messages often contain dynamic data. type ErrorCounter interface { - Add(err error, labels ...string) int - Count(err error, labels ...string) int - Clear(err error, labels ...string) + Add(err error, label string, extras ...string) int + Count(err error, label string, extras ...string) int + Clear(err error, label string, extras ...string) } diff --git a/internal/errorcounter/errorcounter.go b/internal/errorcounter/errorcounter.go index 45b1490..e293913 100644 --- a/internal/errorcounter/errorcounter.go +++ b/internal/errorcounter/errorcounter.go @@ -16,31 +16,37 @@ type Counter struct { store map[string]int } -func (c *Counter) Add(err error, labels ...string) int { +func (c *Counter) Add(err error, label string, extras ...string) int { c.mu.Lock() defer c.mu.Unlock() - errMsg := err.Error() - errMsg += strings.Join(labels, "-") - c.store[errMsg] += 1 - return c.store[errMsg] + key := makeKey(label, extras) + c.store[key] += 1 + return c.store[key] } -func (c *Counter) Count(err error, labels ...string) int { +func (c *Counter) Count(err error, label string, extras ...string) int { c.mu.Lock() defer c.mu.Unlock() - errMsg := err.Error() - errMsg += strings.Join(labels, "-") - return c.store[errMsg] + key := makeKey(label, extras) + return c.store[key] } -func (c *Counter) Clear(err error, labels ...string) { +func (c *Counter) Clear(err error, label string, extras ...string) { c.mu.Lock() defer c.mu.Unlock() - errMsg := err.Error() - errMsg += strings.Join(labels, "-") - c.store[errMsg] = 0 - return + key := makeKey(label, extras) + delete(c.store, key) +} + +// makeKey builds a stable key from labels only. The error message is excluded +// because it often contains dynamic data (timestamps, IDs) which would create +// unique keys and prevent the PauseAfterErrCount threshold from ever being reached. +func makeKey(label string, extras []string) string { + if len(extras) == 0 { + return label + } + return label + "-" + strings.Join(extras, "-") } diff --git a/internal/errorcounter/errorcounter_test.go b/internal/errorcounter/errorcounter_test.go index 798f294..4f78d3f 100644 --- a/internal/errorcounter/errorcounter_test.go +++ b/internal/errorcounter/errorcounter_test.go @@ -10,52 +10,77 @@ import ( ) func TestErrorCounter(t *testing.T) { - testCases := []struct { - name string - inputErr error - labels []string - iterationCount int - expectedCount int - }{ - { - name: "Add 3 and get 3", - inputErr: errors.New("test error"), - labels: []string{"label 1", "label 2"}, - iterationCount: 3, - expectedCount: 3, - }, - { - name: "Add 1 and get 1 - no labels", - inputErr: errors.New("test error"), - labels: []string{}, - iterationCount: 3, - expectedCount: 3, - }, - { - name: "Add 0 and get 0", - inputErr: errors.New("test error"), - labels: []string{"label 1"}, - iterationCount: 0, - expectedCount: 0, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - c := errorcounter.New() - - var currentCount int - for i := 0; i < tc.iterationCount; i++ { - currentCount = c.Add(tc.inputErr, tc.labels...) - } - require.Equal(t, tc.expectedCount, currentCount) - - count := c.Count(tc.inputErr, tc.labels...) - require.Equal(t, tc.expectedCount, count) - - c.Clear(tc.inputErr, tc.labels...) - count = c.Count(tc.inputErr, tc.labels...) - require.Equal(t, 0, count) - }) - } + t.Run("Add 3 and get 3", func(t *testing.T) { + c := errorcounter.New() + err := errors.New("test error") + + c.Add(err, "label 1", "label 2") + c.Add(err, "label 1", "label 2") + count := c.Add(err, "label 1", "label 2") + require.Equal(t, 3, count) + + require.Equal(t, 3, c.Count(err, "label 1", "label 2")) + + c.Clear(err, "label 1", "label 2") + require.Equal(t, 0, c.Count(err, "label 1", "label 2")) + }) + + t.Run("Single label", func(t *testing.T) { + c := errorcounter.New() + err := errors.New("test error") + + c.Add(err, "only-label") + count := c.Add(err, "only-label") + require.Equal(t, 2, count) + + require.Equal(t, 2, c.Count(err, "only-label")) + + c.Clear(err, "only-label") + require.Equal(t, 0, c.Count(err, "only-label")) + }) + + t.Run("Add 0 and get 0", func(t *testing.T) { + c := errorcounter.New() + require.Equal(t, 0, c.Count(errors.New("test error"), "label 1")) + }) +} + +func TestErrorCounter_DifferentErrorsSameLabels(t *testing.T) { + c := errorcounter.New() + + // Different error messages with the same labels should share a counter. + c.Add(errors.New("connection refused at 10:00:01"), "processName", "run-123") + c.Add(errors.New("connection refused at 10:00:02"), "processName", "run-123") + count := c.Add(errors.New("timeout after 30s"), "processName", "run-123") + + require.Equal(t, 3, count) + + // Count should work regardless of which error is passed. + require.Equal(t, 3, c.Count(errors.New("completely different error"), "processName", "run-123")) +} + +func TestErrorCounter_ClearRemovesKey(t *testing.T) { + c := errorcounter.New() + + c.Add(errors.New("err"), "process", "run-1") + c.Add(errors.New("err"), "process", "run-1") + require.Equal(t, 2, c.Count(errors.New("err"), "process", "run-1")) + + c.Clear(errors.New("err"), "process", "run-1") + + // After clear, count should be 0 and next Add should return 1. + require.Equal(t, 0, c.Count(errors.New("err"), "process", "run-1")) + require.Equal(t, 1, c.Add(errors.New("err"), "process", "run-1")) +} + +func TestErrorCounter_DifferentLabelsSeparateCounters(t *testing.T) { + c := errorcounter.New() + err := errors.New("same error") + + c.Add(err, "process-a", "run-1") + c.Add(err, "process-a", "run-1") + c.Add(err, "process-b", "run-2") + + require.Equal(t, 2, c.Count(err, "process-a", "run-1")) + require.Equal(t, 1, c.Count(err, "process-b", "run-2")) }