From b8ccd87186bd23b2dfc15ef87d69545eb1e85eff Mon Sep 17 00:00:00 2001 From: "posthog[bot]" <206114724+posthog[bot]@users.noreply.github.com> Date: Wed, 1 Jul 2026 08:53:27 +0000 Subject: [PATCH] fix(slack): don't capture transient network blips in Slack-connect poll MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fetchSlackConnected fired a bare axios.get and, unlike its siblings, skipped handleApiError. On a connect timeout Node's net layer throws a messageless AggregateError, which the SlackConnectScreen poll handed straight to captureException — landing in error tracking as an untriageable AggregateError. Route the failure through handleApiError so it always carries a real message, add isTransientNetworkError to recognise connect-timeout AggregateErrors and response-less connection-code axios errors, flag them via ApiError.isTransient, and skip captureException for those in the poll's catch. The poll already degrades gracefully (stops polling, falls back to the connect nudge), so transient connectivity noise is dropped while genuine auth/permission failures stay visible. Generated-By: PostHog Code Task-Id: a81876d2-36fa-4afb-a610-c70227ba8da1 --- src/lib/__tests__/api.test.ts | 97 +++++++++++++++++++++++ src/lib/api.ts | 89 +++++++++++++++++---- src/ui/tui/screens/SlackConnectScreen.tsx | 16 ++-- 3 files changed, 182 insertions(+), 20 deletions(-) create mode 100644 src/lib/__tests__/api.test.ts diff --git a/src/lib/__tests__/api.test.ts b/src/lib/__tests__/api.test.ts new file mode 100644 index 00000000..6ed9f9d4 --- /dev/null +++ b/src/lib/__tests__/api.test.ts @@ -0,0 +1,97 @@ +import { describe, it, expect } from 'vitest'; +import { AxiosError, AxiosHeaders } from 'axios'; +import { z } from 'zod'; +import { ApiError, handleApiError, isTransientNetworkError } from '@lib/api'; + +/** + * Regression coverage for the Slack-connect poll noise: a transient connect + * timeout used to surface a messageless `AggregateError` in error tracking + * because `fetchSlackConnected` skipped `handleApiError`. These lock in that + * transient connectivity failures (a) are recognised, (b) carry a real + * message once routed through `handleApiError`, and (c) are flagged so the + * poll can drop them, while genuine auth failures stay visible. + */ +function axiosNetworkError(code: string): AxiosError { + const err = new AxiosError('', code); + err.config = { + url: '/api/projects/1/integrations/', + headers: new AxiosHeaders(), + }; + return err; +} + +function axiosResponseError(status: number, detail?: string): AxiosError { + const err = new AxiosError('Request failed'); + err.config = { + url: '/api/projects/1/integrations/', + headers: new AxiosHeaders(), + }; + err.response = { + status, + statusText: '', + data: detail ? { detail } : {}, + headers: {}, + config: err.config, + }; + return err; +} + +describe('isTransientNetworkError', () => { + it('treats a messageless AggregateError (connect-timeout signature) as transient', () => { + expect(isTransientNetworkError(new AggregateError([], ''))).toBe(true); + }); + + it('treats response-less axios connection errors as transient', () => { + for (const code of [ + 'ETIMEDOUT', + 'ECONNREFUSED', + 'ECONNRESET', + 'ENOTFOUND', + ]) { + expect(isTransientNetworkError(axiosNetworkError(code))).toBe(true); + } + }); + + it('does not treat an HTTP error response as transient', () => { + expect(isTransientNetworkError(axiosResponseError(403))).toBe(false); + }); + + it('does not treat a Zod error or a plain error as transient', () => { + expect(isTransientNetworkError(new z.ZodError([]))).toBe(false); + expect(isTransientNetworkError(new Error('boom'))).toBe(false); + }); +}); + +describe('handleApiError', () => { + it('gives a transient AggregateError a real message and flags it', () => { + const apiError = handleApiError( + new AggregateError([], ''), + 'check Slack integration', + ); + expect(apiError).toBeInstanceOf(ApiError); + expect(apiError.isTransient).toBe(true); + expect(apiError.message).toContain('Network connection failed'); + expect(apiError.message).toContain('check Slack integration'); + // The whole point: no longer a messageless error. + expect(apiError.message.length).toBeGreaterThan(0); + }); + + it('includes the axios error code for a response-less connection failure', () => { + const apiError = handleApiError( + axiosNetworkError('ETIMEDOUT'), + 'check Slack integration', + ); + expect(apiError.isTransient).toBe(true); + expect(apiError.message).toContain('ETIMEDOUT'); + }); + + it('keeps genuine auth failures visible (not transient)', () => { + const apiError = handleApiError( + axiosResponseError(403), + 'check Slack integration', + ); + expect(apiError.isTransient).toBe(false); + expect(apiError.statusCode).toBe(403); + expect(apiError.message).toContain('Access denied'); + }); +}); diff --git a/src/lib/api.ts b/src/lib/api.ts index ba478a82..d1a6f627 100644 --- a/src/lib/api.ts +++ b/src/lib/api.ts @@ -132,12 +132,49 @@ export class ApiError extends Error { message: string, public readonly statusCode?: number, public readonly endpoint?: string, + // True when the failure is a transient connectivity blip (connect + // timeout, refused, DNS hiccup) rather than an actionable API/auth + // error. Callers that already degrade gracefully use this to skip + // capturing unactionable network noise. See isTransientNetworkError. + public readonly isTransient: boolean = false, ) { super(message); this.name = 'ApiError'; } } +// Node's connect path (`internalConnectMultiple`) throws a messageless +// AggregateError when every candidate address times out or is refused; axios +// otherwise surfaces connectivity failures as a response-less AxiosError with +// one of these codes. None of them are actionable — they're the network being +// flaky, not a bug or an auth problem. +const TRANSIENT_NETWORK_CODES = new Set([ + 'ETIMEDOUT', + 'ECONNREFUSED', + 'ECONNRESET', + 'ECONNABORTED', + 'ENOTFOUND', + 'EAI_AGAIN', + 'ENETUNREACH', + 'EHOSTUNREACH', +]); + +/** + * Whether an error is a transient network/connectivity failure as opposed to + * an actionable API response (401/403/404/…) or a schema mismatch. Recognises + * the messageless connect-timeout `AggregateError` as well as response-less + * axios errors carrying a known connection error code. + */ +export function isTransientNetworkError(error: unknown): boolean { + if (error instanceof AggregateError) return true; + if (axios.isAxiosError(error)) { + return ( + !error.response && !!error.code && TRANSIENT_NETWORK_CODES.has(error.code) + ); + } + return false; +} + export async function fetchUserData( accessToken: string, baseUrl: string, @@ -236,9 +273,12 @@ const IntegrationsResponseSchema = z.object({ /** * Check whether the project already has a Slack integration connected. - * Requires the `integration:read` scope. Throws on failure — callers - * (including the SlackConnectScreen poll) decide how to degrade and - * are responsible for capturing the error exactly once. + * Requires the `integration:read` scope. Throws an {@link ApiError} on + * failure — routed through {@link handleApiError} so the thrown error always + * carries a real message (a raw connect-timeout AggregateError has none) and + * flags transient connectivity blips via `isTransient`. Callers (including + * the SlackConnectScreen poll) decide how to degrade and are responsible for + * capturing the error exactly once. */ export async function fetchSlackConnected( accessToken: string, @@ -246,22 +286,41 @@ export async function fetchSlackConnected( baseUrl: string, signal?: AbortSignal, ): Promise { - const response = await axios.get( - `${baseUrl}/api/projects/${projectId}/integrations/`, - { - headers: { - Authorization: `Bearer ${accessToken}`, - 'User-Agent': WIZARD_USER_AGENT, + try { + const response = await axios.get( + `${baseUrl}/api/projects/${projectId}/integrations/`, + { + headers: { + Authorization: `Bearer ${accessToken}`, + 'User-Agent': WIZARD_USER_AGENT, + }, + signal, }, - signal, - }, - ); - const parsed = IntegrationsResponseSchema.safeParse(response.data); - if (!parsed.success) return false; - return parsed.data.results.some((i) => i.kind === 'slack'); + ); + const parsed = IntegrationsResponseSchema.safeParse(response.data); + if (!parsed.success) return false; + return parsed.data.results.some((i) => i.kind === 'slack'); + } catch (error) { + throw handleApiError(error, 'check Slack integration'); + } } export function handleApiError(error: unknown, operation: string): ApiError { + if (isTransientNetworkError(error)) { + // A messageless AggregateError has no `.code`; fall back to a stable + // label so the captured error is still triageable rather than blank. + const code = axios.isAxiosError(error) ? error.code : undefined; + const endpoint = axios.isAxiosError(error) ? error.config?.url : undefined; + return new ApiError( + `Network connection failed (${ + code ?? 'connect timeout' + }) while trying to ${operation}`, + undefined, + endpoint, + true, + ); + } + if (axios.isAxiosError(error)) { const axiosError = error as AxiosError<{ detail?: string }>; const status = axiosError.response?.status; diff --git a/src/ui/tui/screens/SlackConnectScreen.tsx b/src/ui/tui/screens/SlackConnectScreen.tsx index 9db8f74d..4d0d6e36 100644 --- a/src/ui/tui/screens/SlackConnectScreen.tsx +++ b/src/ui/tui/screens/SlackConnectScreen.tsx @@ -31,7 +31,7 @@ import { Colors, Icons } from '@ui/tui/styles'; import { PickerMenu, LoadingBox } from '@ui/tui/primitives/index'; import { useKeyBindings, KeyMatch } from '@ui/tui/hooks/useKeyBindings'; import { getSlackAppCard } from '@lib/mcp-role-prompts'; -import { fetchSlackConnected } from '@lib/api'; +import { fetchSlackConnected, ApiError } from '@lib/api'; import { Program } from '@lib/programs/program-registry'; import { getOrAskForProjectData } from '@utils/setup-utils'; import { analytics } from '@utils/analytics'; @@ -155,13 +155,19 @@ export const SlackConnectScreen = ({ store }: SlackConnectScreenProps) => { }) .catch((err: unknown) => { if (cancelled) return; - // Capture once and stop polling — repeating a failing call - // every tick would spam error tracking. The nudge copy is - // the fallback either way; a failed check counts as not - // connected so the screen doesn't sit on the loading state. + // Stop polling — repeating a failing call every tick would spam + // error tracking. The nudge copy is the fallback either way; a + // failed check counts as not connected so the screen doesn't sit + // on the loading state. if (store.session.slackConnected === null) { store.setSlackConnected(false); } + // Skip capturing transient connectivity blips (connect timeout, + // refused, DNS hiccup): this path already degrades gracefully, so + // a messageless network error is unactionable noise. Genuine + // auth/permission failures still carry a real message and are + // captured once. + if (err instanceof ApiError && err.isTransient) return; analytics.captureException( err instanceof Error ? err : new Error(String(err)), { step: 'slack_connected_check' },