From d26d37097078a43bf6e9c038730ca48d9d0bc661 Mon Sep 17 00:00:00 2001 From: 0X-SquidSol Date: Thu, 9 Apr 2026 11:31:49 -0400 Subject: [PATCH] fix: add RPC timeout to fetchSlab and getSlot calls to prevent indefinite hangs fetchSlab() from @percolator/sdk and getConnection().getSlot() from @percolator/shared do not accept AbortSignal and have no internal timeout. If the Solana RPC node is slow or unresponsive, requests to /markets/:slab, /api/adl/rankings, and /health hang indefinitely. This adds a shared withRpcTimeout() utility using Promise.race: - Route endpoints (markets, adl): 10s default timeout, returns 504 - Health check: 5s timeout, reports checks.rpc=false on timeout - Both values configurable via RPC_TIMEOUT_MS / HEALTH_RPC_TIMEOUT_MS env vars - Custom RpcTimeoutError class for precise catch handling Co-Authored-By: Claude Opus 4.6 (1M context) --- src/routes/adl.ts | 10 +++++++++- src/routes/health.ts | 3 ++- src/routes/markets.ts | 10 +++++++++- src/utils/rpc-timeout.ts | 41 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 src/utils/rpc-timeout.ts diff --git a/src/routes/adl.ts b/src/routes/adl.ts index 471953d..51d298b 100644 --- a/src/routes/adl.ts +++ b/src/routes/adl.ts @@ -50,6 +50,7 @@ import { createLogger, sanitizeSlabAddress, } from "@percolator/shared"; +import { withRpcTimeout, RpcTimeoutError } from "../utils/rpc-timeout.js"; import { isBlockedSlab } from "../middleware/validateSlab.js"; const logger = createLogger("api:adl"); @@ -173,8 +174,15 @@ export function adlRoutes(): Hono { const connection = getConnection(); let data: Uint8Array; try { - data = await fetchSlab(connection, new PublicKey(slab)); + data = await withRpcTimeout( + fetchSlab(connection, new PublicKey(slab)), + `fetchSlab(${slab})`, + ); } catch (err) { + if (err instanceof RpcTimeoutError) { + logger.warn("RPC timeout fetching slab for ADL", { slab, timeoutMs: err.timeoutMs }); + return c.json({ error: 
"Upstream RPC timeout", slab }, 504); + } const msg = err instanceof Error ? err.message : String(err); if (msg.includes("not found")) { return c.json({ error: "Slab account not found", slab }, 404); diff --git a/src/routes/health.ts b/src/routes/health.ts index 31b4045..d4e91ea 100644 --- a/src/routes/health.ts +++ b/src/routes/health.ts @@ -1,5 +1,6 @@ import { Hono } from "hono"; import { getConnection, getSupabase, createLogger, truncateErrorMessage } from "@percolator/shared"; +import { withRpcTimeout, HEALTH_RPC_TIMEOUT_MS } from "../utils/rpc-timeout.js"; import { getWebSocketMetrics } from "./ws.js"; import { requireApiKey } from "../middleware/auth.js"; @@ -26,7 +27,7 @@ export function healthRoutes(): Hono { // Check RPC connectivity try { - await getConnection().getSlot(); + await withRpcTimeout(getConnection().getSlot(), "healthcheck:getSlot", HEALTH_RPC_TIMEOUT_MS); checks.rpc = true; } catch (err) { logger.error("RPC check failed", { error: truncateErrorMessage(err instanceof Error ? 
err.message : err, 120) }); diff --git a/src/routes/markets.ts b/src/routes/markets.ts index fa86676..68d65d8 100644 --- a/src/routes/markets.ts +++ b/src/routes/markets.ts @@ -5,6 +5,7 @@ import { cacheMiddleware } from "../middleware/cache.js"; import { withDbCacheFallback } from "../middleware/db-cache-fallback.js"; import { fetchSlab, parseHeader, parseConfig, parseEngine } from "@percolator/sdk"; import { getConnection, getSupabase, getNetwork, createLogger, sanitizeSlabAddress, truncateErrorMessage } from "@percolator/shared"; +import { withRpcTimeout, RpcTimeoutError } from "../utils/rpc-timeout.js"; const logger = createLogger("api:markets"); @@ -123,7 +124,10 @@ export function marketRoutes(): Hono { try { const connection = getConnection(); const slabPubkey = new PublicKey(slab); - const data = await fetchSlab(connection, slabPubkey); + const data = await withRpcTimeout( + fetchSlab(connection, slabPubkey), + `fetchSlab(${slab})`, + ); const header = parseHeader(data); const cfg = parseConfig(data); const engine = parseEngine(data); @@ -150,6 +154,10 @@ export function marketRoutes(): Hono { }, }); } catch (err) { + if (err instanceof RpcTimeoutError) { + logger.warn("RPC timeout fetching market", { slab, timeoutMs: err.timeoutMs }); + return c.json({ error: "Upstream RPC timeout" }, 504); + } const detail = err instanceof Error ? err.message : "Unknown error"; const isNotFound = detail.includes("not found") || detail.includes("Account does not exist"); if (isNotFound) { diff --git a/src/utils/rpc-timeout.ts b/src/utils/rpc-timeout.ts new file mode 100644 index 0000000..f2bacd4 --- /dev/null +++ b/src/utils/rpc-timeout.ts @@ -0,0 +1,41 @@ +/** + * Timeout wrapper for RPC calls that don't accept AbortSignal. + * + * fetchSlab() and getConnection().getSlot() from the SDK/shared libs take a + * Connection object, not an AbortSignal, so AbortSignal.timeout() cannot be + * threaded through. Promise.race is the only viable approach. 
+ *
+ * The underlying RPC call is NOT cancelled — Node will GC the dangling promise
+ * once it settles. This is acceptable because fetchSlab/getSlot are read-only.
+ */
+
+const DEFAULT_RPC_TIMEOUT_MS = 10_000;
+const DEFAULT_HEALTH_RPC_TIMEOUT_MS = 5_000;
+// Env overrides: unset, non-numeric, zero, or negative values fall back to the defaults above.
+export const RPC_TIMEOUT_MS: number =
+  Math.max(Number(process.env.RPC_TIMEOUT_MS), 0) || DEFAULT_RPC_TIMEOUT_MS;
+
+export const HEALTH_RPC_TIMEOUT_MS: number =
+  Math.max(Number(process.env.HEALTH_RPC_TIMEOUT_MS), 0) || DEFAULT_HEALTH_RPC_TIMEOUT_MS;
+/** Rejection reason used by withRpcTimeout(); `timeoutMs` is the limit that elapsed. */
+export class RpcTimeoutError extends Error {
+  public readonly timeoutMs: number;
+
+  constructor(operation: string, timeoutMs: number) {
+    super(`RPC timeout: ${operation} did not complete within ${timeoutMs}ms`);
+    this.name = "RpcTimeoutError";
+    this.timeoutMs = timeoutMs;
+  }
+}
+/** Settles like `promise`, or rejects with RpcTimeoutError if `timeoutMs` elapses first. */
+export function withRpcTimeout<T>(
+  promise: Promise<T>,
+  operation: string,
+  timeoutMs: number = RPC_TIMEOUT_MS,
+): Promise<T> {
+  let timer: ReturnType<typeof setTimeout> | undefined;
+  const timeout = new Promise<never>((_, reject) => {
+    timer = setTimeout(() => reject(new RpcTimeoutError(operation, timeoutMs)), timeoutMs);
+  });
+  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
+}