diff --git a/packages/@aws-cdk/toolkit-lib/lib/api/aws-auth/sdk.ts b/packages/@aws-cdk/toolkit-lib/lib/api/aws-auth/sdk.ts index 3863e4c1c..d83a2ce34 100644 --- a/packages/@aws-cdk/toolkit-lib/lib/api/aws-auth/sdk.ts +++ b/packages/@aws-cdk/toolkit-lib/lib/api/aws-auth/sdk.ts @@ -318,6 +318,9 @@ import { type ListAliasesCommandOutput, } from '@aws-sdk/client-kms'; import { + GetFunctionConfigurationCommand, + type GetFunctionConfigurationCommandInput, + type GetFunctionConfigurationCommandOutput, InvokeCommand, type InvokeCommandInput, type InvokeCommandOutput, @@ -598,6 +601,7 @@ export interface IKMSClient { } export interface ILambdaClient { + getFunctionConfiguration(input: GetFunctionConfigurationCommandInput): Promise; invokeCommand(input: InvokeCommandInput): Promise; publishVersion(input: PublishVersionCommandInput): Promise; updateAlias(input: UpdateAliasCommandInput): Promise; @@ -1048,6 +1052,8 @@ export class SDK { public lambda(): ILambdaClient { const client = new LambdaClient(this.config); return { + getFunctionConfiguration: (input: GetFunctionConfigurationCommandInput): Promise => + client.send(new GetFunctionConfigurationCommand(input)), invokeCommand: (input: InvokeCommandInput): Promise => client.send(new InvokeCommand(input)), publishVersion: (input: PublishVersionCommandInput): Promise => client.send(new PublishVersionCommand(input)), diff --git a/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts b/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts index 260234e64..a232005be 100644 --- a/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts +++ b/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts @@ -1,5 +1,6 @@ import type { AdditionalDiagnosticContext } from '../../actions/diagnose'; -import type { ICloudWatchLogsClient, IECSClient, SDK } from '../aws-auth/sdk'; +import { deserializeStructure } from '../../util'; +import type { 
/**
 * Default cap on the number of log lines kept by {@link trimToRecentLines}.
 */
const MAX_LOG_LINES = 50;

/** Fallback look-back when no failure timestamp is available. */
const FALLBACK_LOG_WINDOW_MS = 30 * 60 * 1000;

/**
 * Turn raw CloudWatch log event messages into the trimmed lines we render.
 *
 * Keeps only the most recent `maxLines` messages (newer output is more useful for
 * diagnosis) and prepends an "N earlier lines omitted" marker when truncation happened.
 * Events without a message are ignored; trailing whitespace is stripped from each line.
 *
 * @param events - log events in chronological order
 * @param maxLines - maximum number of log lines to keep (defaults to {@link MAX_LOG_LINES})
 * @returns the retained lines, preceded by the omission marker if anything was cut
 */
function trimToRecentLines(events: Array<{ message?: string }>, maxLines: number = MAX_LOG_LINES): string[] {
  const allMessages = events
    .map(e => e.message?.trimEnd())
    .filter((m): m is string => m != null);
  // `slice(-0)` is `slice(0)` and would keep everything, so a non-positive cap is handled explicitly.
  const messages = maxLines > 0 ? allMessages.slice(-maxLines) : [];
  const omitted = allMessages.length - messages.length;
  if (omitted > 0) {
    messages.unshift(`... (${omitted} earlier lines omitted)`);
  }
  return messages;
}

/**
 * Lambda platform log lines (text format) that carry no application signal.
 */
const LAMBDA_PLATFORM_LINE = /^(INIT_START|START RequestId:|END RequestId:|REPORT RequestId:)/;

/**
 * Normalize Lambda CloudWatch log events into readable lines.
 *
 * Two input shapes are recognized:
 * - **Text**: `<timestamp>\t<requestId>\t<level>\t<message>`, plus platform lines.
 * - **JSON**: one JSON object per event (`{ timestamp, level, message, ... }`).
 *
 * For both we surface `LEVEL message` (or just the message when there's no level), strip the
 * per-line timestamp/requestId prefix, and drop pure platform boilerplate. Application output
 * is never dropped, whatever its level. Anything we don't recognize passes through verbatim
 * (minus trailing whitespace).
 *
 * @param events - raw log events; events without a `message` are skipped
 * @returns one entry per kept line, in input order
 */
export function parseLambdaLogEvents(events: Array<{ message?: string }>): Array<{ message: string }> {
  const out: Array<{ message: string }> = [];
  for (const { message: raw } of events) {
    if (raw == null) {
      continue;
    }
    const normalized = normalizeLambdaLine(raw);
    if (normalized !== undefined) {
      out.push({ message: normalized });
    }
  }
  return out;
}

/**
 * Normalize a single Lambda log line. Returns `undefined` to drop the line (platform noise),
 * or the cleaned-up text to keep.
 */
function normalizeLambdaLine(raw: string): string | undefined {
  const trimmed = raw.trimEnd();

  // JSON-format event: { timestamp, level, message, ... } (one object per line).
  const json = normalizeJsonLogLine(trimmed);
  if (json.kind === 'drop') {
    return undefined;
  }
  if (json.kind === 'line') {
    return json.text;
  }

  // Text-format platform boilerplate: drop.
  if (LAMBDA_PLATFORM_LINE.test(trimmed)) {
    return undefined;
  }

  // Text-format app line: `<timestamp>\t<requestId>\t<level>\t<message>`.
  // Strip the timestamp + requestId prefix; keep `LEVEL message`.
  const parts = trimmed.split('\t');
  if (parts.length >= 4 && /^\d{4}-\d{2}-\d{2}T/.test(parts[0])) {
    // The message itself may contain tabs, so everything after the level is re-joined.
    return formatLeveledLine(parts[2], parts.slice(3).join('\t'));
  }

  // Unrecognized (continuation line, plain stdout, etc.) — keep verbatim.
  return trimmed;
}

/**
 * Outcome of trying to interpret a line as a JSON-format Lambda log object.
 *
 * An explicit tag is used (rather than an empty-string sentinel) so that a legitimately
 * empty message is not confused with "drop this line".
 */
type JsonLineResult =
  | { readonly kind: 'not-json' }
  | { readonly kind: 'drop' }
  | { readonly kind: 'line'; readonly text: string };

/**
 * If `line` is a JSON-format Lambda log object, render it as `LEVEL message`
 * (or just the message when there's no level).
 *
 * @returns `not-json` when the line isn't a JSON object, `drop` for JSON platform events
 * (`type` starting with `platform.`), otherwise `line` with the rendered text.
 */
function normalizeJsonLogLine(line: string): JsonLineResult {
  if (!line.startsWith('{')) {
    return { kind: 'not-json' };
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(line);
  } catch {
    return { kind: 'not-json' };
  }
  if (parsed === null || typeof parsed !== 'object') {
    return { kind: 'not-json' };
  }
  const obj = parsed as Record<string, unknown>;

  // Platform events (e.g. { type: 'platform.report', record: {...} }) — drop.
  if (typeof obj.type === 'string' && obj.type.startsWith('platform.')) {
    return { kind: 'drop' };
  }

  const level = typeof obj.level === 'string' ? obj.level : undefined;
  // Prefer `message`; fall back to an error envelope's `errorMessage` (+ optional stackTrace).
  let message: string;
  if (typeof obj.message === 'string') {
    message = obj.message;
  } else if (typeof obj.errorMessage === 'string') {
    message = Array.isArray(obj.stackTrace) ? [obj.errorMessage, ...obj.stackTrace].join('\n') : obj.errorMessage;
  } else {
    // JSON, but not a shape we recognize — show the raw line rather than dropping signal.
    message = line;
  }
  return { kind: 'line', text: level ? formatLeveledLine(level, message) : message };
}

/**
 * Render a log level and message as `LEVEL message`, padding the level to at least five
 * characters so lines align in the terminal. Multi-line messages keep their internal newlines.
 */
function formatLeveledLine(level: string, message: string): string {
  return `${level.padEnd(5)} ${message}`;
}
''; + if (resourceType === 'AWS::ECS::Service') { + return investigateEcsService(err, sdk, debug, options); + } + if (resourceType === 'AWS::CloudFormation::CustomResource' || resourceType.startsWith('Custom::')) { + return investigateCustomResource(err, sdk, debug); } + return []; } async function investigateEcsService( @@ -347,7 +487,7 @@ async function fetchRecentLogs( const resp = await cwl.filterLogEvents({ logGroupName: logConfig.logGroup, - startTime: Date.now() - 30 * 60 * 1000, + startTime: Date.now() - FALLBACK_LOG_WINDOW_MS, limit: 1000, ...(targetStream ? { logStreamNames: [targetStream] } @@ -360,16 +500,7 @@ async function fetchRecentLogs( return undefined; } - // Keep the most recent lines (newer output is more useful for diagnosis). - // This is the only truncation point — the formatter renders these verbatim. - const allMessages = events - .map(e => e.message?.trimEnd()) - .filter((m): m is string => m != null); - const messages: string[] = allMessages.slice(-MAX_LOG_LINES); - const omitted = allMessages.length - messages.length; - if (omitted > 0) { - messages.unshift(`... (${omitted} earlier lines omitted)`); - } + const messages = trimToRecentLines(events); if (taskIds.length > 1) { messages.push(`(showing logs from last failed task; ${taskIds.length - 1} other failed task(s) available in console)`); @@ -391,6 +522,354 @@ async function fetchRecentLogs( } } +/** + * How far before/after the failure event to search CloudWatch Logs when we have a timestamp. + * + * The pre-window absorbs minor clock skew; the post-window covers output the function + * emits while it runs after the CloudFormation event was recorded. + */ +const LOG_WINDOW_BEFORE_MS = 2 * 60 * 1000; +const LOG_WINDOW_AFTER_MS = 15 * 60 * 1000; + +/** + * Investigate a failed custom resource by surfacing its backing Lambda's CloudWatch logs. + * + * The CloudFormation event does not name the backing function — only the resource's + * `ServiceToken` (in the template) does. 
/**
 * Investigate a failed custom resource by surfacing its backing Lambda's CloudWatch logs.
 *
 * The CloudFormation event does not name the backing function — only the resource's
 * `ServiceToken` (in the template) does. We resolve that to a function name, derive the
 * log group (the `/aws/lambda/<name>` convention, falling back to a configured log group
 * only if the convention turns up empty), and fetch the relevant log lines.
 *
 * When the status reason contains "See the details in CloudWatch Log Stream: <stream>"
 * (as written by the cfn-response library), that stream is targeted first.
 *
 * @param err - the failed resource; needs `logicalId` and `stackArn` to be investigated
 * @param sdk - source of the CloudFormation, Lambda and CloudWatch Logs clients
 * @param debug - sink for diagnostic messages explaining why investigation stopped
 * @returns the collected log context, or an empty array when nothing could be resolved
 */
async function investigateCustomResource(
  err: ResourceError,
  sdk: SDK,
  debug: (msg: string) => Promise<void>,
): Promise<AdditionalDiagnosticContext[]> {
  if (!err.logicalId) {
    await debug('Custom resource investigation: no logical ID available');
    return [];
  }
  const stackName = err.stackArn;
  if (!stackName) {
    await debug('Custom resource investigation: no stack ARN available');
    return [];
  }

  const cfn = sdk.cloudFormation();
  const lambda = sdk.lambda();
  const cwl = sdk.cloudWatchLogs();
  const region = sdk.currentRegion;

  // Fetch the template once: it carries both the ServiceToken and (for functions defined in
  // this stack) the backing function's LoggingConfig.
  const template = await getStackTemplate(cfn, stackName, debug);
  if (!template) {
    return [];
  }

  const serviceToken = template.Resources?.[err.logicalId]?.Properties?.ServiceToken;
  // `== null` also covers an explicit `null` (e.g. an empty YAML value), which is just as
  // unusable as a missing property.
  if (serviceToken == null) {
    await debug(`Custom resource investigation: no ServiceToken on resource "${err.logicalId}"`);
    return [];
  }

  const referencedLogicalId = serviceTokenReferencedLogicalId(serviceToken);
  const functionName = await resolveServiceTokenToFunctionName(cfn, stackName, serviceToken, referencedLogicalId, debug);
  if (!functionName) {
    await debug('Custom resource investigation: could not resolve ServiceToken to a Lambda function');
    return [];
  }

  // Prefer the function's configured log group as derived from the template.
  // Only resolvable when the ServiceToken references a resource in this stack (Ref/GetAtt).
  const templateLogGroup = referencedLogicalId
    ? await resolveConfiguredLogGroup(cfn, stackName, template, referencedLogicalId, debug)
    : undefined;

  // Stream name from the status reason first; otherwise the physical ID, when it looks like
  // a Lambda log stream name.
  const streamName = extractLogStreamName(err.message) ?? logStreamNameFromPhysicalId(err.physicalId);

  return fetchCustomResourceLogs(cwl, lambda, functionName, templateLogGroup, streamName, err.timestamp, region, debug);
}
/**
 * Fetch and parse the stack's template.
 *
 * @param cfn - CloudFormation client used for `getTemplate`
 * @param stackName - stack name or ARN passed as `StackName`
 * @param debug - sink for diagnostic messages
 * @returns the deserialized template, or `undefined` if the body is empty or the call
 * or parsing fails (the failure is reported through `debug`, never thrown)
 */
async function getStackTemplate(
  cfn: ICloudFormationClient,
  stackName: string,
  debug: (msg: string) => Promise<void>,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- templates are free-form documents
): Promise<any> {
  try {
    const resp = await cfn.getTemplate({ StackName: stackName });
    if (!resp.TemplateBody) {
      await debug('Custom resource investigation: empty template body');
      return undefined;
    }
    return deserializeStructure(resp.TemplateBody);
  } catch (e: unknown) {
    // Narrow before reading `.message`: a thrown non-object would otherwise make this
    // best-effort path throw from inside the catch.
    const reason = e instanceof Error ? e.message : String(e);
    await debug(`Custom resource investigation: failed to read template: ${reason}`);
    return undefined;
  }
}
/**
 * Resolve the backing Lambda's configured log group from the template.
 *
 * Handles the function's `LoggingConfig.LogGroup` as:
 * - a literal string (returned directly);
 * - a `Ref` to a resource with a literal `LogGroupName` property (returned directly);
 * - a `Ref` to a resource without a literal name — resolved to that resource's physical ID
 *   via `describeStackResources`.
 *
 * @param cfn - CloudFormation client
 * @param stackName - stack name or ARN
 * @param template - the deserialized stack template
 * @param functionLogicalId - logical ID of the resource the ServiceToken references
 * @param debug - sink for diagnostic messages
 * @returns the log group name, or `undefined` when none is configured or it can't be resolved
 */
async function resolveConfiguredLogGroup(
  cfn: ICloudFormationClient,
  stackName: string,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- templates are free-form documents
  template: any,
  functionLogicalId: string,
  debug: (msg: string) => Promise<void>,
): Promise<string | undefined> {
  const logGroup = template.Resources?.[functionLogicalId]?.Properties?.LoggingConfig?.LogGroup;
  if (typeof logGroup === 'string') {
    return logGroup;
  }
  if (logGroup && typeof logGroup === 'object' && typeof logGroup.Ref === 'string') {
    const name = template.Resources?.[logGroup.Ref]?.Properties?.LogGroupName;
    if (typeof name === 'string') {
      return name;
    }
    // No literal name in the template — use the referenced resource's physical ID.
    return resolvePhysicalId(cfn, stackName, logGroup.Ref, debug);
  }
  return undefined;
}

/**
 * Resolve a resource's physical ID by logical ID.
 *
 * @returns the physical ID of the first resource returned by `describeStackResources`,
 * or `undefined` if there is none or the call fails (reported through `debug`)
 */
async function resolvePhysicalId(
  cfn: ICloudFormationClient,
  stackName: string,
  logicalId: string,
  debug: (msg: string) => Promise<void>,
): Promise<string | undefined> {
  try {
    const resp = await cfn.describeStackResources({ StackName: stackName, LogicalResourceId: logicalId });
    return resp.StackResources?.[0]?.PhysicalResourceId;
  } catch (e: unknown) {
    // Narrow before reading `.message` so a non-Error throw can't escape this best-effort path.
    const reason = e instanceof Error ? e.message : String(e);
    await debug(`Custom resource investigation: failed to resolve physical ID for "${logicalId}": ${reason}`);
    return undefined;
  }
}

/**
 * Resolve a `ServiceToken` value (a literal ARN, an `Fn::GetAtt`, or a `Ref`) to a Lambda
 * function name. Intrinsics are resolved to a physical ID via `describeStackResources`.
 *
 * @param serviceToken - the raw `ServiceToken` property from the template
 * @param referencedLogicalId - logical ID the token references, if it is an intrinsic
 * @returns the function name, or `undefined` when the token is neither a resolvable
 * reference nor a string that yields a function name
 */
async function resolveServiceTokenToFunctionName(
  cfn: ICloudFormationClient,
  stackName: string,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- raw template value
  serviceToken: any,
  referencedLogicalId: string | undefined,
  debug: (msg: string) => Promise<void>,
): Promise<string | undefined> {
  if (referencedLogicalId) {
    const physicalId = await resolvePhysicalId(cfn, stackName, referencedLogicalId, debug);
    return physicalId ? functionNameFromArnOrName(physicalId) : undefined;
  }

  if (typeof serviceToken === 'string') {
    return functionNameFromArnOrName(serviceToken);
  }

  await debug('Custom resource investigation: unsupported ServiceToken shape');
  return undefined;
}
/**
 * If a ServiceToken is an `Fn::GetAtt` or `Ref` intrinsic, return the referenced logical ID.
 *
 * @param serviceToken - the raw `ServiceToken` value from the template
 * @returns the logical ID, or `undefined` for literals, unrecognized objects, or an empty ID
 */
export function serviceTokenReferencedLogicalId(serviceToken: unknown): string | undefined {
  if (serviceToken === null || typeof serviceToken !== 'object') {
    return undefined;
  }
  const token = serviceToken as Record<string, unknown>;
  const getAtt = token['Fn::GetAtt'];
  // Array form: ["LogicalId", "Arn"]. An empty ID is treated as absent, same as the string form.
  if (Array.isArray(getAtt) && typeof getAtt[0] === 'string') {
    return getAtt[0] || undefined;
  }
  // String short-form: "LogicalId.Attr".
  if (typeof getAtt === 'string') {
    return getAtt.split('.')[0] || undefined;
  }
  if (typeof token.Ref === 'string') {
    return token.Ref;
  }
  return undefined;
}

/**
 * Extract a Lambda function name from any of the identifier forms Lambda accepts:
 * a full function ARN, a partial ARN (`<account>:function:<name>`), or a bare name,
 * each optionally followed by `:<qualifier>` (version or alias), which is discarded.
 *
 * @returns the function name; `undefined` for non-Lambda ARNs (e.g. an SNS-topic
 * ServiceToken) and for empty input
 */
export function functionNameFromArnOrName(arnOrName: string): string | undefined {
  const arnMatch = arnOrName.match(/^arn:[^:]+:lambda:[^:]*:[^:]*:function:([^:]+)/);
  if (arnMatch) {
    return arnMatch[1];
  }
  if (arnOrName.startsWith('arn:')) {
    return undefined;
  }
  const partialArnMatch = arnOrName.match(/^\d{12}:function:([^:]+)/);
  if (partialArnMatch) {
    return partialArnMatch[1];
  }
  // Function names cannot contain ':', so anything after the first colon is a qualifier.
  return arnOrName.split(':')[0] || undefined;
}

/**
 * Extract the log stream name out of a cfn-response failure reason
 * ("See the details in CloudWatch Log Stream: <stream>").
 *
 * @returns the first whitespace-delimited token after "CloudWatch Log Stream:", or
 * `undefined` when the message is absent or doesn't contain that phrase
 */
export function extractLogStreamName(message: string | undefined): string | undefined {
  const match = message?.match(/CloudWatch Log Stream:\s*(\S+)/);
  return match ? match[1] : undefined;
}
/**
 * Treat a physical ID as a log stream name only when it looks like a Lambda log stream
 * (`YYYY/MM/DD/...`), so a user-provided physical ID isn't mistaken for one.
 */
function logStreamNameFromPhysicalId(physicalId: string | undefined): string | undefined {
  return physicalId && /^\d{4}\/\d{2}\/\d{2}\/.+/.test(physicalId) ? physicalId : undefined;
}

/**
 * Fetch the custom resource's Lambda logs and package them as a diagnostic context.
 *
 * The search window is `[timestamp - LOG_WINDOW_BEFORE_MS, timestamp + LOG_WINDOW_AFTER_MS]`
 * when a failure timestamp is known, otherwise the last `FALLBACK_LOG_WINDOW_MS` with no
 * upper bound. The conventional `/aws/lambda/<functionName>` group is queried first; only
 * if that yields nothing is the configured group tried (`templateLogGroup` if given,
 * otherwise the live function configuration).
 *
 * @param cwl - CloudWatch Logs client
 * @param lambda - Lambda client, used only when `templateLogGroup` is undefined and the
 * convention group yielded nothing
 * @param functionName - name of the backing function
 * @param templateLogGroup - configured log group derived from the template, if any
 * @param streamName - log stream to target first, if known
 * @param timestamp - time of the failure event, if known
 * @param region - region used to build the console link
 * @param debug - sink for diagnostic messages
 * @returns exactly one context entry; when no log lines were found its body is an
 * explanatory message instead
 */
async function fetchCustomResourceLogs(
  cwl: ICloudWatchLogsClient,
  lambda: ILambdaClient,
  functionName: string,
  templateLogGroup: string | undefined,
  streamName: string | undefined,
  timestamp: Date | undefined,
  region: string,
  debug: (msg: string) => Promise<void>,
): Promise<AdditionalDiagnosticContext[]> {
  const failureTime = timestamp?.valueOf();
  const startTime = failureTime !== undefined ? failureTime - LOG_WINDOW_BEFORE_MS : Date.now() - FALLBACK_LOG_WINDOW_MS;
  const endTime = failureTime !== undefined ? failureTime + LOG_WINDOW_AFTER_MS : undefined;

  // Convention first; only pay for the configured group if the convention group is empty.
  const conventionGroup = `/aws/lambda/${functionName}`;
  let messages = await fetchLogLines(cwl, conventionGroup, streamName, startTime, endTime, debug);
  // The group we point the user at. Once a distinct configured group is known it is used for
  // the header and link even if it too turns out to be empty.
  let logGroup = conventionGroup;

  if (messages === undefined) {
    // Prefer the template-derived group; ask the live function configuration only when the
    // template couldn't tell us.
    const configuredGroup = templateLogGroup ?? await configuredLogGroup(lambda, functionName, debug);
    if (configuredGroup && configuredGroup !== conventionGroup) {
      logGroup = configuredGroup;
      messages = await fetchLogLines(cwl, configuredGroup, streamName, startTime, endTime, debug);
    }
  }

  // Lead with the log group so the user can tell which function these logs belong to.
  const header = `Logs from ${logGroup}:`;
  const body = messages ?? ['No log events found around the time of failure. The function may not have produced output, or logging may not be configured.'];

  return [{
    source: 'Custom Resource Lambda Logs',
    messages: [header, ...body],
    link: cloudWatchLogsConsoleUrl(region, logGroup),
    linkLabel: 'Logs',
  }];
}
/**
 * Fetch and trim recent log lines from a group. Returns `undefined` when the group has no
 * usable lines in the window (so the caller can try a different group).
 *
 * When `streamName` is given, that stream is queried first; if it yields nothing the whole
 * group is scanned over the same window, so a stale stream name can't hide the failing
 * invocation's logs.
 */
async function fetchLogLines(
  cwl: ICloudWatchLogsClient,
  logGroup: string,
  streamName: string | undefined,
  startTime: number,
  endTime: number | undefined,
  debug: (msg: string) => Promise<void>,
): Promise<string[] | undefined> {
  if (streamName) {
    const targeted = await filterLogLines(cwl, logGroup, streamName, startTime, endTime, debug);
    if (targeted !== undefined) {
      return targeted;
    }
  }
  return filterLogLines(cwl, logGroup, undefined, startTime, endTime, debug);
}

/**
 * Run a single `filterLogEvents` query (at most 1000 events, no pagination) and turn the
 * result into display lines.
 *
 * @param streamName - restrict the query to this stream when set
 * @param endTime - upper bound of the window; omitted from the request when `undefined`
 * @returns the normalized, trimmed lines; `undefined` when the query returned no events,
 * when every event was platform boilerplate, or when the query failed (reported via `debug`)
 */
async function filterLogLines(
  cwl: ICloudWatchLogsClient,
  logGroup: string,
  streamName: string | undefined,
  startTime: number,
  endTime: number | undefined,
  debug: (msg: string) => Promise<void>,
): Promise<string[] | undefined> {
  const where = `${logGroup}${streamName ? ` (stream: ${streamName})` : ''}`;
  try {
    const resp = await cwl.filterLogEvents({
      logGroupName: logGroup,
      startTime,
      ...(endTime !== undefined ? { endTime } : {}),
      limit: 1000,
      ...(streamName ? { logStreamNames: [streamName] } : {}),
    });
    const events = resp.events ?? [];
    if (events.length === 0) {
      await debug(`Custom resource investigation: no log events in ${where}`);
      return undefined;
    }
    // Normalize Lambda's text-/JSON-format events into readable lines before trimming.
    const lines = trimToRecentLines(parseLambdaLogEvents(events));
    if (lines.length === 0) {
      // Everything was platform boilerplate. Report "nothing found" rather than an empty
      // list, so callers keep falling back instead of rendering a header with no body.
      await debug(`Custom resource investigation: only platform log lines in ${where}`);
      return undefined;
    }
    return lines;
  } catch (e: unknown) {
    // Narrow before reading `.message` so a non-Error throw can't escape this best-effort path.
    const reason = e instanceof Error ? e.message : String(e);
    await debug(`Custom resource investigation: failed to fetch logs from ${logGroup}: ${reason}`);
    return undefined;
  }
}

/**
 * Read the function's configured log group (`LoggingConfig.LogGroup`), if any.
 *
 * @returns the log group name, or `undefined` when none is reported or the call fails
 */
async function configuredLogGroup(
  lambda: ILambdaClient,
  functionName: string,
  debug: (msg: string) => Promise<void>,
): Promise<string | undefined> {
  try {
    const resp = await lambda.getFunctionConfiguration({ FunctionName: functionName });
    return resp.LoggingConfig?.LogGroup;
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    await debug(`Custom resource investigation: failed to read function configuration: ${reason}`);
    return undefined;
  }
}
function cloudWatchLogsConsoleUrl(region: string, logGroup: string): string { const encodedLogGroup = encodeURIComponent(encodeURIComponent(logGroup)).replace(/%/g, '$'); diff --git a/packages/@aws-cdk/toolkit-lib/lib/api/stack-events/resource-errors.ts b/packages/@aws-cdk/toolkit-lib/lib/api/stack-events/resource-errors.ts index 35b265e7b..943216101 100644 --- a/packages/@aws-cdk/toolkit-lib/lib/api/stack-events/resource-errors.ts +++ b/packages/@aws-cdk/toolkit-lib/lib/api/stack-events/resource-errors.ts @@ -43,6 +43,19 @@ export interface ResourceError { * Error code of the resource */ readonly errorCode?: string; + + /** + * Timestamp of the failure event, if known. + * + * Used to bound exploratory lookups (e.g. CloudWatch Logs queries) to the time + * around the failure. This matters for resources whose logs span multiple + * deployments (creates, updates, rollbacks), where the most recent invocation + * is not necessarily the one that failed. + * + * Only populated for errors derived from stack events; absent for change-set + * and early-validation errors. + */ + readonly timestamp?: Date; } /** @@ -121,6 +134,7 @@ function errorFromEvent(ev: ResourceEvent): ResourceError { stackArn: ev.event.StackId ?? 
'', errorCode: extractErrorCode(ev.event), physicalId: ev.event.PhysicalResourceId, + timestamp: ev.event.Timestamp, }; } diff --git a/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts b/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts index cd4dd166b..1d2092fbc 100644 --- a/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts +++ b/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts @@ -1,3 +1,4 @@ +import { DescribeStackResourcesCommand, GetTemplateCommand } from '@aws-sdk/client-cloudformation'; import { FilterLogEventsCommand } from '@aws-sdk/client-cloudwatch-logs'; import { DescribeServicesCommand, @@ -5,11 +6,21 @@ import { DescribeTasksCommand, ListTasksCommand, } from '@aws-sdk/client-ecs'; -import { investigateResource, parseEcsServiceIdentifier } from '../../../lib/api/diagnosing/resource-investigation'; +import { GetFunctionConfigurationCommand } from '@aws-sdk/client-lambda'; +import { + extractLogStreamName, + functionNameFromArnOrName, + investigateResource, + parseEcsServiceIdentifier, + parseLambdaLogEvents, + serviceTokenReferencedLogicalId, +} from '../../../lib/api/diagnosing/resource-investigation'; import type { ResourceError } from '../../../lib/api/stack-events/resource-errors'; import { + mockCloudFormationClient, mockCloudWatchClient, mockECSClient, + mockLambdaClient, MockSdk, restoreSdkMocksToDefault, } from '../../_helpers/mock-sdk'; @@ -387,3 +398,460 @@ describe('investigateResource for AWS::ECS::Service', () => { expect(cwl?.messages[0]).toMatch(/No CloudWatch Logs found/); }); }); + +describe('serviceTokenReferencedLogicalId', () => { + test('extracts the logical ID from an Fn::GetAtt', () => { + expect(serviceTokenReferencedLogicalId({ 'Fn::GetAtt': ['MyFn', 'Arn'] })).toEqual('MyFn'); + }); + + test('extracts the logical ID from a Ref', () => { + expect(serviceTokenReferencedLogicalId({ Ref: 'MyFn' 
})).toEqual('MyFn'); + }); + + test('returns undefined for a literal string', () => { + expect(serviceTokenReferencedLogicalId('arn:aws:lambda:us-east-1:123456789012:function:my-fn')).toBeUndefined(); + }); + + test('returns undefined for an unrecognized object', () => { + expect(serviceTokenReferencedLogicalId({ 'Fn::Sub': 'x' })).toBeUndefined(); + }); +}); + +describe('functionNameFromArnOrName', () => { + test('parses the name from a function ARN', () => { + expect(functionNameFromArnOrName('arn:aws:lambda:us-east-1:123456789012:function:my-fn')).toEqual('my-fn'); + }); + + test('parses the name from a function ARN with version suffix', () => { + expect(functionNameFromArnOrName('arn:aws:lambda:us-east-1:123456789012:function:my-fn:42')).toEqual('my-fn'); + }); + + test('handles non-aws partitions', () => { + expect(functionNameFromArnOrName('arn:aws-cn:lambda:cn-north-1:123456789012:function:my-fn')).toEqual('my-fn'); + }); + + test('passes through a bare function name', () => { + expect(functionNameFromArnOrName('my-fn')).toEqual('my-fn'); + }); + + test('returns undefined for a non-lambda ARN', () => { + expect(functionNameFromArnOrName('arn:aws:sns:us-east-1:123456789012:my-topic')).toBeUndefined(); + }); +}); + +describe('extractLogStreamName', () => { + test('extracts the cfn-response stream from the failure reason', () => { + expect(extractLogStreamName('See the details in CloudWatch Log Stream: 2026/06/15/[$LATEST]abc123')) + .toEqual('2026/06/15/[$LATEST]abc123'); + }); + + test('returns undefined when no stream is present', () => { + expect(extractLogStreamName('Some other failure reason')).toBeUndefined(); + }); + + test('returns undefined for an undefined message', () => { + expect(extractLogStreamName(undefined)).toBeUndefined(); + }); +}); + +describe('parseLambdaLogEvents', () => { + const msgs = (events: Array<{ message?: string }>) => parseLambdaLogEvents(events).map(e => e.message); + + test('strips the timestamp/requestId prefix from 
text-format lines and aligns the level', () => { + expect(msgs([ + { message: '2026-06-19T18:25:11.112Z\treq-1\tERROR\tBoom: it failed' }, + { message: '2026-06-19T18:25:11.113Z\treq-1\tINFO\tall good' }, + ])).toEqual([ + 'ERROR Boom: it failed', + 'INFO all good', + ]); + }); + + test('drops Lambda platform boilerplate (INIT_START/START/END/REPORT)', () => { + expect(msgs([ + { message: 'INIT_START Runtime Version: nodejs:20.v101' }, + { message: 'START RequestId: req-1 Version: $LATEST' }, + { message: '2026-06-19T18:25:11.112Z\treq-1\tERROR\tthe real error' }, + { message: 'END RequestId: req-1' }, + { message: 'REPORT RequestId: req-1\tDuration: 1009 ms' }, + ])).toEqual(['ERROR the real error']); + }); + + test('keeps INFO-level application output (failure detail often rides in INFO)', () => { + // The cfn-response failure body is logged at INFO; it must survive. + const out = msgs([ + { message: '2026-06-19T18:25:11.113Z\treq-1\tINFO\tResponse body: {"Status":"FAILED","Data":{"error":"x"}}' }, + ]); + expect(out).toEqual(['INFO Response body: {"Status":"FAILED","Data":{"error":"x"}}']); + }); + + test('normalizes JSON-format events to LEVEL + message', () => { + expect(msgs([ + { message: '{"timestamp":"2026-06-19T18:25:11.112Z","level":"ERROR","requestId":"req-1","message":"Boom: it failed"}' }, + { message: '{"level":"INFO","message":"all good"}' }, + ])).toEqual([ + 'ERROR Boom: it failed', + 'INFO all good', + ]); + }); + + test('drops JSON platform events', () => { + expect(msgs([ + { message: '{"time":"2026-06-19T18:25:11Z","type":"platform.report","record":{"metrics":{"maxMemoryUsedMB":74}}}' }, + { message: '{"level":"ERROR","message":"kept"}' }, + ])).toEqual(['ERROR kept']); + }); + + test('renders a JSON error envelope with stack trace', () => { + expect(msgs([ + { message: '{"level":"ERROR","errorMessage":"KeyError: foo","stackTrace":[" at a"," at b"]}' }, + ])).toEqual(['ERROR KeyError: foo\n at a\n at b']); + }); + + test('passes through 
malformed JSON verbatim', () => {
+    expect(msgs([{ message: '{not valid json' }])).toEqual(['{not valid json']);
+  });
+
+  test('passes through plain/unstructured lines verbatim', () => {
+    expect(msgs([
+      { message: 'a plain stdout line with no structure' },
+      { message: ' at SomeStackFrame (file.js:1:2)' },
+    ])).toEqual([
+      'a plain stdout line with no structure',
+      ' at SomeStackFrame (file.js:1:2)',
+    ]);
+  });
+
+  test('handles a mixed batch of text, JSON, platform, and plain lines', () => {
+    expect(msgs([
+      { message: 'START RequestId: req-1 Version: $LATEST' },
+      { message: '2026-06-19T18:25:11.112Z\treq-1\tWARN\ttext line' },
+      { message: '{"level":"ERROR","message":"json line"}' },
+      { message: 'bare line' },
+    ])).toEqual([
+      'WARN text line',
+      'ERROR json line',
+      'bare line',
+    ]);
+  });
+
+  test('skips events with no message', () => {
+    expect(msgs([{ message: undefined }, { message: 'kept' }])).toEqual(['kept']);
+  });
+});
+
+/**
+ * Tests for `investigateResource` when the failed resource is a custom resource
+ * (`Custom::*` or `AWS::CloudFormation::CustomResource`).
+ *
+ * Each test stubs the CloudFormation, CloudWatch Logs and Lambda clients and then checks
+ * which log group / log stream was queried and what the resulting
+ * 'Custom Resource Lambda Logs' context contains.
+ */
+describe('investigateResource for custom resources', () => {
+  const STACK_ARN = 'arn:aws:cloudformation:us-east-1:123456789012:stack/MyStack/abc';
+
+  /**
+   * Build the `ResourceError` fed to `investigateResource`.
+   *
+   * Defaults describe a failed `Custom::MyThing` resource with logical ID `MyCustomResource`
+   * in `STACK_ARN`, whose message names the log stream `2026/06/15/[$LATEST]streamabc`.
+   *
+   * @param overrides - fields that replace the defaults (spread last, so they always win;
+   *   an explicit `undefined` clears a default, as the "no logical ID" test relies on).
+   */
+  function customResourceError(overrides: Partial<ResourceError> = {}): ResourceError {
+    return {
+      stackArn: STACK_ARN,
+      parentStackLogicalIds: [],
+      logicalId: 'MyCustomResource',
+      resourceType: 'Custom::MyThing',
+      message: 'See the details in CloudWatch Log Stream: 2026/06/15/[$LATEST]streamabc',
+      ...overrides,
+    };
+  }
+
+  /**
+   * Serialize a minimal JSON template containing only `MyCustomResource`
+   * (type `Custom::MyThing`) with the given `ServiceToken`.
+   *
+   * @param serviceToken - any JSON-serializable value: a literal ARN string or an
+   *   intrinsic such as `{ Ref: ... }` / `{ 'Fn::GetAtt': ... }`.
+   * @returns the template body as a JSON string, suitable for `GetTemplateCommand` stubs.
+   */
+  function templateWith(serviceToken: unknown): string {
+    return JSON.stringify({
+      Resources: {
+        MyCustomResource: { Type: 'Custom::MyThing', Properties: { ServiceToken: serviceToken } },
+      },
+    });
+  }
+
+  test('reads the configured log group from the template without a live function call (rollback-proof)', async () => {
+    // ServiceToken is a GetAtt to a function defined in this stack whose LoggingConfig.LogGroup
+    // is a literal. The function may be deleted by rollback, so we must NOT need getFunctionConfiguration.
+    mockCloudFormationClient.on(GetTemplateCommand).resolves({
+      TemplateBody: JSON.stringify({
+        Resources: {
+          MyCustomResource: { Type: 'Custom::MyThing', Properties: { ServiceToken: { 'Fn::GetAtt': ['ProviderFn', 'Arn'] } } },
+          ProviderFn: { Type: 'AWS::Lambda::Function', Properties: { LoggingConfig: { LogGroup: '/custom/grp' } } },
+        },
+      }),
+    });
+    mockCloudFormationClient.on(DescribeStackResourcesCommand).resolves({
+      StackResources: [{ LogicalResourceId: 'ProviderFn', PhysicalResourceId: 'arn:aws:lambda:us-east-1:123456789012:function:provider-fn' } as any],
+    });
+    // Convention group empty; the configured group (from template) has the logs.
+    mockCloudWatchClient.on(FilterLogEventsCommand, { logGroupName: '/aws/lambda/provider-fn' }).resolves({ events: [] });
+    mockCloudWatchClient.on(FilterLogEventsCommand, { logGroupName: '/custom/grp' }).resolves({ events: [{ message: 'configured group line' }] });
+
+    const result = await investigateResource(customResourceError(), sdk, debug);
+
+    // The whole point: log group came from the template, so no live function call was needed.
+    expect(mockLambdaClient).not.toHaveReceivedCommand(GetFunctionConfigurationCommand);
+    const logs = result.find(c => c.source === 'Custom Resource Lambda Logs');
+    expect(logs!.messages).toEqual(['Logs from /custom/grp:', 'configured group line']);
+  });
+
+  test('resolves a template LoggingConfig.LogGroup given as a Ref to a log-group resource', async () => {
+    mockCloudFormationClient.on(GetTemplateCommand).resolves({
+      TemplateBody: JSON.stringify({
+        Resources: {
+          MyCustomResource: { Type: 'Custom::MyThing', Properties: { ServiceToken: { 'Fn::GetAtt': ['ProviderFn', 'Arn'] } } },
+          ProviderFn: { Type: 'AWS::Lambda::Function', Properties: { LoggingConfig: { LogGroup: { Ref: 'FnLogs' } } } },
+          FnLogs: { Type: 'AWS::Logs::LogGroup', Properties: { LogGroupName: '/explicit/group/name' } },
+        },
+      }),
+    });
+    mockCloudFormationClient.on(DescribeStackResourcesCommand).resolves({
+      StackResources: [{ LogicalResourceId: 'ProviderFn', PhysicalResourceId: 'arn:aws:lambda:us-east-1:123456789012:function:provider-fn' } as any],
+    });
+    mockCloudWatchClient.on(FilterLogEventsCommand, { logGroupName: '/aws/lambda/provider-fn' }).resolves({ events: [] });
+    mockCloudWatchClient.on(FilterLogEventsCommand, { logGroupName: '/explicit/group/name' }).resolves({ events: [{ message: 'x' }] });
+
+    const result = await investigateResource(customResourceError(), sdk, debug);
+
+    expect(mockLambdaClient).not.toHaveReceivedCommand(GetFunctionConfigurationCommand);
+    const logs = result.find(c => c.source === 'Custom Resource Lambda Logs');
+    expect(logs!.messages[0]).toEqual('Logs from /explicit/group/name:');
+  });
+
+  test('resolves a Ref log group with no explicit name via describeStackResources (CDK default)', async () => {
+    // The common CDK case: the AWS::Logs::LogGroup has no LogGroupName, so CloudFormation
+    // generates the physical name. We must resolve it via describeStackResources (which still
+    // returns the RETAINed group after rollback), not give up.
+    mockCloudFormationClient.on(GetTemplateCommand).resolves({
+      TemplateBody: JSON.stringify({
+        Resources: {
+          MyCustomResource: { Type: 'Custom::MyThing', Properties: { ServiceToken: { 'Fn::GetAtt': ['ProviderFn', 'Arn'] } } },
+          ProviderFn: { Type: 'AWS::Lambda::Function', Properties: { LoggingConfig: { LogGroup: { Ref: 'FnLogs' } } } },
+          FnLogs: { Type: 'AWS::Logs::LogGroup', Properties: { RetentionInDays: 7 } }, // no LogGroupName
+        },
+      }),
+    });
+    // ServiceToken's ProviderFn and the log group's FnLogs are resolved by logical ID.
+    mockCloudFormationClient.on(DescribeStackResourcesCommand, { StackName: STACK_ARN, LogicalResourceId: 'ProviderFn' })
+      .resolves({ StackResources: [{ LogicalResourceId: 'ProviderFn', PhysicalResourceId: 'arn:aws:lambda:us-east-1:123456789012:function:provider-fn' } as any] });
+    mockCloudFormationClient.on(DescribeStackResourcesCommand, { StackName: STACK_ARN, LogicalResourceId: 'FnLogs' })
+      .resolves({ StackResources: [{ LogicalResourceId: 'FnLogs', PhysicalResourceId: 'MyStack-FnLogs-GENERATED123' } as any] });
+    mockCloudWatchClient.on(FilterLogEventsCommand, { logGroupName: '/aws/lambda/provider-fn' }).resolves({ events: [] });
+    mockCloudWatchClient.on(FilterLogEventsCommand, { logGroupName: 'MyStack-FnLogs-GENERATED123' }).resolves({ events: [{ message: 'advanced logging line' }] });
+
+    const result = await investigateResource(customResourceError(), sdk, debug);
+
+    expect(mockLambdaClient).not.toHaveReceivedCommand(GetFunctionConfigurationCommand);
+    const logs = result.find(c => c.source === 'Custom Resource Lambda Logs');
+    expect(logs!.messages).toEqual(['Logs from MyStack-FnLogs-GENERATED123:', 'advanced logging line']);
+  });
+
+  test('resolves a literal-ARN ServiceToken and fetches the failing stream from the convention group', async () => {
+    mockCloudFormationClient.on(GetTemplateCommand).resolves({
+      TemplateBody: templateWith('arn:aws:lambda:us-east-1:123456789012:function:my-cr-fn'),
+    });
+    mockCloudWatchClient.on(FilterLogEventsCommand).resolves({ events: [{ message: 'Traceback: KeyError "Foo"' }] });
+
+    const result = await investigateResource(customResourceError(), sdk, debug);
+
+    expect(mockCloudWatchClient).toHaveReceivedCommandWith(FilterLogEventsCommand, {
+      logGroupName: '/aws/lambda/my-cr-fn',
+      logStreamNames: ['2026/06/15/[$LATEST]streamabc'],
+    });
+    // Convention group had events, so we must NOT have called getFunctionConfiguration.
+    expect(mockLambdaClient).not.toHaveReceivedCommand(GetFunctionConfigurationCommand);
+    const logs = result.find(c => c.source === 'Custom Resource Lambda Logs');
+    expect(logs).toBeDefined();
+    expect(logs!.messages).toEqual(['Logs from /aws/lambda/my-cr-fn:', 'Traceback: KeyError "Foo"']);
+    expect(logs!.linkLabel).toEqual('Logs');
+    expect(logs!.link).toContain('logsV2:log-groups');
+  });
+
+  test('resolves an Fn::GetAtt ServiceToken via describeStackResources', async () => {
+    mockCloudFormationClient.on(GetTemplateCommand).resolves({
+      TemplateBody: templateWith({ 'Fn::GetAtt': ['ProviderFn', 'Arn'] }),
+    });
+    mockCloudFormationClient.on(DescribeStackResourcesCommand).resolves({
+      StackResources: [{
+        LogicalResourceId: 'ProviderFn',
+        PhysicalResourceId: 'arn:aws:lambda:us-east-1:123456789012:function:provider-fn',
+      } as any],
+    });
+    mockCloudWatchClient.on(FilterLogEventsCommand).resolves({ events: [{ message: 'log line' }] });
+
+    const result = await investigateResource(customResourceError(), sdk, debug);
+
+    expect(mockCloudFormationClient).toHaveReceivedCommandWith(DescribeStackResourcesCommand, {
+      StackName: STACK_ARN,
+      LogicalResourceId: 'ProviderFn',
+    });
+    expect(mockCloudWatchClient).toHaveReceivedCommandWith(FilterLogEventsCommand, { logGroupName: '/aws/lambda/provider-fn' });
+    expect(result.find(c => c.source === 'Custom Resource Lambda Logs')).toBeDefined();
+  });
+
+  test('resolves a Ref ServiceToken via describeStackResources', async () => {
+    mockCloudFormationClient.on(GetTemplateCommand).resolves({ TemplateBody: templateWith({ Ref: 'ProviderFn' }) });
+    mockCloudFormationClient.on(DescribeStackResourcesCommand).resolves({
+      StackResources: [{
+        LogicalResourceId: 'ProviderFn',
+        PhysicalResourceId: 'arn:aws:lambda:us-east-1:123456789012:function:ref-fn',
+      } as any],
+    });
+    mockCloudWatchClient.on(FilterLogEventsCommand).resolves({ events: [{ message: 'x' }] });
+
+    await investigateResource(customResourceError(), sdk, debug);
+
+    expect(mockCloudFormationClient).toHaveReceivedCommandWith(DescribeStackResourcesCommand, {
+      StackName: STACK_ARN,
+      LogicalResourceId: 'ProviderFn',
+    });
+  });
+
+  test('falls back to the LoggingConfig log group when the convention group is empty', async () => {
+    mockCloudFormationClient.on(GetTemplateCommand).resolves({
+      TemplateBody: templateWith('arn:aws:lambda:us-east-1:123456789012:function:my-cr-fn'),
+    });
+    // Convention group empty; custom group has the logs.
+    mockCloudWatchClient.on(FilterLogEventsCommand, { logGroupName: '/aws/lambda/my-cr-fn' }).resolves({ events: [] });
+    mockCloudWatchClient.on(FilterLogEventsCommand, { logGroupName: '/custom/log/group' }).resolves({ events: [{ message: 'custom group line' }] });
+    mockLambdaClient.on(GetFunctionConfigurationCommand).resolves({ LoggingConfig: { LogGroup: '/custom/log/group' } });
+
+    const result = await investigateResource(customResourceError(), sdk, debug);
+
+    expect(mockLambdaClient).toHaveReceivedCommandWith(GetFunctionConfigurationCommand, { FunctionName: 'my-cr-fn' });
+    const logs = result.find(c => c.source === 'Custom Resource Lambda Logs');
+    expect(logs!.messages).toEqual(['Logs from /custom/log/group:', 'custom group line']);
+    expect(logs!.link).toContain('$252Fcustom'); // double-encoded /custom...
+  });
+
+  test('bounds the log query to a window around the failure timestamp', async () => {
+    const failureTime = new Date('2026-06-15T12:00:00.000Z');
+    mockCloudFormationClient.on(GetTemplateCommand).resolves({
+      TemplateBody: templateWith('arn:aws:lambda:us-east-1:123456789012:function:my-cr-fn'),
+    });
+    mockCloudWatchClient.on(FilterLogEventsCommand).resolves({ events: [{ message: 'rollback failure' }] });
+
+    await investigateResource(customResourceError({ timestamp: failureTime }), sdk, debug);
+
+    expect(mockCloudWatchClient).toHaveReceivedCommandWith(FilterLogEventsCommand, {
+      logGroupName: '/aws/lambda/my-cr-fn',
+      startTime: failureTime.valueOf() - 2 * 60 * 1000,
+      endTime: failureTime.valueOf() + 15 * 60 * 1000,
+    });
+  });
+
+  test('handles AWS::CloudFormation::CustomResource type', async () => {
+    mockCloudFormationClient.on(GetTemplateCommand).resolves({
+      TemplateBody: JSON.stringify({
+        Resources: {
+          MyCustomResource: {
+            Type: 'AWS::CloudFormation::CustomResource',
+            Properties: { ServiceToken: 'arn:aws:lambda:us-east-1:123456789012:function:my-cr-fn' },
+          },
+        },
+      }),
+    });
+    mockCloudWatchClient.on(FilterLogEventsCommand).resolves({ events: [{ message: 'x' }] });
+
+    const result = await investigateResource(
+      customResourceError({ resourceType: 'AWS::CloudFormation::CustomResource' }), sdk, debug,
+    );
+    expect(result.find(c => c.source === 'Custom Resource Lambda Logs')).toBeDefined();
+  });
+
+  test('emits a no-logs context when no events are found in either group', async () => {
+    mockCloudFormationClient.on(GetTemplateCommand).resolves({
+      TemplateBody: templateWith('arn:aws:lambda:us-east-1:123456789012:function:my-cr-fn'),
+    });
+    mockCloudWatchClient.on(FilterLogEventsCommand).resolves({ events: [] });
+    mockLambdaClient.on(GetFunctionConfigurationCommand).resolves({}); // no custom group
+
+    const result = await investigateResource(customResourceError(), sdk, debug);
+    const logs = result.find(c => c.source === 'Custom Resource Lambda Logs');
+    expect(logs!.messages.join('\n')).toMatch(/No log events found/);
+  });
+
+  test('returns empty when the resource has no ServiceToken', async () => {
+    mockCloudFormationClient.on(GetTemplateCommand).resolves({
+      TemplateBody: JSON.stringify({ Resources: { MyCustomResource: { Type: 'Custom::MyThing', Properties: {} } } }),
+    });
+
+    const result = await investigateResource(customResourceError(), sdk, debug);
+    expect(result).toEqual([]);
+  });
+
+  test('returns empty when the ServiceToken is not a Lambda', async () => {
+    mockCloudFormationClient.on(GetTemplateCommand).resolves({
+      TemplateBody: templateWith('arn:aws:sns:us-east-1:123456789012:my-topic'),
+    });
+
+    const result = await investigateResource(customResourceError(), sdk, debug);
+    expect(result).toEqual([]);
+  });
+
+  test('returns empty when no logical ID is available', async () => {
+    const result = await investigateResource(customResourceError({ logicalId: undefined }), sdk, debug);
+    expect(result).toEqual([]);
+  });
+
+  test('resolves a YAML string-form Fn::GetAtt ServiceToken', async () => {
+    // YAML `!GetAtt ProviderFn.Arn` deserializes to { 'Fn::GetAtt': 'ProviderFn.Arn' } (a string).
+    mockCloudFormationClient.on(GetTemplateCommand).resolves({
+      TemplateBody: templateWith({ 'Fn::GetAtt': 'ProviderFn.Arn' }),
+    });
+    mockCloudFormationClient.on(DescribeStackResourcesCommand).resolves({
+      StackResources: [{
+        LogicalResourceId: 'ProviderFn',
+        PhysicalResourceId: 'arn:aws:lambda:us-east-1:123456789012:function:yaml-fn',
+      } as any],
+    });
+    mockCloudWatchClient.on(FilterLogEventsCommand).resolves({ events: [{ message: 'x' }] });
+
+    await investigateResource(customResourceError(), sdk, debug);
+
+    expect(mockCloudFormationClient).toHaveReceivedCommandWith(DescribeStackResourcesCommand, {
+      StackName: STACK_ARN,
+      LogicalResourceId: 'ProviderFn',
+    });
+    expect(mockCloudWatchClient).toHaveReceivedCommandWith(FilterLogEventsCommand, { logGroupName: '/aws/lambda/yaml-fn' });
+  });
+
+  test('falls back to a group-wide scan when the targeted stream has no events (stale stream)', async () => {
+    // Stream-scoped query returns nothing (e.g. stale create-time stream on an update failure);
+    // the un-scoped group scan finds the actual failing invocation's logs.
+    mockCloudFormationClient.on(GetTemplateCommand).resolves({
+      TemplateBody: templateWith('arn:aws:lambda:us-east-1:123456789012:function:my-cr-fn'),
+    });
+    // Call 1 (targeted stream) returns nothing; call 2 (un-scoped group scan) finds the logs.
+    // Sequenced with resolvesOnce so the fallback path is genuinely required (partial input
+    // matchers would overlap and let the targeted call satisfy the group-scan mock).
+    mockCloudWatchClient.on(FilterLogEventsCommand)
+      .resolvesOnce({ events: [] })
+      .resolves({ events: [{ message: 'actual failure on update' }] });
+
+    const result = await investigateResource(customResourceError(), sdk, debug);
+
+    const logs = result.find(c => c.source === 'Custom Resource Lambda Logs');
+    expect(logs!.messages).toEqual(['Logs from /aws/lambda/my-cr-fn:', 'actual failure on update']);
+    // It must have tried the targeted stream first...
+    expect(mockCloudWatchClient).toHaveReceivedNthCommandWith(1, FilterLogEventsCommand, {
+      logGroupName: '/aws/lambda/my-cr-fn',
+      logStreamNames: ['2026/06/15/[$LATEST]streamabc'],
+    });
+    // ...then a second, un-scoped group scan (no logStreamNames).
+    expect(mockCloudWatchClient).toHaveReceivedCommandTimes(FilterLogEventsCommand, 2);
+    const secondCall = mockCloudWatchClient.commandCalls(FilterLogEventsCommand)[1].args[0].input as any;
+    expect(secondCall.logStreamNames).toBeUndefined();
+  });
+
+  test('links to the configured log group (not the convention group) when both are empty', async () => {
+    mockCloudFormationClient.on(GetTemplateCommand).resolves({
+      TemplateBody: templateWith('arn:aws:lambda:us-east-1:123456789012:function:my-cr-fn'),
+    });
+    // Every filterLogEvents (convention + configured, targeted + scan) returns empty.
+    mockCloudWatchClient.on(FilterLogEventsCommand).resolves({ events: [] });
+    mockLambdaClient.on(GetFunctionConfigurationCommand).resolves({ LoggingConfig: { LogGroup: '/custom/log/group' } });
+
+    const result = await investigateResource(customResourceError(), sdk, debug);
+
+    const logs = result.find(c => c.source === 'Custom Resource Lambda Logs');
+    expect(logs!.messages.join('\n')).toMatch(/No log events found/);
+    // The header and link must point at the configured group, where the function actually logs.
+    expect(logs!.messages[0]).toEqual('Logs from /custom/log/group:');
+    expect(logs!.link).toContain('$252Fcustom$252Flog$252Fgroup');
+    expect(logs!.link).not.toContain('my-cr-fn');
+  });
+});