From 10e0fd8306a4e459ce3a88bb1d5ad16810a1ad3c Mon Sep 17 00:00:00 2001 From: Ian Hou <45278651+iankhou@users.noreply.github.com> Date: Thu, 18 Jun 2026 17:15:32 -0400 Subject: [PATCH 1/7] feat(toolkit-lib): surface custom resource Lambda logs on deployment failure When a custom resource (Custom::* or AWS::CloudFormation::CustomResource) fails to deploy, fetch the backing Lambda's CloudWatch logs and surface them as additional diagnostic context, mirroring the ECS service investigation. - Resolve the backing Lambda by reading the resource's ServiceToken from the stack's original template (literal ARN, Fn::GetAtt, or Ref via describeStackResources). - Derive the log group from the /aws/lambda/<functionName> convention; only call getFunctionConfiguration to read a custom LoggingConfig.LogGroup if the convention group has no events. (cfn-response usage does not imply the default group; advanced logging controls can override it.) - Target the exact failing invocation by extracting the log stream name from the cfn-response status reason ("See the details in CloudWatch Log Stream: <logStreamName>"). - Bound the query to a window around the failure event timestamp so update and rollback failures resolve the right invocation, not the latest stream. Adds an optional ResourceError.timestamp for this. - Add getFunctionConfiguration to the Lambda SDK client. All exploratory calls are best-effort: failures are logged at debug level and never break diagnosis. Verified end-to-end against a live cfn-response custom resource deployment failure. 
--- .../toolkit-lib/lib/api/aws-auth/sdk.ts | 6 + .../api/diagnosing/resource-investigation.ts | 286 +++++++++++++++++- .../lib/api/stack-events/resource-errors.ts | 14 + .../diagnosing/resource-investigation.test.ts | 238 ++++++++++++++- 4 files changed, 537 insertions(+), 7 deletions(-) diff --git a/packages/@aws-cdk/toolkit-lib/lib/api/aws-auth/sdk.ts b/packages/@aws-cdk/toolkit-lib/lib/api/aws-auth/sdk.ts index 3863e4c1c..d83a2ce34 100644 --- a/packages/@aws-cdk/toolkit-lib/lib/api/aws-auth/sdk.ts +++ b/packages/@aws-cdk/toolkit-lib/lib/api/aws-auth/sdk.ts @@ -318,6 +318,9 @@ import { type ListAliasesCommandOutput, } from '@aws-sdk/client-kms'; import { + GetFunctionConfigurationCommand, + type GetFunctionConfigurationCommandInput, + type GetFunctionConfigurationCommandOutput, InvokeCommand, type InvokeCommandInput, type InvokeCommandOutput, @@ -598,6 +601,7 @@ export interface IKMSClient { } export interface ILambdaClient { + getFunctionConfiguration(input: GetFunctionConfigurationCommandInput): Promise; invokeCommand(input: InvokeCommandInput): Promise; publishVersion(input: PublishVersionCommandInput): Promise; updateAlias(input: UpdateAliasCommandInput): Promise; @@ -1048,6 +1052,8 @@ export class SDK { public lambda(): ILambdaClient { const client = new LambdaClient(this.config); return { + getFunctionConfiguration: (input: GetFunctionConfigurationCommandInput): Promise => + client.send(new GetFunctionConfigurationCommand(input)), invokeCommand: (input: InvokeCommandInput): Promise => client.send(new InvokeCommand(input)), publishVersion: (input: PublishVersionCommandInput): Promise => client.send(new PublishVersionCommand(input)), diff --git a/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts b/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts index 260234e64..948efc10a 100644 --- a/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts +++ 
b/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts @@ -1,5 +1,6 @@ import type { AdditionalDiagnosticContext } from '../../actions/diagnose'; -import type { ICloudWatchLogsClient, IECSClient, SDK } from '../aws-auth/sdk'; +import { deserializeStructure } from '../../util'; +import type { ICloudFormationClient, ICloudWatchLogsClient, IECSClient, ILambdaClient, SDK } from '../aws-auth/sdk'; import type { ResourceError } from '../stack-events/resource-errors'; /** @@ -38,12 +39,14 @@ export async function investigateResource( debug: (msg: string) => Promise, options: InvestigateOptions = {}, ): Promise { - switch (err.resourceType) { - case 'AWS::ECS::Service': - return investigateEcsService(err, sdk, debug, options); - default: - return []; + const resourceType = err.resourceType ?? ''; + if (resourceType === 'AWS::ECS::Service') { + return investigateEcsService(err, sdk, debug, options); } + if (resourceType === 'AWS::CloudFormation::CustomResource' || resourceType.startsWith('Custom::')) { + return investigateCustomResource(err, sdk, debug); + } + return []; } async function investigateEcsService( @@ -391,6 +394,277 @@ async function fetchRecentLogs( } } +/** + * How far before/after the failure event to search CloudWatch Logs when we have a timestamp. + * + * The pre-window absorbs minor clock skew; the post-window covers output the function + * emits while it runs after the CloudFormation event was recorded. + */ +const LOG_WINDOW_BEFORE_MS = 2 * 60 * 1000; +const LOG_WINDOW_AFTER_MS = 15 * 60 * 1000; + +/** Fallback look-back when no failure timestamp is available (matches the ECS path). */ +const FALLBACK_LOG_WINDOW_MS = 30 * 60 * 1000; + +/** + * Investigate a failed custom resource by surfacing its backing Lambda's CloudWatch logs. + * + * The CloudFormation event does not name the backing function — only the resource's + * `ServiceToken` (in the template) does. 
We resolve that to a function name, derive the + * log group (the `/aws/lambda/` convention, confirmed via the function's LoggingConfig + * only if the convention turns up empty), and fetch the relevant log lines. + * + * When the handler uses the cfn-response library, the failing log stream name is embedded + * in the status reason ("See the details in CloudWatch Log Stream: "), so we can + * target that exact invocation. + */ +async function investigateCustomResource( + err: ResourceError, + sdk: SDK, + debug: (msg: string) => Promise, +): Promise { + if (!err.logicalId) { + await debug('Custom resource investigation: no logical ID available'); + return []; + } + const stackName = err.stackArn; + if (!stackName) { + await debug('Custom resource investigation: no stack ARN available'); + return []; + } + + const cfn = sdk.cloudFormation(); + const lambda = sdk.lambda(); + const cwl = sdk.cloudWatchLogs(); + const region = sdk.currentRegion; + + const serviceToken = await getCustomResourceServiceToken(cfn, stackName, err.logicalId, debug); + if (serviceToken === undefined) { + return []; + } + + const functionName = await resolveServiceTokenToFunctionName(cfn, stackName, serviceToken, debug); + if (!functionName) { + await debug('Custom resource investigation: could not resolve ServiceToken to a Lambda function'); + return []; + } + + // The cfn-response library writes the failing log stream name into the status reason + // (and uses it as the default physical ID). Targeting it gives the exact invocation. + const streamName = extractLogStreamName(err.message) ?? logStreamNameFromPhysicalId(err.physicalId); + + return fetchCustomResourceLogs(cwl, lambda, functionName, streamName, err.timestamp, region, debug); +} + +/** + * Read the failed custom resource's `ServiceToken` property from the stack's original template. + * + * Returns `undefined` if the template can't be read or the resource/property is missing. 
+ */ +async function getCustomResourceServiceToken( + cfn: ICloudFormationClient, + stackName: string, + logicalId: string, + debug: (msg: string) => Promise, +): Promise { + try { + const resp = await cfn.getTemplate({ StackName: stackName }); + if (!resp.TemplateBody) { + await debug('Custom resource investigation: empty template body'); + return undefined; + } + const template = deserializeStructure(resp.TemplateBody); + const serviceToken = template?.Resources?.[logicalId]?.Properties?.ServiceToken; + if (serviceToken === undefined) { + await debug(`Custom resource investigation: no ServiceToken on resource "${logicalId}"`); + } + return serviceToken; + } catch (e: any) { + await debug(`Custom resource investigation: failed to read template: ${e.message}`); + return undefined; + } +} + +/** + * Resolve a `ServiceToken` value (a literal ARN, an `Fn::GetAtt`, or a `Ref`) to a Lambda + * function name. Intrinsics are resolved to a physical ID via `describeStackResources`. + */ +async function resolveServiceTokenToFunctionName( + cfn: ICloudFormationClient, + stackName: string, + serviceToken: any, + debug: (msg: string) => Promise, +): Promise { + const referencedLogicalId = serviceTokenReferencedLogicalId(serviceToken); + if (referencedLogicalId) { + try { + const resp = await cfn.describeStackResources({ StackName: stackName, LogicalResourceId: referencedLogicalId }); + const physicalId = resp.StackResources?.[0]?.PhysicalResourceId; + return physicalId ? 
functionNameFromArnOrName(physicalId) : undefined; + } catch (e: any) { + await debug(`Custom resource investigation: failed to resolve ServiceToken reference "${referencedLogicalId}": ${e.message}`); + return undefined; + } + } + + if (typeof serviceToken === 'string') { + return functionNameFromArnOrName(serviceToken); + } + + await debug('Custom resource investigation: unsupported ServiceToken shape'); + return undefined; +} + +/** + * If a ServiceToken is an `Fn::GetAtt` or `Ref` intrinsic, return the referenced logical ID. + */ +export function serviceTokenReferencedLogicalId(serviceToken: any): string | undefined { + if (!serviceToken || typeof serviceToken !== 'object') { + return undefined; + } + const getAtt = serviceToken['Fn::GetAtt']; + if (Array.isArray(getAtt) && typeof getAtt[0] === 'string') { + return getAtt[0]; + } + if (typeof serviceToken.Ref === 'string') { + return serviceToken.Ref; + } + return undefined; +} + +/** + * Extract a Lambda function name from a function ARN or a bare name. + * + * Returns `undefined` for non-Lambda ARNs (e.g. an SNS-topic ServiceToken). + */ +export function functionNameFromArnOrName(arnOrName: string): string | undefined { + const arnMatch = arnOrName.match(/^arn:[^:]+:lambda:[^:]*:[^:]*:function:([^:]+)/); + if (arnMatch) { + return arnMatch[1]; + } + if (arnOrName.startsWith('arn:')) { + return undefined; + } + return arnOrName || undefined; +} + +/** + * Extract the log stream name out of a cfn-response failure reason + * ("See the details in CloudWatch Log Stream: "). + */ +export function extractLogStreamName(message: string | undefined): string | undefined { + const match = message?.match(/CloudWatch Log Stream:\s*(\S+)/); + return match ? match[1] : undefined; +} + +/** + * cfn-response defaults the physical ID to the log stream name. Use it only when it looks + * like a Lambda log stream (`YYYY/MM/DD/...`), so a user-provided physical ID isn't mistaken + * for one. 
+ */ +function logStreamNameFromPhysicalId(physicalId: string | undefined): string | undefined { + return physicalId && /^\d{4}\/\d{2}\/\d{2}\/.+/.test(physicalId) ? physicalId : undefined; +} + +async function fetchCustomResourceLogs( + cwl: ICloudWatchLogsClient, + lambda: ILambdaClient, + functionName: string, + streamName: string | undefined, + timestamp: Date | undefined, + region: string, + debug: (msg: string) => Promise, +): Promise { + const failureTime = timestamp?.valueOf(); + const startTime = failureTime !== undefined ? failureTime - LOG_WINDOW_BEFORE_MS : Date.now() - FALLBACK_LOG_WINDOW_MS; + const endTime = failureTime !== undefined ? failureTime + LOG_WINDOW_AFTER_MS : undefined; + + // Convention first; only pay for getFunctionConfiguration if the convention group is empty. + const conventionGroup = `/aws/lambda/${functionName}`; + let messages = await fetchLogLines(cwl, conventionGroup, streamName, startTime, endTime, debug); + let logGroup = conventionGroup; + + if (messages === undefined) { + const configuredGroup = await configuredLogGroup(lambda, functionName, debug); + if (configuredGroup && configuredGroup !== conventionGroup) { + const fromConfigured = await fetchLogLines(cwl, configuredGroup, streamName, startTime, endTime, debug); + if (fromConfigured !== undefined) { + messages = fromConfigured; + logGroup = configuredGroup; + } + } + } + + if (messages === undefined) { + return [{ + source: 'Custom Resource Lambda Logs', + messages: ['No log events found around the time of failure. The function may not have produced output, or logging may not be configured.'], + link: cloudWatchLogsConsoleUrl(region, logGroup), + linkLabel: 'Logs', + }]; + } + + return [{ + source: 'Custom Resource Lambda Logs', + messages, + link: cloudWatchLogsConsoleUrl(region, logGroup), + linkLabel: 'Logs', + }]; +} + +/** + * Fetch and trim recent log lines from a group. 
Returns `undefined` when the group has no + * events in the window (so the caller can try a different group). + */ +async function fetchLogLines( + cwl: ICloudWatchLogsClient, + logGroup: string, + streamName: string | undefined, + startTime: number, + endTime: number | undefined, + debug: (msg: string) => Promise, +): Promise { + try { + const resp = await cwl.filterLogEvents({ + logGroupName: logGroup, + startTime, + ...(endTime !== undefined ? { endTime } : {}), + limit: 1000, + ...(streamName ? { logStreamNames: [streamName] } : {}), + }); + const events = resp.events ?? []; + if (events.length === 0) { + await debug(`Custom resource investigation: no log events in ${logGroup}${streamName ? ` (stream: ${streamName})` : ''}`); + return undefined; + } + const allMessages = events.map(e => e.message?.trimEnd()).filter((m): m is string => m != null); + const messages = allMessages.slice(-MAX_LOG_LINES); + const omitted = allMessages.length - messages.length; + if (omitted > 0) { + messages.unshift(`... (${omitted} earlier lines omitted)`); + } + return messages; + } catch (e: any) { + await debug(`Custom resource investigation: failed to fetch logs from ${logGroup}: ${e.message}`); + return undefined; + } +} + +/** Read the function's configured (advanced-logging) log group, if any. */ +async function configuredLogGroup( + lambda: ILambdaClient, + functionName: string, + debug: (msg: string) => Promise, +): Promise { + try { + const resp = await lambda.getFunctionConfiguration({ FunctionName: functionName }); + return resp.LoggingConfig?.LogGroup; + } catch (e: any) { + await debug(`Custom resource investigation: failed to read function configuration: ${e.message}`); + return undefined; + } +} + // CloudWatch console uses double-URI-encoding with '$' replacing '%' for the log group in the fragment. 
function cloudWatchLogsConsoleUrl(region: string, logGroup: string): string { const encodedLogGroup = encodeURIComponent(encodeURIComponent(logGroup)).replace(/%/g, '$'); diff --git a/packages/@aws-cdk/toolkit-lib/lib/api/stack-events/resource-errors.ts b/packages/@aws-cdk/toolkit-lib/lib/api/stack-events/resource-errors.ts index 35b265e7b..943216101 100644 --- a/packages/@aws-cdk/toolkit-lib/lib/api/stack-events/resource-errors.ts +++ b/packages/@aws-cdk/toolkit-lib/lib/api/stack-events/resource-errors.ts @@ -43,6 +43,19 @@ export interface ResourceError { * Error code of the resource */ readonly errorCode?: string; + + /** + * Timestamp of the failure event, if known. + * + * Used to bound exploratory lookups (e.g. CloudWatch Logs queries) to the time + * around the failure. This matters for resources whose logs span multiple + * deployments (creates, updates, rollbacks), where the most recent invocation + * is not necessarily the one that failed. + * + * Only populated for errors derived from stack events; absent for change-set + * and early-validation errors. + */ + readonly timestamp?: Date; } /** @@ -121,6 +134,7 @@ function errorFromEvent(ev: ResourceEvent): ResourceError { stackArn: ev.event.StackId ?? 
'', errorCode: extractErrorCode(ev.event), physicalId: ev.event.PhysicalResourceId, + timestamp: ev.event.Timestamp, }; } diff --git a/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts b/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts index cd4dd166b..1c00ac96f 100644 --- a/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts +++ b/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts @@ -1,3 +1,4 @@ +import { DescribeStackResourcesCommand, GetTemplateCommand } from '@aws-sdk/client-cloudformation'; import { FilterLogEventsCommand } from '@aws-sdk/client-cloudwatch-logs'; import { DescribeServicesCommand, @@ -5,11 +6,20 @@ import { DescribeTasksCommand, ListTasksCommand, } from '@aws-sdk/client-ecs'; -import { investigateResource, parseEcsServiceIdentifier } from '../../../lib/api/diagnosing/resource-investigation'; +import { GetFunctionConfigurationCommand } from '@aws-sdk/client-lambda'; +import { + extractLogStreamName, + functionNameFromArnOrName, + investigateResource, + parseEcsServiceIdentifier, + serviceTokenReferencedLogicalId, +} from '../../../lib/api/diagnosing/resource-investigation'; import type { ResourceError } from '../../../lib/api/stack-events/resource-errors'; import { + mockCloudFormationClient, mockCloudWatchClient, mockECSClient, + mockLambdaClient, MockSdk, restoreSdkMocksToDefault, } from '../../_helpers/mock-sdk'; @@ -387,3 +397,229 @@ describe('investigateResource for AWS::ECS::Service', () => { expect(cwl?.messages[0]).toMatch(/No CloudWatch Logs found/); }); }); + +describe('serviceTokenReferencedLogicalId', () => { + test('extracts the logical ID from an Fn::GetAtt', () => { + expect(serviceTokenReferencedLogicalId({ 'Fn::GetAtt': ['MyFn', 'Arn'] })).toEqual('MyFn'); + }); + + test('extracts the logical ID from a Ref', () => { + expect(serviceTokenReferencedLogicalId({ Ref: 'MyFn' })).toEqual('MyFn'); + }); + + 
test('returns undefined for a literal string', () => { + expect(serviceTokenReferencedLogicalId('arn:aws:lambda:us-east-1:123456789012:function:my-fn')).toBeUndefined(); + }); + + test('returns undefined for an unrecognized object', () => { + expect(serviceTokenReferencedLogicalId({ 'Fn::Sub': 'x' })).toBeUndefined(); + }); +}); + +describe('functionNameFromArnOrName', () => { + test('parses the name from a function ARN', () => { + expect(functionNameFromArnOrName('arn:aws:lambda:us-east-1:123456789012:function:my-fn')).toEqual('my-fn'); + }); + + test('parses the name from a function ARN with version suffix', () => { + expect(functionNameFromArnOrName('arn:aws:lambda:us-east-1:123456789012:function:my-fn:42')).toEqual('my-fn'); + }); + + test('handles non-aws partitions', () => { + expect(functionNameFromArnOrName('arn:aws-cn:lambda:cn-north-1:123456789012:function:my-fn')).toEqual('my-fn'); + }); + + test('passes through a bare function name', () => { + expect(functionNameFromArnOrName('my-fn')).toEqual('my-fn'); + }); + + test('returns undefined for a non-lambda ARN', () => { + expect(functionNameFromArnOrName('arn:aws:sns:us-east-1:123456789012:my-topic')).toBeUndefined(); + }); +}); + +describe('extractLogStreamName', () => { + test('extracts the cfn-response stream from the failure reason', () => { + expect(extractLogStreamName('See the details in CloudWatch Log Stream: 2026/06/15/[$LATEST]abc123')) + .toEqual('2026/06/15/[$LATEST]abc123'); + }); + + test('returns undefined when no stream is present', () => { + expect(extractLogStreamName('Some other failure reason')).toBeUndefined(); + }); + + test('returns undefined for an undefined message', () => { + expect(extractLogStreamName(undefined)).toBeUndefined(); + }); +}); + +describe('investigateResource for custom resources', () => { + const STACK_ARN = 'arn:aws:cloudformation:us-east-1:123456789012:stack/MyStack/abc'; + + function customResourceError(overrides: Partial = {}): ResourceError { + return { + 
stackArn: STACK_ARN, + parentStackLogicalIds: [], + logicalId: 'MyCustomResource', + resourceType: 'Custom::MyThing', + message: 'See the details in CloudWatch Log Stream: 2026/06/15/[$LATEST]streamabc', + ...overrides, + }; + } + + function templateWith(serviceToken: any): string { + return JSON.stringify({ + Resources: { + MyCustomResource: { Type: 'Custom::MyThing', Properties: { ServiceToken: serviceToken } }, + }, + }); + } + + test('resolves a literal-ARN ServiceToken and fetches the failing stream from the convention group', async () => { + mockCloudFormationClient.on(GetTemplateCommand).resolves({ + TemplateBody: templateWith('arn:aws:lambda:us-east-1:123456789012:function:my-cr-fn'), + }); + mockCloudWatchClient.on(FilterLogEventsCommand).resolves({ events: [{ message: 'Traceback: KeyError "Foo"' }] }); + + const result = await investigateResource(customResourceError(), sdk, debug); + + expect(mockCloudWatchClient).toHaveReceivedCommandWith(FilterLogEventsCommand, { + logGroupName: '/aws/lambda/my-cr-fn', + logStreamNames: ['2026/06/15/[$LATEST]streamabc'], + }); + // Convention group had events, so we must NOT have called getFunctionConfiguration. 
+ expect(mockLambdaClient).not.toHaveReceivedCommand(GetFunctionConfigurationCommand); + const logs = result.find(c => c.source === 'Custom Resource Lambda Logs'); + expect(logs).toBeDefined(); + expect(logs!.messages).toEqual(['Traceback: KeyError "Foo"']); + expect(logs!.linkLabel).toEqual('Logs'); + expect(logs!.link).toContain('logsV2:log-groups'); + }); + + test('resolves an Fn::GetAtt ServiceToken via describeStackResources', async () => { + mockCloudFormationClient.on(GetTemplateCommand).resolves({ + TemplateBody: templateWith({ 'Fn::GetAtt': ['ProviderFn', 'Arn'] }), + }); + mockCloudFormationClient.on(DescribeStackResourcesCommand).resolves({ + StackResources: [{ + LogicalResourceId: 'ProviderFn', + PhysicalResourceId: 'arn:aws:lambda:us-east-1:123456789012:function:provider-fn', + } as any], + }); + mockCloudWatchClient.on(FilterLogEventsCommand).resolves({ events: [{ message: 'log line' }] }); + + const result = await investigateResource(customResourceError(), sdk, debug); + + expect(mockCloudFormationClient).toHaveReceivedCommandWith(DescribeStackResourcesCommand, { + StackName: STACK_ARN, + LogicalResourceId: 'ProviderFn', + }); + expect(mockCloudWatchClient).toHaveReceivedCommandWith(FilterLogEventsCommand, { logGroupName: '/aws/lambda/provider-fn' }); + expect(result.find(c => c.source === 'Custom Resource Lambda Logs')).toBeDefined(); + }); + + test('resolves a Ref ServiceToken via describeStackResources', async () => { + mockCloudFormationClient.on(GetTemplateCommand).resolves({ TemplateBody: templateWith({ Ref: 'ProviderFn' }) }); + mockCloudFormationClient.on(DescribeStackResourcesCommand).resolves({ + StackResources: [{ + LogicalResourceId: 'ProviderFn', + PhysicalResourceId: 'arn:aws:lambda:us-east-1:123456789012:function:ref-fn', + } as any], + }); + mockCloudWatchClient.on(FilterLogEventsCommand).resolves({ events: [{ message: 'x' }] }); + + await investigateResource(customResourceError(), sdk, debug); + + 
expect(mockCloudFormationClient).toHaveReceivedCommandWith(DescribeStackResourcesCommand, { + StackName: STACK_ARN, + LogicalResourceId: 'ProviderFn', + }); + }); + + test('falls back to the LoggingConfig log group when the convention group is empty', async () => { + mockCloudFormationClient.on(GetTemplateCommand).resolves({ + TemplateBody: templateWith('arn:aws:lambda:us-east-1:123456789012:function:my-cr-fn'), + }); + // Convention group empty; custom group has the logs. + mockCloudWatchClient.on(FilterLogEventsCommand, { logGroupName: '/aws/lambda/my-cr-fn' }).resolves({ events: [] }); + mockCloudWatchClient.on(FilterLogEventsCommand, { logGroupName: '/custom/log/group' }).resolves({ events: [{ message: 'custom group line' }] }); + mockLambdaClient.on(GetFunctionConfigurationCommand).resolves({ LoggingConfig: { LogGroup: '/custom/log/group' } }); + + const result = await investigateResource(customResourceError(), sdk, debug); + + expect(mockLambdaClient).toHaveReceivedCommandWith(GetFunctionConfigurationCommand, { FunctionName: 'my-cr-fn' }); + const logs = result.find(c => c.source === 'Custom Resource Lambda Logs'); + expect(logs!.messages).toEqual(['custom group line']); + expect(logs!.link).toContain('$252Fcustom'); // double-encoded /custom... 
+ }); + + test('bounds the log query to a window around the failure timestamp', async () => { + const failureTime = new Date('2026-06-15T12:00:00.000Z'); + mockCloudFormationClient.on(GetTemplateCommand).resolves({ + TemplateBody: templateWith('arn:aws:lambda:us-east-1:123456789012:function:my-cr-fn'), + }); + mockCloudWatchClient.on(FilterLogEventsCommand).resolves({ events: [{ message: 'rollback failure' }] }); + + await investigateResource(customResourceError({ timestamp: failureTime }), sdk, debug); + + expect(mockCloudWatchClient).toHaveReceivedCommandWith(FilterLogEventsCommand, { + logGroupName: '/aws/lambda/my-cr-fn', + startTime: failureTime.valueOf() - 2 * 60 * 1000, + endTime: failureTime.valueOf() + 15 * 60 * 1000, + }); + }); + + test('handles AWS::CloudFormation::CustomResource type', async () => { + mockCloudFormationClient.on(GetTemplateCommand).resolves({ + TemplateBody: JSON.stringify({ + Resources: { + MyCustomResource: { + Type: 'AWS::CloudFormation::CustomResource', + Properties: { ServiceToken: 'arn:aws:lambda:us-east-1:123456789012:function:my-cr-fn' }, + }, + }, + }), + }); + mockCloudWatchClient.on(FilterLogEventsCommand).resolves({ events: [{ message: 'x' }] }); + + const result = await investigateResource( + customResourceError({ resourceType: 'AWS::CloudFormation::CustomResource' }), sdk, debug, + ); + expect(result.find(c => c.source === 'Custom Resource Lambda Logs')).toBeDefined(); + }); + + test('emits a no-logs context when no events are found in either group', async () => { + mockCloudFormationClient.on(GetTemplateCommand).resolves({ + TemplateBody: templateWith('arn:aws:lambda:us-east-1:123456789012:function:my-cr-fn'), + }); + mockCloudWatchClient.on(FilterLogEventsCommand).resolves({ events: [] }); + mockLambdaClient.on(GetFunctionConfigurationCommand).resolves({}); // no custom group + + const result = await investigateResource(customResourceError(), sdk, debug); + const logs = result.find(c => c.source === 'Custom Resource 
Lambda Logs'); + expect(logs!.messages[0]).toMatch(/No log events found/); + }); + + test('returns empty when the resource has no ServiceToken', async () => { + mockCloudFormationClient.on(GetTemplateCommand).resolves({ + TemplateBody: JSON.stringify({ Resources: { MyCustomResource: { Type: 'Custom::MyThing', Properties: {} } } }), + }); + + const result = await investigateResource(customResourceError(), sdk, debug); + expect(result).toEqual([]); + }); + + test('returns empty when the ServiceToken is not a Lambda', async () => { + mockCloudFormationClient.on(GetTemplateCommand).resolves({ + TemplateBody: templateWith('arn:aws:sns:us-east-1:123456789012:my-topic'), + }); + + const result = await investigateResource(customResourceError(), sdk, debug); + expect(result).toEqual([]); + }); + + test('returns empty when no logical ID is available', async () => { + const result = await investigateResource(customResourceError({ logicalId: undefined }), sdk, debug); + expect(result).toEqual([]); + }); +}); From 29d0e7f45ceaa028a2994dc8f3955d4c43042394 Mon Sep 17 00:00:00 2001 From: Ian Hou <45278651+iankhou@users.noreply.github.com> Date: Fri, 19 Jun 2026 10:35:13 -0400 Subject: [PATCH 2/7] fix(toolkit-lib): harden custom resource log resolution from code review Address three correctness/UX issues found in review of the custom-resource diagnostics: - Resolve the YAML string short-form of Fn::GetAtt ("LogicalId.Attr"), not just the JSON array form, so ServiceTokens in hand-authored YAML stacks resolve. - On an empty stream-targeted log query, retry the same group un-scoped over the time window. The cfn-response stream name is pinned to the original create invocation, so on update/rollback failures it is stale; the group-wide scan recovers the actual failing invocation's logs. 
- When both the convention and configured log groups are empty, link the user to the function's configured LoggingConfig.LogGroup rather than the convention group, which may not exist for advanced-logging functions. Each fix is covered by a test verified via mutation (removing the fix fails it). --- .../api/diagnosing/resource-investigation.ts | 47 ++++++++----- .../diagnosing/resource-investigation.test.ts | 67 +++++++++++++++++++ 2 files changed, 99 insertions(+), 15 deletions(-) diff --git a/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts b/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts index 948efc10a..b52f9c127 100644 --- a/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts +++ b/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts @@ -523,9 +523,14 @@ export function serviceTokenReferencedLogicalId(serviceToken: any): string | und return undefined; } const getAtt = serviceToken['Fn::GetAtt']; + // Array form (JSON / CDK output): ["LogicalId", "Arn"]. if (Array.isArray(getAtt) && typeof getAtt[0] === 'string') { return getAtt[0]; } + // String short-form (how YAML `!GetAtt LogicalId.Arn` deserializes): "LogicalId.Attr". + if (typeof getAtt === 'string') { + return getAtt.split('.')[0] || undefined; + } if (typeof serviceToken.Ref === 'string') { return serviceToken.Ref; } @@ -582,31 +587,22 @@ async function fetchCustomResourceLogs( // Convention first; only pay for getFunctionConfiguration if the convention group is empty. const conventionGroup = `/aws/lambda/${functionName}`; let messages = await fetchLogLines(cwl, conventionGroup, streamName, startTime, endTime, debug); + // The group we point the user at. Once we learn the function's configured log group, prefer + // it for the link even if it too is empty — it's where the function actually logs, whereas + // the convention group may not exist for advanced-logging functions. 
let logGroup = conventionGroup; if (messages === undefined) { const configuredGroup = await configuredLogGroup(lambda, functionName, debug); if (configuredGroup && configuredGroup !== conventionGroup) { - const fromConfigured = await fetchLogLines(cwl, configuredGroup, streamName, startTime, endTime, debug); - if (fromConfigured !== undefined) { - messages = fromConfigured; - logGroup = configuredGroup; - } + logGroup = configuredGroup; + messages = await fetchLogLines(cwl, configuredGroup, streamName, startTime, endTime, debug); } } - if (messages === undefined) { - return [{ - source: 'Custom Resource Lambda Logs', - messages: ['No log events found around the time of failure. The function may not have produced output, or logging may not be configured.'], - link: cloudWatchLogsConsoleUrl(region, logGroup), - linkLabel: 'Logs', - }]; - } - return [{ source: 'Custom Resource Lambda Logs', - messages, + messages: messages ?? ['No log events found around the time of failure. The function may not have produced output, or logging may not be configured.'], link: cloudWatchLogsConsoleUrl(region, logGroup), linkLabel: 'Logs', }]; @@ -623,6 +619,27 @@ async function fetchLogLines( startTime: number, endTime: number | undefined, debug: (msg: string) => Promise, +): Promise { + // Try the targeted stream first (most relevant), but the cfn-response stream name can be + // stale on update/rollback failures (it's pinned to the original create invocation). If + // the targeted query finds nothing, fall back to a group-wide scan over the time window so + // a stale stream can't hide the actual failing invocation's logs. 
+ if (streamName) { + const targeted = await filterLogLines(cwl, logGroup, streamName, startTime, endTime, debug); + if (targeted !== undefined) { + return targeted; + } + } + return filterLogLines(cwl, logGroup, undefined, startTime, endTime, debug); +} + +async function filterLogLines( + cwl: ICloudWatchLogsClient, + logGroup: string, + streamName: string | undefined, + startTime: number, + endTime: number | undefined, + debug: (msg: string) => Promise, ): Promise { try { const resp = await cwl.filterLogEvents({ diff --git a/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts b/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts index 1c00ac96f..41b590027 100644 --- a/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts +++ b/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts @@ -622,4 +622,71 @@ describe('investigateResource for custom resources', () => { const result = await investigateResource(customResourceError({ logicalId: undefined }), sdk, debug); expect(result).toEqual([]); }); + + test('resolves a YAML string-form Fn::GetAtt ServiceToken', async () => { + // YAML `!GetAtt ProviderFn.Arn` deserializes to { 'Fn::GetAtt': 'ProviderFn.Arn' } (a string). 
+ mockCloudFormationClient.on(GetTemplateCommand).resolves({ + TemplateBody: templateWith({ 'Fn::GetAtt': 'ProviderFn.Arn' }), + }); + mockCloudFormationClient.on(DescribeStackResourcesCommand).resolves({ + StackResources: [{ + LogicalResourceId: 'ProviderFn', + PhysicalResourceId: 'arn:aws:lambda:us-east-1:123456789012:function:yaml-fn', + } as any], + }); + mockCloudWatchClient.on(FilterLogEventsCommand).resolves({ events: [{ message: 'x' }] }); + + await investigateResource(customResourceError(), sdk, debug); + + expect(mockCloudFormationClient).toHaveReceivedCommandWith(DescribeStackResourcesCommand, { + StackName: STACK_ARN, + LogicalResourceId: 'ProviderFn', + }); + expect(mockCloudWatchClient).toHaveReceivedCommandWith(FilterLogEventsCommand, { logGroupName: '/aws/lambda/yaml-fn' }); + }); + + test('falls back to a group-wide scan when the targeted stream has no events (stale stream)', async () => { + // Stream-scoped query returns nothing (e.g. stale create-time stream on an update failure); + // the un-scoped group scan finds the actual failing invocation's logs. + mockCloudFormationClient.on(GetTemplateCommand).resolves({ + TemplateBody: templateWith('arn:aws:lambda:us-east-1:123456789012:function:my-cr-fn'), + }); + // Call 1 (targeted stream) returns nothing; call 2 (un-scoped group scan) finds the logs. + // Sequenced with resolvesOnce so the fallback path is genuinely required (partial input + // matchers would overlap and let the targeted call satisfy the group-scan mock). + mockCloudWatchClient.on(FilterLogEventsCommand) + .resolvesOnce({ events: [] }) + .resolves({ events: [{ message: 'actual failure on update' }] }); + + const result = await investigateResource(customResourceError(), sdk, debug); + + const logs = result.find(c => c.source === 'Custom Resource Lambda Logs'); + expect(logs!.messages).toEqual(['actual failure on update']); + // It must have tried the targeted stream first... 
+ expect(mockCloudWatchClient).toHaveReceivedNthCommandWith(1, FilterLogEventsCommand, { + logGroupName: '/aws/lambda/my-cr-fn', + logStreamNames: ['2026/06/15/[$LATEST]streamabc'], + }); + // ...then a second, un-scoped group scan (no logStreamNames). + expect(mockCloudWatchClient).toHaveReceivedCommandTimes(FilterLogEventsCommand, 2); + const secondCall = mockCloudWatchClient.commandCalls(FilterLogEventsCommand)[1].args[0].input as any; + expect(secondCall.logStreamNames).toBeUndefined(); + }); + + test('links to the configured log group (not the convention group) when both are empty', async () => { + mockCloudFormationClient.on(GetTemplateCommand).resolves({ + TemplateBody: templateWith('arn:aws:lambda:us-east-1:123456789012:function:my-cr-fn'), + }); + // Every filterLogEvents (convention + configured, targeted + scan) returns empty. + mockCloudWatchClient.on(FilterLogEventsCommand).resolves({ events: [] }); + mockLambdaClient.on(GetFunctionConfigurationCommand).resolves({ LoggingConfig: { LogGroup: '/custom/log/group' } }); + + const result = await investigateResource(customResourceError(), sdk, debug); + + const logs = result.find(c => c.source === 'Custom Resource Lambda Logs'); + expect(logs!.messages[0]).toMatch(/No log events found/); + // The link must point at the configured group, where the function actually logs. 
+ expect(logs!.link).toContain('$252Fcustom$252Flog$252Fgroup'); + expect(logs!.link).not.toContain('my-cr-fn'); + }); }); From 4bdec2764a0c3ee6dd450fd11b6191e112a15088 Mon Sep 17 00:00:00 2001 From: Ian Hou <45278651+iankhou@users.noreply.github.com> Date: Fri, 19 Jun 2026 10:38:54 -0400 Subject: [PATCH 3/7] refactor(toolkit-lib): share log-line trimming between ECS and custom resource paths Extract the duplicated CloudWatch message trim/cap/omitted-marker logic into a single trimToRecentLines helper used by both investigation paths, and have the ECS path consume the shared FALLBACK_LOG_WINDOW_MS constant instead of an inline literal so the two paths' fallback windows can't silently diverge. --- .../api/diagnosing/resource-investigation.ts | 47 ++++++++++--------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts b/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts index b52f9c127..0cc0e003f 100644 --- a/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts +++ b/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts @@ -11,6 +11,29 @@ import type { ResourceError } from '../stack-events/resource-errors'; */ const MAX_LOG_LINES = 50; +/** Fallback look-back when no failure timestamp is available. */ +const FALLBACK_LOG_WINDOW_MS = 30 * 60 * 1000; + +/** + * Turn raw CloudWatch log event messages into the trimmed lines we render. + * + * Keeps only the most recent {@link MAX_LOG_LINES} (newer output is more useful for + * diagnosis) and prepends an "N earlier lines omitted" marker when truncation happened. + * This is the single truncation point shared by all CloudWatch contexts — the formatter + * renders the result verbatim. 
+ */ +function trimToRecentLines(events: Array<{ message?: string }>): string[] { + const allMessages = events + .map(e => e.message?.trimEnd()) + .filter((m): m is string => m != null); + const messages = allMessages.slice(-MAX_LOG_LINES); + const omitted = allMessages.length - messages.length; + if (omitted > 0) { + messages.unshift(`... (${omitted} earlier lines omitted)`); + } + return messages; +} + /** * Options that influence how a resource is investigated. */ @@ -350,7 +373,7 @@ async function fetchRecentLogs( const resp = await cwl.filterLogEvents({ logGroupName: logConfig.logGroup, - startTime: Date.now() - 30 * 60 * 1000, + startTime: Date.now() - FALLBACK_LOG_WINDOW_MS, limit: 1000, ...(targetStream ? { logStreamNames: [targetStream] } @@ -363,16 +386,7 @@ async function fetchRecentLogs( return undefined; } - // Keep the most recent lines (newer output is more useful for diagnosis). - // This is the only truncation point — the formatter renders these verbatim. - const allMessages = events - .map(e => e.message?.trimEnd()) - .filter((m): m is string => m != null); - const messages: string[] = allMessages.slice(-MAX_LOG_LINES); - const omitted = allMessages.length - messages.length; - if (omitted > 0) { - messages.unshift(`... (${omitted} earlier lines omitted)`); - } + const messages = trimToRecentLines(events); if (taskIds.length > 1) { messages.push(`(showing logs from last failed task; ${taskIds.length - 1} other failed task(s) available in console)`); @@ -403,9 +417,6 @@ async function fetchRecentLogs( const LOG_WINDOW_BEFORE_MS = 2 * 60 * 1000; const LOG_WINDOW_AFTER_MS = 15 * 60 * 1000; -/** Fallback look-back when no failure timestamp is available (matches the ECS path). */ -const FALLBACK_LOG_WINDOW_MS = 30 * 60 * 1000; - /** * Investigate a failed custom resource by surfacing its backing Lambda's CloudWatch logs. 
* @@ -654,13 +665,7 @@ async function filterLogLines( await debug(`Custom resource investigation: no log events in ${logGroup}${streamName ? ` (stream: ${streamName})` : ''}`); return undefined; } - const allMessages = events.map(e => e.message?.trimEnd()).filter((m): m is string => m != null); - const messages = allMessages.slice(-MAX_LOG_LINES); - const omitted = allMessages.length - messages.length; - if (omitted > 0) { - messages.unshift(`... (${omitted} earlier lines omitted)`); - } - return messages; + return trimToRecentLines(events); } catch (e: any) { await debug(`Custom resource investigation: failed to fetch logs from ${logGroup}: ${e.message}`); return undefined; From 7687d8a888f20815ff83e2d96bb47cbe91e4c71f Mon Sep 17 00:00:00 2001 From: Ian Hou <45278651+iankhou@users.noreply.github.com> Date: Fri, 19 Jun 2026 10:42:29 -0400 Subject: [PATCH 4/7] feat(toolkit-lib): label custom resource logs with their log group Prepend a 'Logs from <log-group>:' line to the custom-resource log context so the output identifies which function's logs are shown in plain text. The formatter renders messages but not the structured `source` field, and the console link is URL-encoded, so without this the log group was not human-readable in the output. 
--- .../lib/api/diagnosing/resource-investigation.ts | 7 ++++++- .../api/diagnosing/resource-investigation.test.ts | 13 +++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts b/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts index 0cc0e003f..aabfe3bd6 100644 --- a/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts +++ b/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts @@ -611,9 +611,14 @@ async function fetchCustomResourceLogs( } } + // Lead with the log group so the user can tell which function these logs belong to + // (the formatter renders messages but not `source`, and the link is URL-encoded). + const header = `Logs from ${logGroup}:`; + const body = messages ?? ['No log events found around the time of failure. The function may not have produced output, or logging may not be configured.']; + return [{ source: 'Custom Resource Lambda Logs', - messages: messages ?? ['No log events found around the time of failure. 
The function may not have produced output, or logging may not be configured.'], + messages: [header, ...body], link: cloudWatchLogsConsoleUrl(region, logGroup), linkLabel: 'Logs', }]; diff --git a/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts b/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts index 41b590027..21070fa9b 100644 --- a/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts +++ b/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts @@ -491,7 +491,7 @@ describe('investigateResource for custom resources', () => { expect(mockLambdaClient).not.toHaveReceivedCommand(GetFunctionConfigurationCommand); const logs = result.find(c => c.source === 'Custom Resource Lambda Logs'); expect(logs).toBeDefined(); - expect(logs!.messages).toEqual(['Traceback: KeyError "Foo"']); + expect(logs!.messages).toEqual(['Logs from /aws/lambda/my-cr-fn:', 'Traceback: KeyError "Foo"']); expect(logs!.linkLabel).toEqual('Logs'); expect(logs!.link).toContain('logsV2:log-groups'); }); @@ -549,7 +549,7 @@ describe('investigateResource for custom resources', () => { expect(mockLambdaClient).toHaveReceivedCommandWith(GetFunctionConfigurationCommand, { FunctionName: 'my-cr-fn' }); const logs = result.find(c => c.source === 'Custom Resource Lambda Logs'); - expect(logs!.messages).toEqual(['custom group line']); + expect(logs!.messages).toEqual(['Logs from /custom/log/group:', 'custom group line']); expect(logs!.link).toContain('$252Fcustom'); // double-encoded /custom... 
}); @@ -597,7 +597,7 @@ describe('investigateResource for custom resources', () => { const result = await investigateResource(customResourceError(), sdk, debug); const logs = result.find(c => c.source === 'Custom Resource Lambda Logs'); - expect(logs!.messages[0]).toMatch(/No log events found/); + expect(logs!.messages.join('\n')).toMatch(/No log events found/); }); test('returns empty when the resource has no ServiceToken', async () => { @@ -661,7 +661,7 @@ describe('investigateResource for custom resources', () => { const result = await investigateResource(customResourceError(), sdk, debug); const logs = result.find(c => c.source === 'Custom Resource Lambda Logs'); - expect(logs!.messages).toEqual(['actual failure on update']); + expect(logs!.messages).toEqual(['Logs from /aws/lambda/my-cr-fn:', 'actual failure on update']); // It must have tried the targeted stream first... expect(mockCloudWatchClient).toHaveReceivedNthCommandWith(1, FilterLogEventsCommand, { logGroupName: '/aws/lambda/my-cr-fn', @@ -684,8 +684,9 @@ describe('investigateResource for custom resources', () => { const result = await investigateResource(customResourceError(), sdk, debug); const logs = result.find(c => c.source === 'Custom Resource Lambda Logs'); - expect(logs!.messages[0]).toMatch(/No log events found/); - // The link must point at the configured group, where the function actually logs. + expect(logs!.messages.join('\n')).toMatch(/No log events found/); + // The header and link must point at the configured group, where the function actually logs. 
+ expect(logs!.messages[0]).toEqual('Logs from /custom/log/group:'); expect(logs!.link).toContain('$252Fcustom$252Flog$252Fgroup'); expect(logs!.link).not.toContain('my-cr-fn'); }); From fe92fd8086bb74ce410ac4992a8e4e6e21521cb2 Mon Sep 17 00:00:00 2001 From: Ian Hou <45278651+iankhou@users.noreply.github.com> Date: Fri, 19 Jun 2026 13:44:34 -0400 Subject: [PATCH 5/7] fix(toolkit-lib): read custom resource log group from template, not live function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A first-deploy custom resource failure rolls back and deletes the backing Lambda, so getFunctionConfiguration is unavailable exactly when we need the configured (advanced-logging) log group. Read LoggingConfig.LogGroup from the stack template instead — the template survives rollback — handling both a literal value and the common CDK shape where it is a Ref to an AWS::Logs::LogGroup resource. Fall back to the live function configuration only when the template value is an unresolvable intrinsic or the function is defined outside this stack. 
--- .../api/diagnosing/resource-investigation.ts | 67 ++++++++++++++----- .../diagnosing/resource-investigation.test.ts | 49 ++++++++++++++ 2 files changed, 99 insertions(+), 17 deletions(-) diff --git a/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts b/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts index aabfe3bd6..04ddffbbc 100644 --- a/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts +++ b/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts @@ -449,33 +449,44 @@ async function investigateCustomResource( const cwl = sdk.cloudWatchLogs(); const region = sdk.currentRegion; - const serviceToken = await getCustomResourceServiceToken(cfn, stackName, err.logicalId, debug); + // Fetch the template once: it carries both the ServiceToken and (for functions defined in + // this stack) the backing function's LoggingConfig. The template survives rollback even + // when the function itself is deleted, so it's the most reliable source for the log group. + const template = await getStackTemplate(cfn, stackName, debug); + if (!template) { + return []; + } + + const serviceToken = template.Resources?.[err.logicalId]?.Properties?.ServiceToken; if (serviceToken === undefined) { + await debug(`Custom resource investigation: no ServiceToken on resource "${err.logicalId}"`); return []; } - const functionName = await resolveServiceTokenToFunctionName(cfn, stackName, serviceToken, debug); + const referencedLogicalId = serviceTokenReferencedLogicalId(serviceToken); + const functionName = await resolveServiceTokenToFunctionName(cfn, stackName, serviceToken, referencedLogicalId, debug); if (!functionName) { await debug('Custom resource investigation: could not resolve ServiceToken to a Lambda function'); return []; } + // Prefer the function's configured log group as read from the template (rollback-proof). 
+ // Only resolvable when the function is defined in this stack (ServiceToken is a Ref/GetAtt). + const templateLogGroup = referencedLogicalId ? configuredLogGroupFromTemplate(template, referencedLogicalId) : undefined; + // The cfn-response library writes the failing log stream name into the status reason // (and uses it as the default physical ID). Targeting it gives the exact invocation. const streamName = extractLogStreamName(err.message) ?? logStreamNameFromPhysicalId(err.physicalId); - return fetchCustomResourceLogs(cwl, lambda, functionName, streamName, err.timestamp, region, debug); + return fetchCustomResourceLogs(cwl, lambda, functionName, templateLogGroup, streamName, err.timestamp, region, debug); } /** - * Read the failed custom resource's `ServiceToken` property from the stack's original template. - * - * Returns `undefined` if the template can't be read or the resource/property is missing. + * Fetch and parse the stack's (original) template. Returns `undefined` if it can't be read. */ -async function getCustomResourceServiceToken( +async function getStackTemplate( cfn: ICloudFormationClient, stackName: string, - logicalId: string, debug: (msg: string) => Promise, ): Promise { try { @@ -484,18 +495,36 @@ async function getCustomResourceServiceToken( await debug('Custom resource investigation: empty template body'); return undefined; } - const template = deserializeStructure(resp.TemplateBody); - const serviceToken = template?.Resources?.[logicalId]?.Properties?.ServiceToken; - if (serviceToken === undefined) { - await debug(`Custom resource investigation: no ServiceToken on resource "${logicalId}"`); - } - return serviceToken; + return deserializeStructure(resp.TemplateBody); } catch (e: any) { await debug(`Custom resource investigation: failed to read template: ${e.message}`); return undefined; } } +/** + * Read a backing Lambda's configured log group from the template, if it can be determined + * without a live API call. 
+ * + * Handles a literal `LoggingConfig.LogGroup`, and the common CDK shape where it is a `Ref` + * to an `AWS::Logs::LogGroup` resource with a literal `LogGroupName`. Returns `undefined` + * for unresolvable intrinsics (caller falls back to the live function configuration). + */ +function configuredLogGroupFromTemplate(template: any, functionLogicalId: string): string | undefined { + const logGroup = template.Resources?.[functionLogicalId]?.Properties?.LoggingConfig?.LogGroup; + if (typeof logGroup === 'string') { + return logGroup; + } + if (logGroup && typeof logGroup === 'object' && typeof logGroup.Ref === 'string') { + const referenced = template.Resources?.[logGroup.Ref]; + const name = referenced?.Properties?.LogGroupName; + if (typeof name === 'string') { + return name; + } + } + return undefined; +} + /** * Resolve a `ServiceToken` value (a literal ARN, an `Fn::GetAtt`, or a `Ref`) to a Lambda * function name. Intrinsics are resolved to a physical ID via `describeStackResources`. @@ -504,9 +533,9 @@ async function resolveServiceTokenToFunctionName( cfn: ICloudFormationClient, stackName: string, serviceToken: any, + referencedLogicalId: string | undefined, debug: (msg: string) => Promise, ): Promise { - const referencedLogicalId = serviceTokenReferencedLogicalId(serviceToken); if (referencedLogicalId) { try { const resp = await cfn.describeStackResources({ StackName: stackName, LogicalResourceId: referencedLogicalId }); @@ -586,6 +615,7 @@ async function fetchCustomResourceLogs( cwl: ICloudWatchLogsClient, lambda: ILambdaClient, functionName: string, + templateLogGroup: string | undefined, streamName: string | undefined, timestamp: Date | undefined, region: string, @@ -595,7 +625,7 @@ async function fetchCustomResourceLogs( const startTime = failureTime !== undefined ? failureTime - LOG_WINDOW_BEFORE_MS : Date.now() - FALLBACK_LOG_WINDOW_MS; const endTime = failureTime !== undefined ? 
failureTime + LOG_WINDOW_AFTER_MS : undefined; - // Convention first; only pay for getFunctionConfiguration if the convention group is empty. + // Convention first; only pay for the configured group if the convention group is empty. const conventionGroup = `/aws/lambda/${functionName}`; let messages = await fetchLogLines(cwl, conventionGroup, streamName, startTime, endTime, debug); // The group we point the user at. Once we learn the function's configured log group, prefer @@ -604,7 +634,10 @@ async function fetchCustomResourceLogs( let logGroup = conventionGroup; if (messages === undefined) { - const configuredGroup = await configuredLogGroup(lambda, functionName, debug); + // Prefer the template-derived group (rollback-proof); fall back to the live function + // configuration only when the template couldn't tell us (e.g. unresolvable intrinsic, or + // the function is defined outside this stack). + const configuredGroup = templateLogGroup ?? await configuredLogGroup(lambda, functionName, debug); if (configuredGroup && configuredGroup !== conventionGroup) { logGroup = configuredGroup; messages = await fetchLogLines(cwl, configuredGroup, streamName, startTime, endTime, debug); diff --git a/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts b/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts index 21070fa9b..baa514fbf 100644 --- a/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts +++ b/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts @@ -475,6 +475,55 @@ describe('investigateResource for custom resources', () => { }); } + test('reads the configured log group from the template without a live function call (rollback-proof)', async () => { + // ServiceToken is a GetAtt to a function defined in this stack whose LoggingConfig.LogGroup + // is a literal. The function may be deleted by rollback, so we must NOT need getFunctionConfiguration. 
+ mockCloudFormationClient.on(GetTemplateCommand).resolves({ + TemplateBody: JSON.stringify({ + Resources: { + MyCustomResource: { Type: 'Custom::MyThing', Properties: { ServiceToken: { 'Fn::GetAtt': ['ProviderFn', 'Arn'] } } }, + ProviderFn: { Type: 'AWS::Lambda::Function', Properties: { LoggingConfig: { LogGroup: '/custom/grp' } } }, + }, + }), + }); + mockCloudFormationClient.on(DescribeStackResourcesCommand).resolves({ + StackResources: [{ LogicalResourceId: 'ProviderFn', PhysicalResourceId: 'arn:aws:lambda:us-east-1:123456789012:function:provider-fn' } as any], + }); + // Convention group empty; the configured group (from template) has the logs. + mockCloudWatchClient.on(FilterLogEventsCommand, { logGroupName: '/aws/lambda/provider-fn' }).resolves({ events: [] }); + mockCloudWatchClient.on(FilterLogEventsCommand, { logGroupName: '/custom/grp' }).resolves({ events: [{ message: 'configured group line' }] }); + + const result = await investigateResource(customResourceError(), sdk, debug); + + // The whole point: log group came from the template, so no live function call was needed. 
+ expect(mockLambdaClient).not.toHaveReceivedCommand(GetFunctionConfigurationCommand); + const logs = result.find(c => c.source === 'Custom Resource Lambda Logs'); + expect(logs!.messages).toEqual(['Logs from /custom/grp:', 'configured group line']); + }); + + test('resolves a template LoggingConfig.LogGroup given as a Ref to a log-group resource', async () => { + mockCloudFormationClient.on(GetTemplateCommand).resolves({ + TemplateBody: JSON.stringify({ + Resources: { + MyCustomResource: { Type: 'Custom::MyThing', Properties: { ServiceToken: { 'Fn::GetAtt': ['ProviderFn', 'Arn'] } } }, + ProviderFn: { Type: 'AWS::Lambda::Function', Properties: { LoggingConfig: { LogGroup: { Ref: 'FnLogs' } } } }, + FnLogs: { Type: 'AWS::Logs::LogGroup', Properties: { LogGroupName: '/explicit/group/name' } }, + }, + }), + }); + mockCloudFormationClient.on(DescribeStackResourcesCommand).resolves({ + StackResources: [{ LogicalResourceId: 'ProviderFn', PhysicalResourceId: 'arn:aws:lambda:us-east-1:123456789012:function:provider-fn' } as any], + }); + mockCloudWatchClient.on(FilterLogEventsCommand, { logGroupName: '/aws/lambda/provider-fn' }).resolves({ events: [] }); + mockCloudWatchClient.on(FilterLogEventsCommand, { logGroupName: '/explicit/group/name' }).resolves({ events: [{ message: 'x' }] }); + + const result = await investigateResource(customResourceError(), sdk, debug); + + expect(mockLambdaClient).not.toHaveReceivedCommand(GetFunctionConfigurationCommand); + const logs = result.find(c => c.source === 'Custom Resource Lambda Logs'); + expect(logs!.messages[0]).toEqual('Logs from /explicit/group/name:'); + }); + test('resolves a literal-ARN ServiceToken and fetches the failing stream from the convention group', async () => { mockCloudFormationClient.on(GetTemplateCommand).resolves({ TemplateBody: templateWith('arn:aws:lambda:us-east-1:123456789012:function:my-cr-fn'), From d638aad0c473bf7a8ae407e19bb100cbccf64241 Mon Sep 17 00:00:00 2001 From: Ian Hou 
<45278651+iankhou@users.noreply.github.com> Date: Fri, 19 Jun 2026 15:00:11 -0400 Subject: [PATCH 6/7] fix(toolkit-lib): resolve CloudFormation-generated custom resource log group names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The template-sourced log group only handled a literal LogGroupName. The common CDK case is an AWS::Logs::LogGroup with no explicit name, where CloudFormation generates the physical name — absent from the template. Resolve that name via describeStackResources (which still returns RETAINed/orphaned resources after a rollback), so advanced-logging functions whose backing Lambda was deleted by rollback still surface their logs. Extract a shared resolvePhysicalId helper used by both the ServiceToken and log-group resolution paths. Verified end-to-end against a live advanced-logging custom resource deployment. --- .../api/diagnosing/resource-investigation.ts | 61 ++++++++++++++----- .../diagnosing/resource-investigation.test.ts | 28 +++++++++ 2 files changed, 73 insertions(+), 16 deletions(-) diff --git a/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts b/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts index 04ddffbbc..6838bc665 100644 --- a/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts +++ b/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts @@ -470,9 +470,11 @@ async function investigateCustomResource( return []; } - // Prefer the function's configured log group as read from the template (rollback-proof). + // Prefer the function's configured log group as derived from the template (rollback-proof). // Only resolvable when the function is defined in this stack (ServiceToken is a Ref/GetAtt). - const templateLogGroup = referencedLogicalId ? configuredLogGroupFromTemplate(template, referencedLogicalId) : undefined; + const templateLogGroup = referencedLogicalId + ? 
await resolveConfiguredLogGroup(cfn, stackName, template, referencedLogicalId, debug) + : undefined; // The cfn-response library writes the failing log stream name into the status reason // (and uses it as the default physical ID). Targeting it gives the exact invocation. @@ -503,14 +505,26 @@ async function getStackTemplate( } /** - * Read a backing Lambda's configured log group from the template, if it can be determined - * without a live API call. + * Resolve the backing Lambda's configured log group from the template. * - * Handles a literal `LoggingConfig.LogGroup`, and the common CDK shape where it is a `Ref` - * to an `AWS::Logs::LogGroup` resource with a literal `LogGroupName`. Returns `undefined` - * for unresolvable intrinsics (caller falls back to the live function configuration). + * The template survives rollback (when the live function may not), so it is the preferred + * source. Handles the function's `LoggingConfig.LogGroup` as: + * - a literal string (returned directly); + * - a `Ref` to an `AWS::Logs::LogGroup` with a literal `LogGroupName` (returned directly); + * - a `Ref` to an `AWS::Logs::LogGroup` whose name CloudFormation generates (the common CDK + * case) — resolved to its physical name via `describeStackResources`, which still returns + * RETAINed/orphaned resources after a rollback. + * + * Returns `undefined` when there is no configured log group or it can't be resolved + * (caller then falls back to the live function configuration). 
*/ -function configuredLogGroupFromTemplate(template: any, functionLogicalId: string): string | undefined { +async function resolveConfiguredLogGroup( + cfn: ICloudFormationClient, + stackName: string, + template: any, + functionLogicalId: string, + debug: (msg: string) => Promise, +): Promise { const logGroup = template.Resources?.[functionLogicalId]?.Properties?.LoggingConfig?.LogGroup; if (typeof logGroup === 'string') { return logGroup; @@ -521,10 +535,31 @@ function configuredLogGroupFromTemplate(template: any, functionLogicalId: string if (typeof name === 'string') { return name; } + // No explicit name (CloudFormation generates it) — resolve the log-group resource's + // physical name, which is the log group name. + return resolvePhysicalId(cfn, stackName, logGroup.Ref, debug); } return undefined; } +/** + * Resolve a resource's physical ID by logical ID. Returns `undefined` on failure. + */ +async function resolvePhysicalId( + cfn: ICloudFormationClient, + stackName: string, + logicalId: string, + debug: (msg: string) => Promise, +): Promise { + try { + const resp = await cfn.describeStackResources({ StackName: stackName, LogicalResourceId: logicalId }); + return resp.StackResources?.[0]?.PhysicalResourceId; + } catch (e: any) { + await debug(`Custom resource investigation: failed to resolve physical ID for "${logicalId}": ${e.message}`); + return undefined; + } +} + /** * Resolve a `ServiceToken` value (a literal ARN, an `Fn::GetAtt`, or a `Ref`) to a Lambda * function name. Intrinsics are resolved to a physical ID via `describeStackResources`. @@ -537,14 +572,8 @@ async function resolveServiceTokenToFunctionName( debug: (msg: string) => Promise, ): Promise { if (referencedLogicalId) { - try { - const resp = await cfn.describeStackResources({ StackName: stackName, LogicalResourceId: referencedLogicalId }); - const physicalId = resp.StackResources?.[0]?.PhysicalResourceId; - return physicalId ? 
functionNameFromArnOrName(physicalId) : undefined; - } catch (e: any) { - await debug(`Custom resource investigation: failed to resolve ServiceToken reference "${referencedLogicalId}": ${e.message}`); - return undefined; - } + const physicalId = await resolvePhysicalId(cfn, stackName, referencedLogicalId, debug); + return physicalId ? functionNameFromArnOrName(physicalId) : undefined; } if (typeof serviceToken === 'string') { diff --git a/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts b/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts index baa514fbf..78473d1e8 100644 --- a/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts +++ b/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts @@ -524,6 +524,34 @@ describe('investigateResource for custom resources', () => { expect(logs!.messages[0]).toEqual('Logs from /explicit/group/name:'); }); + test('resolves a Ref log group with no explicit name via describeStackResources (CDK default)', async () => { + // The common CDK case: the AWS::Logs::LogGroup has no LogGroupName, so CloudFormation + // generates the physical name. We must resolve it via describeStackResources (which still + // returns the RETAINed group after rollback), not give up. + mockCloudFormationClient.on(GetTemplateCommand).resolves({ + TemplateBody: JSON.stringify({ + Resources: { + MyCustomResource: { Type: 'Custom::MyThing', Properties: { ServiceToken: { 'Fn::GetAtt': ['ProviderFn', 'Arn'] } } }, + ProviderFn: { Type: 'AWS::Lambda::Function', Properties: { LoggingConfig: { LogGroup: { Ref: 'FnLogs' } } } }, + FnLogs: { Type: 'AWS::Logs::LogGroup', Properties: { RetentionInDays: 7 } }, // no LogGroupName + }, + }), + }); + // ServiceToken's ProviderFn and the log group's FnLogs are resolved by logical ID. 
+ mockCloudFormationClient.on(DescribeStackResourcesCommand, { StackName: STACK_ARN, LogicalResourceId: 'ProviderFn' }) + .resolves({ StackResources: [{ LogicalResourceId: 'ProviderFn', PhysicalResourceId: 'arn:aws:lambda:us-east-1:123456789012:function:provider-fn' } as any] }); + mockCloudFormationClient.on(DescribeStackResourcesCommand, { StackName: STACK_ARN, LogicalResourceId: 'FnLogs' }) + .resolves({ StackResources: [{ LogicalResourceId: 'FnLogs', PhysicalResourceId: 'MyStack-FnLogs-GENERATED123' } as any] }); + mockCloudWatchClient.on(FilterLogEventsCommand, { logGroupName: '/aws/lambda/provider-fn' }).resolves({ events: [] }); + mockCloudWatchClient.on(FilterLogEventsCommand, { logGroupName: 'MyStack-FnLogs-GENERATED123' }).resolves({ events: [{ message: 'advanced logging line' }] }); + + const result = await investigateResource(customResourceError(), sdk, debug); + + expect(mockLambdaClient).not.toHaveReceivedCommand(GetFunctionConfigurationCommand); + const logs = result.find(c => c.source === 'Custom Resource Lambda Logs'); + expect(logs!.messages).toEqual(['Logs from MyStack-FnLogs-GENERATED123:', 'advanced logging line']); + }); + test('resolves a literal-ARN ServiceToken and fetches the failing stream from the convention group', async () => { mockCloudFormationClient.on(GetTemplateCommand).resolves({ TemplateBody: templateWith('arn:aws:lambda:us-east-1:123456789012:function:my-cr-fn'), From 189399d9f69d48060cad7019fa705952f50f4a75 Mon Sep 17 00:00:00 2001 From: Ian Hou <45278651+iankhou@users.noreply.github.com> Date: Fri, 19 Jun 2026 15:44:16 -0400 Subject: [PATCH 7/7] feat(toolkit-lib): normalize custom resource Lambda logs for readability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parse Lambda CloudWatch log events into readable lines before rendering, on the custom-resource path only (ECS logs are arbitrary container output and are left as-is). 
Handles both Lambda log formats: - Text: strip the per-line timestamp/requestId prefix, render aligned 'LEVEL message'. - JSON: render the level + message (and error envelopes' errorMessage + stackTrace) instead of dumping raw JSON objects. Drops only Lambda platform boilerplate (INIT_START/START/END/REPORT and JSON platform.* events). Application output is never dropped by level — failure detail often rides in INFO lines (e.g. the cfn-response 'Response body'). Lines we don't recognize pass through verbatim, and full logs remain available via the console link. --- .../api/diagnosing/resource-investigation.ts | 118 +++++++++++++++++- .../diagnosing/resource-investigation.test.ts | 87 +++++++++++++ 2 files changed, 204 insertions(+), 1 deletion(-) diff --git a/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts b/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts index 6838bc665..a232005be 100644 --- a/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts +++ b/packages/@aws-cdk/toolkit-lib/lib/api/diagnosing/resource-investigation.ts @@ -34,6 +34,120 @@ function trimToRecentLines(events: Array<{ message?: string }>): string[] { return messages; } +/** + * Lambda platform log lines (text format) that carry no application signal. + */ +const LAMBDA_PLATFORM_LINE = /^(INIT_START|START RequestId:|END RequestId:|REPORT RequestId:)/; + +/** + * Normalize Lambda CloudWatch log events into readable lines. + * + * Lambda emits logs in one of two formats (per the function's `LoggingConfig.LogFormat`): + * - **Text**: `timestamp\trequestId\tlevel\tmessage`, plus platform lines. + * - **JSON**: one JSON object per event (`{ timestamp, level, message, ... }`). + * + * For both we surface `LEVEL message` (or just the message when there's no level), strip the + * redundant per-line timestamp/requestId (it's all one invocation), and drop pure platform + * boilerplate.
We never drop application output — failure detail is often logged at INFO + * (e.g. the cfn-response "Response body" line). Anything we don't recognize passes through + * verbatim, and the full logs remain available via the console link. + * + * This is Lambda-specific; it is not applied to ECS logs, which are arbitrary container output. + */ +export function parseLambdaLogEvents(events: Array<{ message?: string }>): Array<{ message: string }> { + const out: Array<{ message: string }> = []; + for (const e of events) { + const raw = e.message; + if (raw == null) { + continue; + } + const normalized = normalizeLambdaLine(raw); + if (normalized !== undefined) { + out.push({ message: normalized }); + } + } + return out; +} + +/** + * Normalize a single Lambda log line. Returns `undefined` to drop the line (platform noise), + * or the cleaned-up text to keep. + */ +function normalizeLambdaLine(raw: string): string | undefined { + const trimmed = raw.trimEnd(); + + // JSON-format event: { timestamp, level, message, ... } (one object per line). + const jsonResult = normalizeJsonLogLine(trimmed); + if (jsonResult !== undefined) { + return jsonResult || undefined; + } + + // Text-format platform boilerplate: drop. + if (LAMBDA_PLATFORM_LINE.test(trimmed)) { + return undefined; + } + + // Text-format app line: `timestamp\trequestId\tlevel\tmessage`. + // Strip the timestamp + requestId prefix; keep `LEVEL message` (or the rest verbatim). + const parts = trimmed.split('\t'); + if (parts.length >= 4 && /^\d{4}-\d{2}-\d{2}T/.test(parts[0])) { + const level = parts[2]; + const message = parts.slice(3).join('\t'); + return formatLeveledLine(level, message); + } + + // Unrecognized (continuation line, plain stdout, etc.) — keep verbatim. + return trimmed; +} + +/** + * If `line` is a JSON-format Lambda log object, render it as `LEVEL message` + * (or just the message when there's no level). Returns `undefined` when it isn't JSON.
+ * + * Drops JSON platform events (`type`/`record` envelopes for `platform.*`), which carry no + * application signal, by returning the empty string (the caller treats that as "drop"). + */ +function normalizeJsonLogLine(line: string): string | undefined { + if (!line.startsWith('{')) { + return undefined; + } + let obj: any; + try { + obj = JSON.parse(line); + } catch { + return undefined; + } + if (!obj || typeof obj !== 'object') { + return undefined; + } + + // Platform events (e.g. { type: 'platform.report', record: {...} }) — drop. + if (typeof obj.type === 'string' && obj.type.startsWith('platform.')) { + return ''; + } + + const level = typeof obj.level === 'string' ? obj.level : undefined; + // Lambda uses `message`; a thrown error envelope uses `errorMessage` (+ optional stackTrace). + let message: string; + if (typeof obj.message === 'string') { + message = obj.message; + } else if (typeof obj.errorMessage === 'string') { + message = Array.isArray(obj.stackTrace) ? [obj.errorMessage, ...obj.stackTrace].join('\n') : obj.errorMessage; + } else { + // JSON, but not a shape we recognize — keep the raw line rather than dropping signal. + message = line; + } + return level ? formatLeveledLine(level, message) : message; +} + +/** + * Render a log level and message as `LEVEL message`, padding the level to a fixed width so + * lines align in the terminal. Multi-line messages keep their internal newlines. + */ +function formatLeveledLine(level: string, message: string): string { + return `${level.padEnd(5)} ${message}`; +} + /** * Options that influence how a resource is investigated. */ @@ -732,7 +846,9 @@ async function filterLogLines( await debug(`Custom resource investigation: no log events in ${logGroup}${streamName ? ` (stream: ${streamName})` : ''}`); return undefined; } - return trimToRecentLines(events); + // Lambda log events have a known structure (text- or JSON-format), unlike raw ECS + // container output, so we normalize them into readable lines before trimming.
+ return trimToRecentLines(parseLambdaLogEvents(events)); } catch (e: any) { await debug(`Custom resource investigation: failed to fetch logs from ${logGroup}: ${e.message}`); return undefined; diff --git a/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts b/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts index 78473d1e8..1d2092fbc 100644 --- a/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts +++ b/packages/@aws-cdk/toolkit-lib/test/api/diagnosing/resource-investigation.test.ts @@ -12,6 +12,7 @@ import { functionNameFromArnOrName, investigateResource, parseEcsServiceIdentifier, + parseLambdaLogEvents, serviceTokenReferencedLogicalId, } from '../../../lib/api/diagnosing/resource-investigation'; import type { ResourceError } from '../../../lib/api/stack-events/resource-errors'; @@ -453,6 +454,92 @@ describe('extractLogStreamName', () => { }); }); +describe('parseLambdaLogEvents', () => { + const msgs = (events: Array<{ message?: string }>) => parseLambdaLogEvents(events).map(e => e.message); + + test('strips the timestamp/requestId prefix from text-format lines and aligns the level', () => { + expect(msgs([ + { message: '2026-06-19T18:25:11.112Z\treq-1\tERROR\tBoom: it failed' }, + { message: '2026-06-19T18:25:11.113Z\treq-1\tINFO\tall good' }, + ])).toEqual([ + 'ERROR Boom: it failed', + 'INFO all good', + ]); + }); + + test('drops Lambda platform boilerplate (INIT_START/START/END/REPORT)', () => { + expect(msgs([ + { message: 'INIT_START Runtime Version: nodejs:20.v101' }, + { message: 'START RequestId: req-1 Version: $LATEST' }, + { message: '2026-06-19T18:25:11.112Z\treq-1\tERROR\tthe real error' }, + { message: 'END RequestId: req-1' }, + { message: 'REPORT RequestId: req-1\tDuration: 1009 ms' }, + ])).toEqual(['ERROR the real error']); + }); + + test('keeps INFO-level application output (failure detail often rides in INFO)', () => { + // The cfn-response failure body 
is logged at INFO; it must survive. + const out = msgs([ + { message: '2026-06-19T18:25:11.113Z\treq-1\tINFO\tResponse body: {"Status":"FAILED","Data":{"error":"x"}}' }, + ]); + expect(out).toEqual(['INFO Response body: {"Status":"FAILED","Data":{"error":"x"}}']); + }); + + test('normalizes JSON-format events to LEVEL + message', () => { + expect(msgs([ + { message: '{"timestamp":"2026-06-19T18:25:11.112Z","level":"ERROR","requestId":"req-1","message":"Boom: it failed"}' }, + { message: '{"level":"INFO","message":"all good"}' }, + ])).toEqual([ + 'ERROR Boom: it failed', + 'INFO all good', + ]); + }); + + test('drops JSON platform events', () => { + expect(msgs([ + { message: '{"time":"2026-06-19T18:25:11Z","type":"platform.report","record":{"metrics":{"maxMemoryUsedMB":74}}}' }, + { message: '{"level":"ERROR","message":"kept"}' }, + ])).toEqual(['ERROR kept']); + }); + + test('renders a JSON error envelope with stack trace', () => { + expect(msgs([ + { message: '{"level":"ERROR","errorMessage":"KeyError: foo","stackTrace":[" at a"," at b"]}' }, + ])).toEqual(['ERROR KeyError: foo\n at a\n at b']); + }); + + test('passes through malformed JSON verbatim', () => { + expect(msgs([{ message: '{not valid json' }])).toEqual(['{not valid json']); + }); + + test('passes through plain/unstructured lines verbatim', () => { + expect(msgs([ + { message: 'a plain stdout line with no structure' }, + { message: ' at SomeStackFrame (file.js:1:2)' }, + ])).toEqual([ + 'a plain stdout line with no structure', + ' at SomeStackFrame (file.js:1:2)', + ]); + }); + + test('handles a mixed batch of text, JSON, platform, and plain lines', () => { + expect(msgs([ + { message: 'START RequestId: req-1 Version: $LATEST' }, + { message: '2026-06-19T18:25:11.112Z\treq-1\tWARN\ttext line' }, + { message: '{"level":"ERROR","message":"json line"}' }, + { message: 'bare line' }, + ])).toEqual([ + 'WARN text line', + 'ERROR json line', + 'bare line', + ]); + }); + + test('skips events with no 
message', () => { + expect(msgs([{ message: undefined }, { message: 'kept' }])).toEqual(['kept']); + }); +}); + describe('investigateResource for custom resources', () => { const STACK_ARN = 'arn:aws:cloudformation:us-east-1:123456789012:stack/MyStack/abc';