-
Notifications
You must be signed in to change notification settings - Fork 11
fix(otel): Fix X-Ray trace integration and cross-invocation retry spans #508
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,14 +5,17 @@ | |
| import static software.amazon.lambda.durable.otel.SpanAttributes.*; | ||
|
|
||
| import io.opentelemetry.api.common.AttributeKey; | ||
| import io.opentelemetry.api.common.Attributes; | ||
| import io.opentelemetry.api.trace.Span; | ||
| import io.opentelemetry.api.trace.SpanContext; | ||
| import io.opentelemetry.api.trace.SpanKind; | ||
| import io.opentelemetry.api.trace.StatusCode; | ||
| import io.opentelemetry.api.trace.TraceFlags; | ||
| import io.opentelemetry.api.trace.TraceState; | ||
| import io.opentelemetry.api.trace.Tracer; | ||
| import io.opentelemetry.context.Context; | ||
| import io.opentelemetry.context.Scope; | ||
| import io.opentelemetry.sdk.resources.Resource; | ||
| import io.opentelemetry.sdk.trace.SdkTracerProvider; | ||
| import io.opentelemetry.sdk.trace.SdkTracerProviderBuilder; | ||
| import java.time.Instant; | ||
|
|
@@ -128,6 +131,12 @@ public OtelPlugin(SdkTracerProviderBuilder tracerProviderBuilder, ContextExtract | |
| public OtelPlugin( | ||
| SdkTracerProviderBuilder tracerProviderBuilder, ContextExtractor contextExtractor, boolean enableMdc) { | ||
| this.idGenerator = new DeterministicIdGenerator(); | ||
|
|
||
| // Set service.name to "invocation" — X-Ray uses this as the display name for SERVER spans, | ||
| // creating a separate service node in the trace map labeled "invocation". | ||
| var resource = Resource.create(Attributes.of(AttributeKey.stringKey("service.name"), "invocation")); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: use SERVICE_NAME constant from opentelemetry package. |
||
| tracerProviderBuilder.setResource(resource); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This overrides what users have configured for the resource in |
||
|
|
||
| this.tracerProvider = tracerProviderBuilder.setIdGenerator(idGenerator).build(); | ||
| this.tracer = tracerProvider.get(INSTRUMENTATION_NAME); | ||
| this.contextExtractor = contextExtractor; | ||
|
|
@@ -152,10 +161,17 @@ public void onInvocationStart(InvocationInfo info) { | |
| } | ||
| // If no extracted context, idGenerator falls back to ARN-derived trace ID | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is idGenerator using the trace id from previous invocation? |
||
|
|
||
| // Determine parent context for the invocation span | ||
| // Determine parent context for the invocation span. | ||
| // When the X-Ray header has a Parent field, create the invocation span as a | ||
| // child of that segment. This connects our OTLP-exported spans to the Lambda | ||
| // service's X-Ray segments. | ||
| Context parentContext; | ||
| if (extractedContext != null && extractedContext.parentSpanId() != null) { | ||
| // Create a remote parent span context from the X-Ray Parent field | ||
| var activeSpan = Span.fromContext(Context.current()); | ||
| if (activeSpan.getSpanContext().isValid()) { | ||
| // An auto-instrumenter (e.g., ADOT agent) is active — parent to its span | ||
| parentContext = Context.current(); | ||
| } else if (extractedContext != null && extractedContext.parentSpanId() != null) { | ||
| // No active instrumenter, but X-Ray header has parent — create remote parent | ||
| var parentSpanContext = SpanContext.createFromRemoteParent( | ||
| extractedContext.traceId(), | ||
| extractedContext.parentSpanId(), | ||
|
|
@@ -166,8 +182,10 @@ public void onInvocationStart(InvocationInfo info) { | |
| parentContext = Context.root(); | ||
| } | ||
|
|
||
| // Create invocation span as child of Lambda's X-Ray segment (via Parent field) | ||
| var spanBuilder = tracer.spanBuilder("durable.invocation") | ||
| // Create a SERVER span to establish a separate X-Ray service node. | ||
| // X-Ray uses service.name for the segment display name. | ||
| var spanBuilder = tracer.spanBuilder("invocation") | ||
| .setSpanKind(SpanKind.SERVER) | ||
| .setParent(parentContext) | ||
| .setAttribute(DURABLE_EXECUTION_ARN, info.durableExecutionArn()) | ||
| .setAttribute(DURABLE_FIRST_INVOCATION, info.isFirstInvocation()); | ||
|
|
@@ -227,8 +245,6 @@ public void onInvocationEnd(InvocationEndInfo info) { | |
| public void onOperationStart(OperationInfo info) { | ||
| if (info.id() == null) return; | ||
|
|
||
| idGenerator.setNextSpanOperationId(info.id()); | ||
|
|
||
| var parentContext = resolveParentContext(info.parentId()); | ||
|
|
||
| var spanBuilder = tracer.spanBuilder(spanName(info.type(), info.subType(), info.name())) | ||
|
|
@@ -237,6 +253,19 @@ public void onOperationStart(OperationInfo info) { | |
| .setAttribute(DURABLE_OPERATION_ID, info.id()) | ||
| .setAttribute(DURABLE_OPERATION_TYPE, info.type()); | ||
|
|
||
| if (info.isContinuation()) { | ||
| // Operation was already started in a prior invocation — use a random span ID | ||
| // and add a Link to the deterministic span from the original invocation for correlation. | ||
| var deterministicSpanId = idGenerator.generateSpanIdForOperation(info.id()); | ||
| var traceId = idGenerator.generateTraceId(); | ||
| var linkedSpanContext = | ||
| SpanContext.create(traceId, deterministicSpanId, TraceFlags.getSampled(), TraceState.getDefault()); | ||
| spanBuilder.addLink(linkedSpanContext); | ||
| } else { | ||
| // First execution — use deterministic span ID so continuations can link back | ||
| idGenerator.setNextSpanOperationId(info.id()); | ||
| } | ||
|
|
||
| if (info.name() != null) { | ||
| spanBuilder.setAttribute(DURABLE_OPERATION_NAME, info.name()); | ||
| } | ||
|
|
@@ -400,17 +429,17 @@ private Context resolveParentContext(String parentId) { | |
|
|
||
| private static String spanName(String type, String subType, String name) { | ||
| if (name != null) { | ||
| return "durable." + (subType != null ? subType.toLowerCase() : type.toLowerCase()) + ":" + name; | ||
| return name; | ||
| } | ||
| return "durable." + (subType != null ? subType.toLowerCase() : type.toLowerCase()); | ||
| return subType != null ? subType.toLowerCase() : type.toLowerCase(); | ||
| } | ||
|
|
||
| private static String attemptSpanName(String type, String subType, String name, Integer attempt) { | ||
| var base = spanName(type, subType, name); | ||
| if (attempt != null) { | ||
| return base + " [attempt " + attempt + "]"; | ||
| return base + " attempt " + attempt; | ||
| } | ||
| return base + " [fn]"; | ||
| return base; | ||
| } | ||
|
|
||
| private static String attemptKey(String operationId, Integer attempt) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
how come we don't need this anymore?