diff --git a/dependencyManagement/build.gradle.kts b/dependencyManagement/build.gradle.kts index bf112e1cc15..e58e86ec5cf 100644 --- a/dependencyManagement/build.gradle.kts +++ b/dependencyManagement/build.gradle.kts @@ -78,6 +78,7 @@ val DEPENDENCIES = listOf( "com.uber.nullaway:nullaway:0.13.1", "edu.berkeley.cs.jqf:jqf-fuzz:1.7", // jqf-fuzz version 1.8+ requires Java 11+ "eu.rekawek.toxiproxy:toxiproxy-java:2.1.11", + "io.github.crac:org-crac:0.1.3", "io.github.netmikey.logunit:logunit-jul:2.0.0", "io.jaegertracing:jaeger-client:1.8.1", "io.opentelemetry.contrib:opentelemetry-aws-xray-propagator:1.54.0-alpha", diff --git a/integration-tests/build.gradle.kts b/integration-tests/build.gradle.kts index 83d6c2c2ba9..c9dfab1d920 100644 --- a/integration-tests/build.gradle.kts +++ b/integration-tests/build.gradle.kts @@ -11,6 +11,7 @@ dependencies { testImplementation(project(":extensions:trace-propagators")) testImplementation("com.linecorp.armeria:armeria-junit5") + testImplementation("io.github.crac:org-crac") testImplementation("org.junit.jupiter:junit-jupiter-params") testImplementation("org.testcontainers:testcontainers-junit-jupiter") } diff --git a/integration-tests/src/test/java/io/opentelemetry/CracLifecycleIntegrationTest.java b/integration-tests/src/test/java/io/opentelemetry/CracLifecycleIntegrationTest.java new file mode 100644 index 00000000000..a15ec015043 --- /dev/null +++ b/integration-tests/src/test/java/io/opentelemetry/CracLifecycleIntegrationTest.java @@ -0,0 +1,165 @@ +/* + * Copyright The OpenTelemetry Authors + * SPDX-License-Identifier: Apache-2.0 + */ + +package io.opentelemetry; + +import static org.assertj.core.api.Assertions.assertThat; + +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.Tracer; +import io.opentelemetry.sdk.OpenTelemetrySdk; +import io.opentelemetry.sdk.common.CompletableResultCode; +import io.opentelemetry.sdk.trace.SdkTracerProvider; +import io.opentelemetry.sdk.trace.data.SpanData; +import 
io.opentelemetry.sdk.trace.export.SimpleSpanProcessor; +import io.opentelemetry.sdk.trace.export.SpanExporter; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.TimeUnit; +import org.crac.Context; +import org.crac.Resource; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +/** + * Integration-style lifecycle tests for CRaC (Coordinated Restore at Checkpoint) support. + * + *

 <p>These tests use {@link MockCracContext} to simulate the CRaC checkpoint/restore lifecycle + * without a CRaC-enabled JDK. Resources register with the mock context; the test then drives {@code + * beforeCheckpoint} and {@code afterRestore} callbacks directly. + * + *

See: #6756 + */ +class CracLifecycleIntegrationTest { + + /** + * Demonstrates the failure mode when the SDK is naively shut down at checkpoint with no + * corresponding restore logic. This is what happens today without proper CRaC support: the SDK is + * a one-shot object, so spans emitted after a restore are silently dropped. + */ + @Test + void spansDroppedAfterRestore_naiveCracIntegration() throws Exception { + MockCracContext cracContext = new MockCracContext(); + InMemorySpanExporter exporter = new InMemorySpanExporter(); + OpenTelemetrySdk sdk = buildSdk(exporter); + Tracer tracer = sdk.getTracer("crac-lifecycle-test"); + + // Naive CRaC resource: shuts the SDK down at checkpoint, does nothing on restore. + cracContext.register( + new Resource() { + @Override + public void beforeCheckpoint(Context context) { + sdk.getSdkTracerProvider().shutdown().join(10, TimeUnit.SECONDS); + } + + @Override + public void afterRestore(Context context) { + // No restore logic — this is the gap that #6756 addresses. + } + }); + + emitSpan(tracer, "before-checkpoint"); + sdk.getSdkTracerProvider().forceFlush().join(10, TimeUnit.SECONDS); + assertThat(exporter.exportedCount()).isEqualTo(1); + + cracContext.simulateCheckpoint(); + cracContext.simulateRestore(); + + // Post-restore span is silently dropped: the SDK is shut down and has no way to reinitialize. + emitSpan(tracer, "after-restore"); + sdk.getSdkTracerProvider().forceFlush().join(10, TimeUnit.SECONDS); + assertThat(exporter.exportedCount()).isEqualTo(1); + } + + /** + * Describes the desired behavior once the SDK properly implements {@link Resource}: spans emitted + * after a CRaC restore should be exported normally. + * + *

This test is disabled until #6756 is addressed. + * When that work lands, the SDK (or an adapter it exposes) should register with the CRaC context + * so that {@code beforeCheckpoint} flushes and quiesces, and {@code afterRestore} reinitializes + * exporters and processors. Replace the TODO below with the real SDK API. + */ + @Test + @Disabled("Expected to fail until #6756 adds checkpoint/restore-safe SDK lifecycle") + void spansExportedAfterRestore_properCracIntegration() throws Exception { + MockCracContext cracContext = new MockCracContext(); + InMemorySpanExporter exporter = new InMemorySpanExporter(); + OpenTelemetrySdk sdk = buildSdk(exporter); + Tracer tracer = sdk.getTracer("crac-lifecycle-test"); + + // TODO(#6756): replace this placeholder with the real SDK CRaC API, e.g.: + // cracContext.register(sdk.asCracResource()); + cracContext.register( + new Resource() { + @Override + public void beforeCheckpoint(Context context) throws Exception { + sdk.getSdkTracerProvider().shutdown().join(10, TimeUnit.SECONDS); + } + + @Override + public void afterRestore(Context context) throws Exception { + // Reinitialize: reopen connections, restart background threads. + // No SDK API exists for this yet — this is the body of #6756. 
+ } + }); + + emitSpan(tracer, "before-checkpoint"); + sdk.getSdkTracerProvider().forceFlush().join(10, TimeUnit.SECONDS); + assertThat(exporter.exportedCount()).isEqualTo(1); + + cracContext.simulateCheckpoint(); + cracContext.simulateRestore(); + + emitSpan(tracer, "after-restore"); + sdk.getSdkTracerProvider().forceFlush().join(10, TimeUnit.SECONDS); + assertThat(exporter.exportedCount()).isEqualTo(2); + } + + private static OpenTelemetrySdk buildSdk(SpanExporter exporter) { + return OpenTelemetrySdk.builder() + .setTracerProvider( + SdkTracerProvider.builder() + .addSpanProcessor(SimpleSpanProcessor.create(exporter)) + .build()) + .build(); + } + + private static void emitSpan(Tracer tracer, String name) { + Span span = tracer.spanBuilder(name).startSpan(); + span.end(); + } + + private static final class InMemorySpanExporter implements SpanExporter { + private final List spans = new ArrayList<>(); + private boolean shutdown; + + @Override + public CompletableResultCode export(Collection spans) { + if (shutdown) { + return CompletableResultCode.ofFailure(); + } + this.spans.addAll(spans); + return CompletableResultCode.ofSuccess(); + } + + @Override + public CompletableResultCode flush() { + return CompletableResultCode.ofSuccess(); + } + + @Override + public CompletableResultCode shutdown() { + shutdown = true; + return CompletableResultCode.ofSuccess(); + } + + int exportedCount() { + return spans.size(); + } + } +} diff --git a/integration-tests/src/test/java/io/opentelemetry/MockCracContext.java b/integration-tests/src/test/java/io/opentelemetry/MockCracContext.java new file mode 100644 index 00000000000..c351892401f --- /dev/null +++ b/integration-tests/src/test/java/io/opentelemetry/MockCracContext.java @@ -0,0 +1,56 @@ +/* + * Copyright The OpenTelemetry Authors + * SPDX-License-Identifier: Apache-2.0 + */ + +package io.opentelemetry; + +import java.util.ArrayList; +import java.util.List; +import org.crac.Context; +import org.crac.Resource; + +/** + * A 
test-only {@link Context} that allows simulating CRaC checkpoint and restore lifecycle events + * without requiring a CRaC-enabled JDK. Register resources with {@link #register(Resource)}, then + * call {@link #simulateCheckpoint()} and {@link #simulateRestore()} to drive the lifecycle. + * + *

Notification order follows the CRaC specification: checkpoint callbacks fire in reverse + * registration order; restore callbacks fire in forward registration order. + */ +final class MockCracContext extends Context { + + private final List resources = new ArrayList<>(); + + @Override + public void register(Resource resource) { + resources.add(resource); + } + + /** + * Simulates a CRaC checkpoint by invoking {@link Resource#beforeCheckpoint} on all registered + * resources in reverse registration order, as the CRaC spec requires. + */ + void simulateCheckpoint() throws Exception { + for (int i = resources.size() - 1; i >= 0; i--) { + resources.get(i).beforeCheckpoint(this); + } + } + + /** + * Simulates a CRaC restore by invoking {@link Resource#afterRestore} on all registered resources + * in forward registration order, as the CRaC spec requires. + */ + void simulateRestore() throws Exception { + for (Resource resource : resources) { + resource.afterRestore(this); + } + } + + // Not used: this context is not itself registered with a parent context. + @Override + public void beforeCheckpoint(Context context) {} + + @Override + public void afterRestore(Context context) {} +}