From 972af867a142be5082bc5624d31b928f002851a8 Mon Sep 17 00:00:00 2001 From: Santiago Gimeno Date: Thu, 19 Mar 2026 15:39:28 +0100 Subject: [PATCH 1/2] lib,src,test: fix race during tracing toggles Keep JS tracing decisions tied to the env-local tracing state materialized by the native toggle path instead of rereading live flags in multiple places. Tracing flags can be updated from any thread, so reading them directly during span creation can observe a mid-flight toggle. That could make Tracer.startSpan() return a NonRecordingSpan after the caller had already passed an internal tracing gate, leading to crashes like TypeError: span._pushSpanDataString is not a function. Pass the full tracing bitmask from EnvList::update_tracing_flags() into JS, cache it in lib/internal/nsolid_trace.js, and make the internal OTel code consume that cached per-thread state. This removes the split-brain behavior where callback binding/unbinding followed flagsUpdated but Tracer.startSpan() could independently observe a later disable and return a NonRecordingSpan. For internal spans, stop Tracer.startSpan() from rechecking the current trace flags after the caller has already crossed an internal tracing gate. That keeps internal span creation locally consistent while tracing is being enabled and disabled and avoids crashes in code paths that expect a real N|Solid span object. Also add targeted repro coverage for the toggle race: - extend the tracing test addon with configurable setupTracing(flags), stopTracing(), and skipExpectedTracesCheck() - add a deterministic fetch-based repro that toggles HTTP client tracing while concurrent fetch() traffic is in flight - add a grpc-based repro harness for tracing reconfiguration and cover both fetch and http traffic - teach the grpc agent test client how to generate fetch transactions Together these changes make trace flags materialize per env/thread in JS, preserve the caller's local tracing decision for internal spans, and add regression coverage for the tracing enable/disable race. --- lib/internal/nsolid_trace.js | 16 +++- lib/internal/otel/core.js | 12 +-- lib/internal/otel/trace.js | 5 +- src/nsolid/nsolid_api.cc | 2 +- test/addons/nsolid-tracing/binding.cc | 45 +++++++++-- .../test-otel-fetch-enable-disable-race.js | 75 +++++++++++++++++++ test/agents/test-grpc-tracing-race.mjs | 75 +++++++++++++++++++ test/common/nsolid-grpc-agent/client.js | 35 +++++++++ 8 files changed, 248 insertions(+), 17 deletions(-) create mode 100644 test/addons/nsolid-tracing/test-otel-fetch-enable-disable-race.js create mode 100644 test/agents/test-grpc-tracing-race.mjs diff --git a/lib/internal/nsolid_trace.js b/lib/internal/nsolid_trace.js index e696878108d..fa6125b978c 100644 --- a/lib/internal/nsolid_trace.js +++ b/lib/internal/nsolid_trace.js @@ -19,10 +19,18 @@ const TRACE_ID_PART = '(?![0]{32})[\\da-f]{32}'; const PARENT_ID_PART = '(?![0]{16})[\\da-f]{16}'; const FLAGS_PART = '[\\da-f]{2}'; const TRACE_PARENT_REGEX = new RegExp(`^\\s?(${VERSION_PART})-(${TRACE_ID_PART})-(${PARENT_ID_PART})-(${FLAGS_PART})(-.*)?\\s?$`); +let currentTraceFlags = 0; function generateSpan(type) { - return nsolidApi.traceFlags[0] & type; + return getTraceFlags() & type; +} + +function getTraceFlags() { + if (nsolidApi.traceFlags !== undefined) { + currentTraceFlags = nsolidApi.traceFlags[0]; + } + return currentTraceFlags; } function parseTraceParent(traceParent) { @@ -69,13 +77,15 @@ function extractSpanContextFromHttpHeaders(context, headers) { } const nsolidTracer = new EventEmitter(); -binding.setToggleTracingFn(() => { - nsolidTracer.emit('flagsUpdated'); +binding.setToggleTracingFn((flags) => { + currentTraceFlags = flags; + nsolidTracer.emit('flagsUpdated', flags); }); module.exports = { extractSpanContextFromHttpHeaders, generateSpan, + getTraceFlags, nsolidTracer, }; diff --git a/lib/internal/otel/core.js b/lib/internal/otel/core.js index bcdbe981d06..9c23910a4b4 100644 --- a/lib/internal/otel/core.js +++ b/lib/internal/otel/core.js @@ -4,7 +4,6 @@ const { ArrayIsArray, } = primordials; -const binding = internalBinding('nsolid_api'); const { codes: { ERR_INVALID_ARG_TYPE, @@ -28,16 +27,16 @@ function register(api) { // TODO(santigimeno): perform some kind of validation that the api is actually // the OTEL api. api_ = api; - const { nsolidTracer } = require('internal/nsolid_trace'); - nsolidTracer.on('flagsUpdated', () => { - if (binding.trace_flags[0]) { + const { getTraceFlags, nsolidTracer } = require('internal/nsolid_trace'); + nsolidTracer.on('flagsUpdated', (flags) => { + if (flags) { enableApi(); } else { disableApi(); } }); - if (binding.trace_flags[0]) { + if (getTraceFlags()) { return enableApi(); } @@ -55,7 +54,8 @@ function registerInstrumentations(instrumentations) { instrumentations); } - if (binding.trace_flags[0]) { + const { getTraceFlags } = require('internal/nsolid_trace'); + if (getTraceFlags()) { enableInsts(instrumentations); } else { disableInsts(instrumentations); diff --git a/lib/internal/otel/trace.js b/lib/internal/otel/trace.js index bf84938a0dc..d0789ea374a 100644 --- a/lib/internal/otel/trace.js +++ b/lib/internal/otel/trace.js @@ -13,6 +13,9 @@ const { nsolid_consts } = binding; const { getApi, } = require('internal/otel/core'); +const { + getTraceFlags, +} = require('internal/nsolid_trace'); const { newInternalSpanId, @@ -251,7 +254,7 @@ Span.prototype._pushSpanDataUint64 = function(type, name) { class Tracer { startSpan(name, options = {}, context) { const api = getApi(); - if (binding.trace_flags[0] === 0) { + if (!options.internal && getTraceFlags() === 0) { return api.trace.wrapSpanContext(api.INVALID_SPAN_CONTEXT); } diff --git a/src/nsolid/nsolid_api.cc b/src/nsolid/nsolid_api.cc index b879e506336..ae059aa2d53 100644 --- a/src/nsolid/nsolid_api.cc +++ b/src/nsolid/nsolid_api.cc @@ -1812,7 +1812,7 @@ void EnvList::update_tracing_flags(SharedEnvInst envinst_sp, uint32_t flags) { HandleScope handle_scope(isolate); Context::Scope context_scope(env->context()); Local argv[] = { - v8::Boolean::New(isolate, flags > 0) + Uint32::New(isolate, flags) }; // We don't care if Call throws or exits. So ignore the return value. diff --git a/test/addons/nsolid-tracing/binding.cc b/test/addons/nsolid-tracing/binding.cc index 442aa193477..b4b2e42a138 100644 --- a/test/addons/nsolid-tracing/binding.cc +++ b/test/addons/nsolid-tracing/binding.cc @@ -117,6 +117,11 @@ class Trace { Tracer* tracer_ = nullptr; std::queue spans_; json expected_traces_ = {}; +bool at_exit_registered_ = false; +bool check_expected_traces_ = true; + +static constexpr uint32_t kDefaultTraceFlags = + kSpanDns | kSpanHttpClient | kSpanHttpServer | kSpanCustom; // NOLINTNEXTLINE(runtime/references) @@ -166,6 +171,10 @@ static void at_exit_cb() { fprintf(stderr, "traces_array: %s\n", traces_array.dump(4).c_str()); // fprintf(stderr, "expected_traces: %s\n", expected_traces_.dump(4).c_str()); + if (!check_expected_traces_) { + return; + } + assert(traces_array.size() == expected_traces_.size()); for (auto i = traces_array.begin(); i != traces_array.end(); ++i) { for (auto j = expected_traces_.begin(); @@ -242,22 +251,46 @@ static void ExpectedTrace(const v8::FunctionCallbackInfo& args) { } static void SetupTracing(const v8::FunctionCallbackInfo& args) { - assert(0 == args.Length()); + assert(args.Length() <= 1); v8::Isolate* isolate = args.GetIsolate(); v8::Local context = isolate->GetCurrentContext(); node::nsolid::SharedEnvInst envinst = node::nsolid::GetLocalEnvInst(context); if (node::nsolid::IsMainThread(envinst)) { - tracer_ = Tracer::CreateInstance(kSpanDns | - kSpanHttpClient | - kSpanHttpServer | - kSpanCustom, got_trace); - atexit(at_exit_cb); + if (tracer_ != nullptr) { + return; + } + + uint32_t flags = kDefaultTraceFlags; + if (args.Length() == 1) { + assert(args[0]->IsUint32()); + flags = args[0].As()->Value(); + } + + tracer_ = Tracer::CreateInstance(flags, got_trace); + if (!at_exit_registered_) { + atexit(at_exit_cb); + at_exit_registered_ = true; + } } } +static void StopTracing(const v8::FunctionCallbackInfo& args) { + assert(0 == args.Length()); + delete tracer_; + tracer_ = nullptr; +} + +static void SkipExpectedTracesCheck( + const v8::FunctionCallbackInfo& args) { + assert(0 == args.Length()); + check_expected_traces_ = false; +} + NODE_MODULE_INIT(/* exports, module, context */) { NODE_SET_METHOD(exports, "expectedTrace", ExpectedTrace); NODE_SET_METHOD(exports, "setupTracing", SetupTracing); + NODE_SET_METHOD(exports, "stopTracing", StopTracing); + NODE_SET_METHOD(exports, "skipExpectedTracesCheck", SkipExpectedTracesCheck); #define V(Name, Val, Str) \ NODE_DEFINE_CONSTANT(exports, Name); NSOLID_SPAN_TYPES(V) diff --git a/test/addons/nsolid-tracing/test-otel-fetch-enable-disable-race.js b/test/addons/nsolid-tracing/test-otel-fetch-enable-disable-race.js new file mode 100644 index 00000000000..e6a203c085a --- /dev/null +++ b/test/addons/nsolid-tracing/test-otel-fetch-enable-disable-race.js @@ -0,0 +1,75 @@ +// Flags: --dns-result-order=ipv4first +'use strict'; + +const common = require('../../common'); +const assert = require('assert'); +const http = require('http'); +const { once } = require('events'); + +const bindingPath = require.resolve(`./build/${common.buildType}/binding`); +const binding = require(bindingPath); + +const concurrency = 16; +const fetchRounds = 150; +const toggleRounds = 400; + +async function main() { + binding.skipExpectedTracesCheck(); + binding.setupTracing(binding.kSpanHttpClient); + + const server = http.createServer((req, res) => { + req.resume(); + res.writeHead(200, { 'content-type': 'text/plain' }); + setImmediate(() => res.end('ok')); + }); + + server.listen(0, '127.0.0.1'); + await once(server, 'listening'); + const { port } = server.address(); + const url = `http://127.0.0.1:${port}/`; + + const failures = []; + + async function runFetches() { + for (let i = 0; i < fetchRounds; i++) { + const batch = Array.from({ length: concurrency }, async () => { + try { + const res = await fetch(url, { + method: 'POST', + body: 'payload', + }); + await res.text(); + } catch (err) { + failures.push(err); + } + }); + await Promise.all(batch); + } + } + + async function toggleTracing() { + for (let i = 0; i < toggleRounds; i++) { + binding.stopTracing(); + await new Promise(setImmediate); + binding.setupTracing(binding.kSpanHttpClient); + await new Promise(setImmediate); + } + } + + try { + await Promise.all([ + runFetches(), + toggleTracing(), + ]); + } finally { + binding.stopTracing(); + server.close(); + await once(server, 'close'); + } + + assert.deepStrictEqual(failures, []); +} + +main().then(common.mustCall()).catch((err) => { + throw err; +}); diff --git a/test/agents/test-grpc-tracing-race.mjs b/test/agents/test-grpc-tracing-race.mjs new file mode 100644 index 00000000000..941ca9d24ae --- /dev/null +++ b/test/agents/test-grpc-tracing-race.mjs @@ -0,0 +1,75 @@ +// Flags: --expose-internals +import { mustCall, mustSucceed } from '../common/index.mjs'; +import { + checkExitData, + GRPCServer, + TestClient, +} from '../common/nsolid-grpc-agent/index.js'; + +const traceBursts = 100; +const toggleRounds = 20; + +async function runRepro(getEnv, kind) { + return new Promise((resolve, reject) => { + const grpcServer = new GRPCServer(); + grpcServer.start(mustSucceed(async (port) => { + const env = getEnv(port); + const opts = { + stdio: ['inherit', 'inherit', 'inherit', 'ipc'], + env, + }; + + const client = new TestClient([], opts); + const agentId = await client.id(); + + grpcServer.on('exit', mustCall((data) => { + checkExitData(data.msg, data.metadata, agentId, { code: 0, error: null, profile: '' }); + grpcServer.close(); + resolve(); + })); + + // send lots of trace requests to trigger tracing + for (let i = 0; i < traceBursts; i++) { + client.tracing(kind, 0); + } + + // toggle tracing on and off + for (let i = 0; i < toggleRounds; i++) { + const enabled = (i % 2) !== 0; + await grpcServer.reconfigure(agentId, { tracingEnabled: enabled }); + } + + await client.shutdown(); + })); + }); +} + +const tests = []; +tests.push({ + name: 'should reproduce fetch tracing crash via grpc reconfigure', + test: async (getEnv) => runRepro(getEnv, 'fetch'), +}); + +tests.push({ + name: 'should reproduce http tracing crash via grpc reconfigure', + test: async (getEnv) => runRepro(getEnv, 'http'), +}); + +const testConfigs = [ + { + getEnv: (port) => ({ + NODE_DEBUG_NATIVE: 'nsolid_grpc_agent', + NSOLID_GRPC_INSECURE: 1, + NSOLID_GRPC: `localhost:${port}`, + NSOLID_TRACING_ENABLED: 1, + NSOLID_INTERVAL: 100000, + }), + }, +]; + +for (const testConfig of testConfigs) { + for (const { name, test } of tests) { + console.log(`[tracing] ${name}`); + await test(testConfig.getEnv); + } +} diff --git a/test/common/nsolid-grpc-agent/client.js b/test/common/nsolid-grpc-agent/client.js index f094e528b91..5acca808308 100644 --- a/test/common/nsolid-grpc-agent/client.js +++ b/test/common/nsolid-grpc-agent/client.js @@ -46,6 +46,33 @@ function execHttpTransaction() { }); } +function execFetchTransaction() { + const server = http.createServer((req, res) => { + req.resume(); + setTimeout(() => { + res.writeHead(200, { 'Content-Type': 'text/plain' }); + res.end('Hello World\n'); + }, 10); + }); + + server.listen(0, '127.0.0.1', async () => { + const port = server.address().port; + const url = `http://127.0.0.1:${port}/`; + for (let i = 0; i < 25; i++) { + const responses = await Promise.all(Array.from({ length: 32 }, () => { + return fetch(url, { + method: 'POST', + body: 'payload', + }); + })); + + await Promise.all(responses.map((response) => response.text())); + } + + server.close(); + }); +} + function execDnsTransaction() { const dns = require('node:dns'); dns.lookup('example.org', () => { @@ -97,6 +124,9 @@ function handleTrace(msg) { case 'http': execHttpTransaction(); break; + case 'fetch': + execFetchTransaction(); + break; case 'dns': execDnsTransaction(); break; @@ -217,6 +247,11 @@ if (isMainThread) { // immediately without the need of calling nsolid.start() nsolid.start(); execHttpTransaction(); + } else if (trace === 'fetch') { + // TODO(santigimeno): ideally we should be able to collect traces + // immediately without the need of calling nsolid.start() + nsolid.start(); + execFetchTransaction(); } else if (trace === 'dns') { execDnsTransaction(); } else if (trace === 'custom') { From dbd0174c8da7a97e629cfc75b8825de63df203af Mon Sep 17 00:00:00 2001 From: Santiago Gimeno Date: Thu, 19 Mar 2026 17:25:46 +0100 Subject: [PATCH 2/2] test: fix linting in test-nsolid-file-handle-count --- test/parallel/test-nsolid-file-handle-count.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/parallel/test-nsolid-file-handle-count.js b/test/parallel/test-nsolid-file-handle-count.js index 0376053a508..61cae057cba 100644 --- a/test/parallel/test-nsolid-file-handle-count.js +++ b/test/parallel/test-nsolid-file-handle-count.js @@ -56,7 +56,7 @@ fs.readFile(__filename, () => { assert.strictEqual(getClosed(), ++cCntr); checkPromise() - .then(common.mustCall(closePromiseFd)); + .then(common.mustCall((fh) => closePromiseFd(fh))); })); })); });