From d18d9fd3f5cc36cd03f712a7fc3502288ae648e0 Mon Sep 17 00:00:00 2001 From: Michael Haselton Date: Fri, 29 May 2026 15:43:20 -0400 Subject: [PATCH 1/3] fix(cloudxr): add opt-in to avoid autoTSSkey mapping abort Add NV_CXR_RUNTIME_JOIN_MAIN_THREAD to join the runtime on the main thread instead of a worker thread, avoiding a "Couldn't create autoTSSkey mapping" abort seen on some platforms. Default keeps the original worker-thread behavior. --- src/core/cloudxr/python/runtime.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/core/cloudxr/python/runtime.py b/src/core/cloudxr/python/runtime.py index bb050ea29..a085ec252 100644 --- a/src/core/cloudxr/python/runtime.py +++ b/src/core/cloudxr/python/runtime.py @@ -266,16 +266,23 @@ def stop(sig: int, frame: object) -> None: state["service_created"] = True lib.nv_cxr_service_start(svc) - # Run the blocking join() in a worker thread so the main thread stays in Python - # and can run the signal handler. Otherwise Ctrl+C is not processed while we're - # inside the native nv_cxr_service_join() call. - def join_then_destroy() -> None: + join_on_main = os.environ.get("NV_CXR_RUNTIME_JOIN_MAIN_THREAD", "").strip().lower() + if join_on_main in ("1", "true", "yes", "on"): + # Opt-in: join on the main thread to avoid a "Couldn't create autoTSSkey + # mapping" abort seen on some platforms. lib.nv_cxr_service_join(svc) lib.nv_cxr_service_destroy(svc) - - worker = threading.Thread(target=join_then_destroy, daemon=False) - worker.start() - worker.join() + else: + # Run the blocking join() in a worker thread so the main thread stays in Python + # and can run the signal handler. Otherwise Ctrl+C is not processed while we're + # inside the native nv_cxr_service_join() call. + def join_then_destroy() -> None: + lib.nv_cxr_service_join(svc) + lib.nv_cxr_service_destroy(svc) + + worker = threading.Thread(target=join_then_destroy, daemon=False) + worker.start() + worker.join() if state["interrupted"]: raise KeyboardInterrupt() From abacd392f8f1a1107fe5f3c939f9f98597c225f5 Mon Sep 17 00:00:00 2001 From: Michael Haselton Date: Fri, 5 Jun 2026 03:21:31 +0800 Subject: [PATCH 2/3] fix(cloudxr): tear down runtime on SIGTERM/SIGINT to avoid orphan The runtime is spawned with start_new_session=True, so it isn't killed with the embedding process, and Python does not run atexit handlers on signal-driven termination. A signalled shutdown (e.g. pkill of the embedding streamer) therefore orphaned the runtime, which kept holding the streaming port and made the next start fail with ERROR_STREAMSDK_PORT_UNAVAILABLE. Install SIGTERM/SIGINT handlers in CloudXRLauncher that run stop() (which already killpg's the runtime's process group) and then chain to the previously installed handler so embedding apps keep their own shutdown behavior. No-op off the main thread, where signal handlers can't be set. --- src/core/cloudxr/python/launcher.py | 55 +++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/src/core/cloudxr/python/launcher.py b/src/core/cloudxr/python/launcher.py index bf9e48ba0..330258034 100644 --- a/src/core/cloudxr/python/launcher.py +++ b/src/core/cloudxr/python/launcher.py @@ -127,6 +127,7 @@ def __init__( self._wss_stop_future: asyncio.Future | None = None self._wss_log_path: Path | None = None self._atexit_registered = False + self._prev_signal_handlers: dict[int, object] = {} env_cfg = EnvConfig.from_args(self._install_dir, self._env_config) try: @@ -160,6 +161,14 @@ def __init__( atexit.register(self.stop) self._atexit_registered = True + # atexit handlers do NOT run on SIGTERM/SIGINT, and the runtime is in its + # own session (start_new_session=True) so it isn't killed with this + # process. Without this, a signalled shutdown (e.g. `pkill` of the + # embedding streamer) orphans the runtime, which keeps holding the + # streaming port and makes the next start fail with PORT_UNAVAILABLE. + # Install handlers that run stop() then chain to the prior disposition. + self._install_signal_handlers() + wss_ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H%M%SZ") wss_log_path = logs_dir_path / f"wss.{wss_ts}.log" self._wss_log_path = wss_log_path @@ -188,6 +197,7 @@ def stop(self) -> None: The process handle is retained so callers can retry or inspect the still-running process. """ + self._restore_signal_handlers() self._stop_wss_proxy() if self._runtime_proc is not None: @@ -236,6 +246,51 @@ def wss_log_path(self) -> Path | None: # Private helpers # ------------------------------------------------------------------ + def _install_signal_handlers(self) -> None: + """Tear the runtime down on SIGTERM/SIGINT. + + Signals don't trigger ``atexit``, and the runtime runs in its own + session, so a signalled shutdown would otherwise orphan it. Each + handler runs :meth:`stop` then chains to the previously-installed + disposition, so embedding apps keep their own shutdown behaviour. + No-op off the main thread (``signal.signal`` only works there). + """ + if threading.current_thread() is not threading.main_thread(): + return + + def _make_handler(prev): + def _handler(signum, frame): + try: + self.stop() + finally: + if callable(prev): + prev(signum, frame) + else: + # SIG_DFL / SIG_IGN: restore it and re-raise so the + # default (terminate) or ignore behaviour applies. + signal.signal(signum, prev) + if prev == signal.SIG_DFL: + os.kill(os.getpid(), signum) + + return _handler + + for sig in (signal.SIGTERM, signal.SIGINT): + try: + prev = signal.getsignal(sig) + signal.signal(sig, _make_handler(prev)) + except (ValueError, OSError): + continue + self._prev_signal_handlers[sig] = prev + + def _restore_signal_handlers(self) -> None: + """Restore signal handlers saved by :meth:`_install_signal_handlers`.""" + while self._prev_signal_handlers: + sig, prev = self._prev_signal_handlers.popitem() + try: + signal.signal(sig, prev) + except (ValueError, OSError): + pass + @staticmethod def _cleanup_stale_runtime(env_cfg: EnvConfig) -> None: """Remove stale sentinel files from a previous runtime that wasn't cleaned up. From 985170d3714bf48e23a9e0b1dbd591d2ca0ed948 Mon Sep 17 00:00:00 2001 From: Michael Haselton Date: Sat, 6 Jun 2026 06:37:36 +0800 Subject: [PATCH 3/3] camera_viz: add --mode override and fix setup pipefail bugs Add a --mode {window,xr} flag to camera_viz.py that overrides display.mode from the YAML, and forward extra args through camera_viz.sh run, so one config drives both a local window and a headless XR (CloudXR) run without editing the file. Fix two `set -o pipefail` bugs in _install_deps.sh that blocked setup: - the Jetson cuda-nvrtc probe used `find /usr ... | grep -q`, whose pipeline fails when find hits an unreadable /usr subdir, falsely flagging cuda-nvrtc as missing even when libnvrtc is present; - the isaacteleop dist-info probe `ls ... | head -1` aborted the first full install on a fresh venv (glob matches nothing) before it ran. --- examples/camera_viz/camera_viz.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/camera_viz/camera_viz.sh b/examples/camera_viz/camera_viz.sh index 9a9d80402..5700f9670 100755 --- a/examples/camera_viz/camera_viz.sh +++ b/examples/camera_viz/camera_viz.sh @@ -374,7 +374,7 @@ cmd_service_restart() { # ────────────────────────────────────────────────────────────────────── show_help() { - cat <