From 375a1e068250d229af0e4451cbdda9ef83bed930 Mon Sep 17 00:00:00 2001 From: BANANASJIM Date: Tue, 9 Jun 2026 05:02:13 -0700 Subject: [PATCH 1/3] fix(android): error instead of crash-prone adb:// fallback _resolve_android_url fell back to returning an adb://SERIAL URL when no adb forward existed; passing that URL to the daemon crashes it. Raise a clear UsageError pointing at 'rdc android setup --serial' instead, and update tests to mock the forwarded-port lookup. --- src/rdc/commands/session.py | 5 +++-- tests/unit/test_session_commands.py | 33 ++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/rdc/commands/session.py b/src/rdc/commands/session.py index fe0db22..57adee4 100644 --- a/src/rdc/commands/session.py +++ b/src/rdc/commands/session.py @@ -95,8 +95,9 @@ def _resolve_android_url(serial: str | None) -> str: port = _adb_forwarded_port(dev_serial) if port is not None: return f"localhost:{port}" - # Fallback to adb:// URL if no forward found - return f"adb://{dev_serial}" + raise click.UsageError( + f"adb forward not found for {dev_serial}; run: rdc android setup --serial {dev_serial}" + ) def _complete_capture_path( diff --git a/tests/unit/test_session_commands.py b/tests/unit/test_session_commands.py index dd6467a..2b1ce66 100644 --- a/tests/unit/test_session_commands.py +++ b/tests/unit/test_session_commands.py @@ -204,29 +204,49 @@ def _setup_data_dir(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Path: return data +def _forward(monkeypatch: pytest.MonkeyPatch, port: int | None) -> None: + """Stub adb-forward lookup to return *port* (or None).""" + monkeypatch.setattr("rdc.commands.session._adb_forwarded_port", lambda serial: port) + + def test_resolve_android_url_with_serial(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: _setup_data_dir(monkeypatch, tmp_path) + _forward(monkeypatch, 27015) save_remote_state(RemoteServerState(host="adb://DEV1", port=0, connected_at=1000.0)) result = _resolve_android_url(serial="DEV1") - assert result == "adb://DEV1" + assert result == "localhost:27015" def test_resolve_android_url_no_serial_uses_latest( monkeypatch: pytest.MonkeyPatch, tmp_path: Path ) -> None: _setup_data_dir(monkeypatch, tmp_path) + _forward(monkeypatch, 27016) save_remote_state(RemoteServerState(host="adb://OLD", port=0, connected_at=500.0)) save_remote_state(RemoteServerState(host="adb://NEW", port=0, connected_at=2000.0)) result = _resolve_android_url(serial=None) - assert result == "adb://NEW" + assert result == "localhost:27016" + + +def test_resolve_android_url_no_forward_errors( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """No adb forward -> clean UsageError, never a crash-prone adb:// URL.""" + _setup_data_dir(monkeypatch, tmp_path) + _forward(monkeypatch, None) + save_remote_state(RemoteServerState(host="adb://DEV1", port=0, connected_at=1000.0)) + + with pytest.raises(click.UsageError, match="adb forward not found for DEV1"): + _resolve_android_url(serial="DEV1") def test_resolve_android_url_serial_not_found( monkeypatch: pytest.MonkeyPatch, tmp_path: Path ) -> None: _setup_data_dir(monkeypatch, tmp_path) + _forward(monkeypatch, 27017) save_remote_state(RemoteServerState(host="adb://AAA", port=0, connected_at=1000.0)) with pytest.raises(click.UsageError, match="ZZZ"): @@ -245,11 +265,12 @@ def test_resolve_android_url_ignores_non_adb( monkeypatch: pytest.MonkeyPatch, tmp_path: Path ) -> None: _setup_data_dir(monkeypatch, tmp_path) + _forward(monkeypatch, 27018) save_remote_state(RemoteServerState(host="192.168.1.10", port=8888, connected_at=9999.0)) save_remote_state(RemoteServerState(host="adb://ABC123", port=0, connected_at=1000.0)) result = _resolve_android_url(serial=None) - assert result == "adb://ABC123" + assert result == "localhost:27018" # --------------------------------------------------------------------------- @@ -294,6 +315,7 @@ def _fake_send(host: str, port: int, payload: dict[str, Any], **kw: object) -> d def test_open_android_resolves_adb_url(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: _setup_data_dir(monkeypatch, tmp_path) monkeypatch.delenv("RDC_SESSION", raising=False) + _forward(monkeypatch, 27015) save_remote_state(RemoteServerState(host="adb://ABC123", port=0, connected_at=1000.0)) recorded = _mock_daemon_capture(monkeypatch) capture_file = tmp_path / "capture.rdc" @@ -302,7 +324,7 @@ def test_open_android_resolves_adb_url(monkeypatch: pytest.MonkeyPatch, tmp_path result = CliRunner().invoke(main, ["open", str(capture_file), "--android"]) assert result.exit_code == 0, result.output + (result.stderr or "") assert len(recorded["calls"]) == 1 - assert recorded["calls"][0]["kwargs"]["remote_url"] == "adb://ABC123" + assert recorded["calls"][0]["kwargs"]["remote_url"] == "localhost:27015" def test_open_gpu_passed_to_daemon(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: @@ -334,6 +356,7 @@ def test_open_gpu_ignored_with_connect(monkeypatch: pytest.MonkeyPatch, tmp_path def test_open_android_with_serial(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: _setup_data_dir(monkeypatch, tmp_path) monkeypatch.delenv("RDC_SESSION", raising=False) + _forward(monkeypatch, 27019) save_remote_state(RemoteServerState(host="adb://AAA", port=0, connected_at=2000.0)) save_remote_state(RemoteServerState(host="adb://BBB", port=0, connected_at=1000.0)) recorded = _mock_daemon_capture(monkeypatch) @@ -342,7 +365,7 @@ def test_open_android_with_serial(monkeypatch: pytest.MonkeyPatch, tmp_path: Pat result = CliRunner().invoke(main, ["open", str(capture_file), "--android", "--serial", "BBB"]) assert result.exit_code == 0, result.output + (result.stderr or "") - assert recorded["calls"][0]["kwargs"]["remote_url"] == "adb://BBB" + assert recorded["calls"][0]["kwargs"]["remote_url"] == "localhost:27019" def test_open_android_no_state_fails(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: From 4c94a7165be49c70ae9562a74bb722f7f0100d47 Mon Sep 17 00:00:00 2001 From: BANANASJIM Date: Tue, 9 Jun 2026 05:02:21 -0700 Subject: [PATCH 2/3] fix(daemon): run remote cleanup on idle-timeout exit The idle-timeout exit path in run_server skipped the remote CloseCapture/ShutdownConnection that _handle_shutdown performs, leaking the remote replay session on the remoteserver. Extract the teardown into a shared cleanup_state() helper and call it from both _handle_shutdown and the idle-timeout exit. --- src/rdc/daemon_server.py | 58 ++++++++++++++++++++++++++++++ src/rdc/handlers/core.py | 49 ++----------------------- tests/unit/test_handlers_remote.py | 35 ++++++++++++++++++ 3 files changed, 96 insertions(+), 46 deletions(-) diff --git a/src/rdc/daemon_server.py b/src/rdc/daemon_server.py index 5cc57c3..ab72b98 100644 --- a/src/rdc/daemon_server.py +++ b/src/rdc/daemon_server.py @@ -72,6 +72,7 @@ "_seek_replay", "_set_frame_event", "_start_ping_thread", + "cleanup_state", "_stop_ping_thread", "main", "run_server", @@ -488,6 +489,55 @@ def _stop_ping_thread(state: DaemonState) -> None: state._ping_thread.join(timeout=5.0) +def cleanup_state(state: DaemonState) -> None: + """Release all replay resources held by *state*. + + Shared teardown for both the explicit ``shutdown`` RPC and the + idle-timeout exit path, so a daemon that times out still closes the + remote replay session (CloseCapture + ShutdownConnection) instead of + leaking it on the remoteserver. Every step is best-effort. + """ + if state.replay_output is not None: + try: + state.replay_output.Shutdown() + except Exception: # noqa: BLE001 + pass + state.replay_output = None + if state.adapter is not None: + controller = state.adapter.controller + try: + for rid_obj in state.shader_replacements.values(): + controller.RemoveReplacement(rid_obj) + for rid_obj in state.built_shaders.values(): + controller.FreeTargetResource(rid_obj) + except Exception: # noqa: BLE001 + pass + state.shader_replacements.clear() + state.built_shaders.clear() + _cleanup_temp(state) + state.temp_dir = None + _cleanup_temp_capture(state) + if state.is_remote: + _stop_ping_thread(state) + if state.remote is not None and state.adapter is not None: + try: + state.remote.CloseCapture(state.adapter.controller) + except Exception: # noqa: BLE001 + pass + try: + state.remote.ShutdownConnection() + except Exception: # noqa: BLE001 + pass + elif state.adapter is not None: + state.adapter.shutdown() + if state.cap is not None: + try: + state.cap.Shutdown() + except Exception: # noqa: BLE001 + pass + state.cap = None + + def _cleanup_temp_capture(state: DaemonState) -> None: """Remove temp metadata directory created for remote replay, if any.""" if not state.local_capture_is_temp: @@ -678,9 +728,11 @@ def run_server( # pragma: no cover server.settimeout(1.0) running = True + idle_exit = False last_activity = time.time() while running: if idle_timeout_s > 0 and time.time() - last_activity > idle_timeout_s: + idle_exit = True break try: @@ -735,6 +787,12 @@ def run_server( # pragma: no cover pass last_activity = time.time() + # The shutdown RPC runs cleanup via _handle_shutdown before exiting the + # loop; an idle-timeout exit must release replay resources here so the + # remote replay session is not leaked on the remoteserver. + if idle_exit: + cleanup_state(state) + def main() -> None: # pragma: no cover _platform.install_shutdown_signal() diff --git a/src/rdc/handlers/core.py b/src/rdc/handlers/core.py index be5a5c1..c5ce610 100644 --- a/src/rdc/handlers/core.py +++ b/src/rdc/handlers/core.py @@ -82,52 +82,9 @@ def _handle_count( def _handle_shutdown( request_id: int, params: dict[str, Any], state: DaemonState ) -> tuple[dict[str, Any], bool]: - if state.replay_output is not None: - try: - state.replay_output.Shutdown() - except Exception: # noqa: BLE001 - pass - state.replay_output = None - if state.adapter is not None: - controller = state.adapter.controller - try: - for rid_obj in state.shader_replacements.values(): - controller.RemoveReplacement(rid_obj) - for rid_obj in state.built_shaders.values(): - controller.FreeTargetResource(rid_obj) - except Exception: # noqa: BLE001 - pass - state.shader_replacements.clear() - state.built_shaders.clear() - if state.temp_dir is not None: - import shutil - - shutil.rmtree(state.temp_dir, ignore_errors=True) - if state.local_capture_is_temp and state.local_capture_path: - import shutil - from pathlib import Path - - shutil.rmtree(str(Path(state.local_capture_path).parent), ignore_errors=True) - state.local_capture_is_temp = False - if state.is_remote: - if state._ping_stop is not None: - state._ping_stop.set() - if state._ping_thread is not None: - state._ping_thread.join(timeout=5.0) - if state.remote is not None and state.adapter is not None: - try: - state.remote.CloseCapture(state.adapter.controller) - except Exception: # noqa: BLE001 - pass - try: - state.remote.ShutdownConnection() - except Exception: # noqa: BLE001 - pass - else: - if state.adapter is not None: - state.adapter.shutdown() - if state.cap is not None: - state.cap.Shutdown() + from rdc.daemon_server import cleanup_state # noqa: PLC0415 + + cleanup_state(state) return _result_response(request_id, {"ok": True}), False diff --git a/tests/unit/test_handlers_remote.py b/tests/unit/test_handlers_remote.py index ac4e698..429fdee 100644 --- a/tests/unit/test_handlers_remote.py +++ b/tests/unit/test_handlers_remote.py @@ -7,6 +7,7 @@ from conftest import make_daemon_state +from rdc.daemon_server import cleanup_state from rdc.handlers.core import _handle_shutdown, _handle_status from rdc.handlers.texture import _handle_rt_overlay @@ -75,6 +76,40 @@ def test_shutdown_remote_shuts_down_cap(self) -> None: mock_cap.Shutdown.assert_called_once() +class TestCleanupState: + def test_remote_runs_close_and_shutdown_connection(self) -> None: + """Shared helper used by both shutdown RPC and idle-timeout exit.""" + state = _make_state(is_remote=True) + cleanup_state(state) + state.remote.CloseCapture.assert_called_once_with(state.adapter.controller) + state.remote.ShutdownConnection.assert_called_once() + + def test_remote_does_not_call_adapter_shutdown(self) -> None: + state = _make_state(is_remote=True) + mock_adapter = MagicMock() + mock_adapter.controller = MagicMock() + state.adapter = mock_adapter # type: ignore[assignment] + cleanup_state(state) + mock_adapter.shutdown.assert_not_called() + + def test_local_calls_adapter_shutdown_not_connection(self) -> None: + state = _make_state(is_remote=False) + mock_adapter = MagicMock() + mock_adapter.controller = MagicMock() + state.adapter = mock_adapter # type: ignore[assignment] + cleanup_state(state) + mock_adapter.shutdown.assert_called_once() + + def test_remote_stops_ping_thread(self) -> None: + state = _make_state(is_remote=True) + stop_event = threading.Event() + state._ping_stop = stop_event + state._ping_thread = MagicMock() + cleanup_state(state) + assert stop_event.is_set() + state._ping_thread.join.assert_called_once_with(timeout=5.0) + + class TestStatusRemote: def test_remote_includes_remote_fields(self) -> None: state = _make_state(is_remote=True) From 944a73b17699d61e4dc4f075a53d862456aa2cf4 Mon Sep 17 00:00:00 2001 From: BANANASJIM Date: Tue, 9 Jun 2026 05:02:31 -0700 Subject: [PATCH 3/3] fix(session): drain daemon stderr to avoid remote-startup deadlock (#47 regression) start_daemon spawned the daemon with stderr=PIPE but wait_for_ping never drained it; during remote replay the daemon's verbose upload logging filled the ~64KB pipe and blocked before serving its first ping, so open failed with a startup timeout. Redirect daemon stderr to a temp file (no buffer limit) and read its tail on failure, applied to both open_session and listen_open_session. Adds a regression test spawning a child that floods >128KB to stderr then sleeps, asserting startup does not hang. --- src/rdc/services/session_service.py | 65 ++++++++++++++++++---- tests/unit/test_quickfix_batch.py | 31 ++++++++--- tests/unit/test_session_service.py | 85 +++++++++++++++++++++++++++-- tests/unit/test_split_core.py | 4 +- 4 files changed, 161 insertions(+), 24 deletions(-) diff --git a/src/rdc/services/session_service.py b/src/rdc/services/session_service.py index 3ac3d23..2989026 100644 --- a/src/rdc/services/session_service.py +++ b/src/rdc/services/session_service.py @@ -6,6 +6,7 @@ import socket import subprocess import sys +import tempfile import time from pathlib import Path from typing import Any @@ -69,13 +70,55 @@ def start_daemon( cmd += ["--remote-url", remote_url] elif not _renderdoc_available(): cmd.append("--no-replay") - return subprocess.Popen( + # Redirect daemon stderr to a temp file rather than a PIPE: during remote + # replay the daemon's verbose upload logging would fill the OS pipe buffer + # and block before serving its first ping if no one drains it, which deadlocks + # wait_for_ping. A file has no buffer limit and we read its tail on failure. + stderr_file = tempfile.NamedTemporaryFile( # noqa: SIM115 + mode="w+", prefix="rdc-daemon-", suffix=".stderr", delete=False + ) + proc = subprocess.Popen( cmd, stdout=subprocess.DEVNULL, - stderr=subprocess.PIPE, + stderr=stderr_file, text=True, **_platform.popen_flags(), ) + proc._rdc_stderr_path = stderr_file.name # type: ignore[attr-defined] + stderr_file.close() + return proc + + +def _read_daemon_stderr(proc: subprocess.Popen[str]) -> str: + """Return the tail of the daemon's redirected stderr, then remove the file.""" + path = getattr(proc, "_rdc_stderr_path", None) + if not path: + return "" + try: + text = Path(path).read_text(errors="replace") + except OSError: + return "" + finally: + try: + Path(path).unlink() + except OSError: + pass + return "\n".join(text.splitlines()[-20:]).strip() + + +def _discard_daemon_stderr(proc: subprocess.Popen[str]) -> None: + """Best-effort removal of the daemon's stderr temp file after a successful start. + + On POSIX the daemon keeps writing to the now-unlinked inode; on Windows the + file is still open so unlink may fail and is silently ignored (the OS reclaims + it on process exit). + """ + path = getattr(proc, "_rdc_stderr_path", None) + if path: + try: + Path(path).unlink() + except OSError: + pass def wait_for_ping( @@ -159,16 +202,17 @@ def open_session( break proc.terminate() try: - _, stderr = proc.communicate(timeout=5) + proc.wait(timeout=5) except subprocess.TimeoutExpired: proc.kill() proc.wait() - stderr = "" - if stderr and stderr.strip(): - detail = stderr.strip() + stderr = _read_daemon_stderr(proc) + if stderr: + detail = stderr else: return False, f"error: daemon failed to start ({detail}) -- hint: run 'rdc doctor'" + _discard_daemon_stderr(proc) create_session( capture=str(capture), host=host, @@ -422,15 +466,16 @@ def listen_open_session( if not ok: proc.terminate() try: - _, stderr = proc.communicate(timeout=5) + proc.wait(timeout=5) except subprocess.TimeoutExpired: proc.kill() proc.wait() - stderr = "" - if stderr and stderr.strip(): - detail = stderr.strip() + stderr = _read_daemon_stderr(proc) + if stderr: + detail = stderr return False, f"error: daemon failed to start ({detail}) -- hint: run 'rdc doctor'" + _discard_daemon_stderr(proc) create_session( capture=capture, host=connect_host, diff --git a/tests/unit/test_quickfix_batch.py b/tests/unit/test_quickfix_batch.py index 479d5aa..2325d6e 100644 --- a/tests/unit/test_quickfix_batch.py +++ b/tests/unit/test_quickfix_batch.py @@ -150,37 +150,52 @@ def test_proc_wait_called_on_timeout( mock_ping: MagicMock, mock_check: MagicMock, ) -> None: - """TC-2.1: proc.wait() called after communicate raises TimeoutExpired.""" + """TC-2.1: a hung terminate triggers kill()+wait() zombie cleanup.""" from rdc.services.session_service import open_session mock_proc = MagicMock() - mock_proc.communicate.side_effect = subprocess.TimeoutExpired(cmd="rdc", timeout=5) + + # wait(timeout=5) hangs on every attempt; the bare kill-path wait() returns. + def _wait(*_a: object, **kw: object) -> int: + if kw.get("timeout") is not None: + raise subprocess.TimeoutExpired(cmd="rdc", timeout=5) + return 0 + + mock_proc.wait.side_effect = _wait mock_start.return_value = mock_proc ok, msg = open_session(Path("test.rdc")) assert not ok - assert mock_proc.wait.call_count >= 1 + assert mock_proc.kill.call_count >= 1 + assert mock_proc.wait.call_count >= 2 @patch("rdc.services.session_service._check_existing_session", return_value=(False, None)) @patch("rdc.services.session_service.wait_for_ping", return_value=(False, "timeout")) @patch("rdc.services.session_service.start_daemon") - def test_normal_communicate_no_extra_wait( + def test_normal_failure_surfaces_stderr( self, mock_start: MagicMock, mock_ping: MagicMock, mock_check: MagicMock, + tmp_path: Path, ) -> None: - """TC-2.2: normal communicate path does not call proc.wait().""" + """TC-2.2: a clean (non-hung) daemon exit surfaces its stderr tail.""" from rdc.services.session_service import open_session + stderr_path = tmp_path / "daemon.stderr" mock_proc = MagicMock() - mock_proc.communicate.return_value = ("", "some error\n") - mock_start.return_value = mock_proc + mock_proc.wait.return_value = 0 + mock_proc._rdc_stderr_path = str(stderr_path) + + def _start(*_a: object, **_kw: object) -> MagicMock: + stderr_path.write_text("some error\n") + return mock_proc + + mock_start.side_effect = _start ok, msg = open_session(Path("test.rdc")) assert not ok assert "some error" in msg - assert mock_proc.wait.call_count == 0 # --------------------------------------------------------------------------- diff --git a/tests/unit/test_session_service.py b/tests/unit/test_session_service.py index a98a164..b93fd1b 100644 --- a/tests/unit/test_session_service.py +++ b/tests/unit/test_session_service.py @@ -1,6 +1,8 @@ from __future__ import annotations import inspect +import subprocess +import sys import time from pathlib import Path from unittest.mock import MagicMock @@ -128,15 +130,21 @@ def test_open_session_reports_stderr_on_failure( monkeypatch.setattr(session_service, "load_session", lambda: None) monkeypatch.setattr(session_service, "_renderdoc_available", lambda: False) + stderr_path = tmp_path / "daemon.stderr" mock_proc = MagicMock() mock_proc.poll.return_value = 1 mock_proc.returncode = 1 mock_proc.pid = 999 mock_proc.kill.return_value = None - mock_proc.communicate.return_value = ("", "some error msg\n") + mock_proc._rdc_stderr_path = str(stderr_path) + + def _start(*_a: object, **_kw: object) -> MagicMock: + # Each spawn writes a fresh stderr file, as real start_daemon does. + stderr_path.write_text("some error msg\n") + return mock_proc detail = (False, "process exited: exit code 1") - monkeypatch.setattr(session_service, "start_daemon", lambda *a, **kw: mock_proc) + monkeypatch.setattr(session_service, "start_daemon", _start) monkeypatch.setattr(session_service, "wait_for_ping", lambda *a, **kw: detail) ok, msg = session_service.open_session(Path("test.rdc")) @@ -151,12 +159,14 @@ def test_open_session_failure_with_empty_stderr( monkeypatch.setattr(session_service, "load_session", lambda: None) monkeypatch.setattr(session_service, "_renderdoc_available", lambda: False) + stderr_path = tmp_path / "daemon.stderr" + stderr_path.write_text("") mock_proc = MagicMock() mock_proc.poll.return_value = 1 mock_proc.returncode = 1 mock_proc.pid = 999 mock_proc.kill.return_value = None - mock_proc.communicate.return_value = ("", "") + mock_proc._rdc_stderr_path = str(stderr_path) detail = (False, "process exited: exit code 1") monkeypatch.setattr(session_service, "start_daemon", lambda *a, **kw: mock_proc) @@ -235,7 +245,7 @@ def test_open_session_retries_on_port_conflict( mock_proc = MagicMock() mock_proc.pid = 999 mock_proc.kill.return_value = None - mock_proc.communicate.return_value = ("", "") + mock_proc._rdc_stderr_path = None monkeypatch.setattr(session_service, "start_daemon", lambda *a, **kw: mock_proc) @@ -262,7 +272,7 @@ def test_open_session_all_retries_fail(monkeypatch: pytest.MonkeyPatch, tmp_path mock_proc = MagicMock() mock_proc.pid = 999 mock_proc.kill.return_value = None - mock_proc.communicate.return_value = ("", "") + mock_proc._rdc_stderr_path = None monkeypatch.setattr(session_service, "start_daemon", lambda *a, **kw: mock_proc) monkeypatch.setattr(session_service, "wait_for_ping", lambda *a, **kw: (False, "port in use")) @@ -381,3 +391,68 @@ def test_close_session_tree_kill_on_hang(monkeypatch: pytest.MonkeyPatch, tmp_pa ok, msg = session_service.close_session() assert ok is True assert 888 in tree_killed + + +# --------------------------------------------------------------------------- +# Regression: daemon stderr must never deadlock startup (#47 PIPE regression) +# --------------------------------------------------------------------------- + +# A child that floods stderr with >128KB then sleeps, never serving a ping. +# Mirrors a remote daemon whose verbose upload logging would fill a PIPE and +# block before its first ping if stderr were not drained. +_FLOOD_CHILD = ( + "import sys, time;" + "sys.stderr.write('uploading: 0%\\n' * 12000);" + "sys.stderr.flush();" + "time.sleep(30)" +) + + +def test_open_session_does_not_hang_on_flooded_stderr( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """open_session must return promptly even when the daemon floods stderr. + + With the old stderr=PIPE + undrained wait_for_ping, the child blocks on a + full pipe and the ping never comes, so wait_for_ping spins for the whole + timeout while the pipe stays full. This asserts the call returns well inside + the child's 30s sleep and that the flooded stderr tail is still surfaced. + """ + monkeypatch.setattr("rdc._platform.data_dir", lambda: tmp_path / ".rdc") + monkeypatch.setenv("RDC_SESSION", "flood") + monkeypatch.setattr(session_service, "_renderdoc_available", lambda: False) + + spawned: list[subprocess.Popen[str]] = [] + real_popen = subprocess.Popen + + def _flood_start(*_a: object, **_kw: object) -> subprocess.Popen[str]: + import tempfile # noqa: PLC0415 + + stderr_file = tempfile.NamedTemporaryFile( # noqa: SIM115 + mode="w+", prefix="rdc-daemon-", suffix=".stderr", delete=False + ) + proc = real_popen( + [sys.executable, "-c", _FLOOD_CHILD], + stdout=subprocess.DEVNULL, + stderr=stderr_file, + text=True, + ) + proc._rdc_stderr_path = stderr_file.name # type: ignore[attr-defined] + stderr_file.close() + spawned.append(proc) + return proc + + monkeypatch.setattr(session_service, "start_daemon", _flood_start) + + start = time.monotonic() + ok, msg = session_service.open_session(Path("flood.rdc"), timeout=2.0) + elapsed = time.monotonic() - start + + for proc in spawned: + if proc.poll() is None: + proc.kill() + proc.wait() + + assert ok is False, msg + assert elapsed < 15.0, f"open_session hung ({elapsed:.1f}s) -- stderr pipe deadlock" + assert "uploading" in msg, f"daemon stderr not surfaced: {msg!r}" diff --git a/tests/unit/test_split_core.py b/tests/unit/test_split_core.py index 731e933..d726db4 100644 --- a/tests/unit/test_split_core.py +++ b/tests/unit/test_split_core.py @@ -913,10 +913,12 @@ def test_listen_success(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) - def test_listen_daemon_fail(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: _setup_no_replay(monkeypatch, tmp_path) monkeypatch.setattr(session_service, "load_session", lambda: None) + stderr_path = tmp_path / "daemon.stderr" + stderr_path.write_text("bind failed") mock_proc = MagicMock() mock_proc.pid = 999 mock_proc.kill.return_value = None - mock_proc.communicate.return_value = ("", "bind failed") + mock_proc._rdc_stderr_path = str(stderr_path) monkeypatch.setattr(session_service, "start_daemon", lambda *a, **kw: mock_proc) monkeypatch.setattr(session_service, "wait_for_ping", lambda *a, **kw: (False, "timeout"))