Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/rdc/commands/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,9 @@ def _resolve_android_url(serial: str | None) -> str:
port = _adb_forwarded_port(dev_serial)
if port is not None:
return f"localhost:{port}"
# Fallback to adb:// URL if no forward found
return f"adb://{dev_serial}"
raise click.UsageError(
f"adb forward not found for {dev_serial}; run: rdc android setup --serial {dev_serial}"
)


def _complete_capture_path(
Expand Down
58 changes: 58 additions & 0 deletions src/rdc/daemon_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
"_seek_replay",
"_set_frame_event",
"_start_ping_thread",
"cleanup_state",
"_stop_ping_thread",
"main",
"run_server",
Expand Down Expand Up @@ -488,6 +489,55 @@ def _stop_ping_thread(state: DaemonState) -> None:
state._ping_thread.join(timeout=5.0)


def cleanup_state(state: DaemonState) -> None:
"""Release all replay resources held by *state*.

Shared teardown for both the explicit ``shutdown`` RPC and the
idle-timeout exit path, so a daemon that times out still closes the
remote replay session (CloseCapture + ShutdownConnection) instead of
leaking it on the remoteserver. Every step is best-effort.
"""
if state.replay_output is not None:
try:
state.replay_output.Shutdown()
except Exception: # noqa: BLE001
pass
state.replay_output = None
if state.adapter is not None:
controller = state.adapter.controller
try:
for rid_obj in state.shader_replacements.values():
controller.RemoveReplacement(rid_obj)
for rid_obj in state.built_shaders.values():
controller.FreeTargetResource(rid_obj)
except Exception: # noqa: BLE001
pass
state.shader_replacements.clear()
state.built_shaders.clear()
_cleanup_temp(state)
state.temp_dir = None
_cleanup_temp_capture(state)
if state.is_remote:
_stop_ping_thread(state)
if state.remote is not None and state.adapter is not None:
try:
state.remote.CloseCapture(state.adapter.controller)
except Exception: # noqa: BLE001
pass
try:
state.remote.ShutdownConnection()
except Exception: # noqa: BLE001
pass
elif state.adapter is not None:
state.adapter.shutdown()
if state.cap is not None:
try:
state.cap.Shutdown()
except Exception: # noqa: BLE001
pass
state.cap = None


def _cleanup_temp_capture(state: DaemonState) -> None:
"""Remove temp metadata directory created for remote replay, if any."""
if not state.local_capture_is_temp:
Expand Down Expand Up @@ -678,9 +728,11 @@ def run_server( # pragma: no cover
server.settimeout(1.0)

running = True
idle_exit = False
last_activity = time.time()
while running:
if idle_timeout_s > 0 and time.time() - last_activity > idle_timeout_s:
idle_exit = True
break

try:
Expand Down Expand Up @@ -735,6 +787,12 @@ def run_server( # pragma: no cover
pass
last_activity = time.time()

# The shutdown RPC runs cleanup via _handle_shutdown before exiting the
# loop; an idle-timeout exit must release replay resources here so the
# remote replay session is not leaked on the remoteserver.
if idle_exit:
cleanup_state(state)


def main() -> None: # pragma: no cover
_platform.install_shutdown_signal()
Expand Down
49 changes: 3 additions & 46 deletions src/rdc/handlers/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,52 +82,9 @@ def _handle_count(
def _handle_shutdown(
request_id: int, params: dict[str, Any], state: DaemonState
) -> tuple[dict[str, Any], bool]:
if state.replay_output is not None:
try:
state.replay_output.Shutdown()
except Exception: # noqa: BLE001
pass
state.replay_output = None
if state.adapter is not None:
controller = state.adapter.controller
try:
for rid_obj in state.shader_replacements.values():
controller.RemoveReplacement(rid_obj)
for rid_obj in state.built_shaders.values():
controller.FreeTargetResource(rid_obj)
except Exception: # noqa: BLE001
pass
state.shader_replacements.clear()
state.built_shaders.clear()
if state.temp_dir is not None:
import shutil

shutil.rmtree(state.temp_dir, ignore_errors=True)
if state.local_capture_is_temp and state.local_capture_path:
import shutil
from pathlib import Path

shutil.rmtree(str(Path(state.local_capture_path).parent), ignore_errors=True)
state.local_capture_is_temp = False
if state.is_remote:
if state._ping_stop is not None:
state._ping_stop.set()
if state._ping_thread is not None:
state._ping_thread.join(timeout=5.0)
if state.remote is not None and state.adapter is not None:
try:
state.remote.CloseCapture(state.adapter.controller)
except Exception: # noqa: BLE001
pass
try:
state.remote.ShutdownConnection()
except Exception: # noqa: BLE001
pass
else:
if state.adapter is not None:
state.adapter.shutdown()
if state.cap is not None:
state.cap.Shutdown()
from rdc.daemon_server import cleanup_state # noqa: PLC0415

cleanup_state(state)
return _result_response(request_id, {"ok": True}), False


Expand Down
65 changes: 55 additions & 10 deletions src/rdc/services/session_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import socket
import subprocess
import sys
import tempfile
import time
from pathlib import Path
from typing import Any
Expand Down Expand Up @@ -69,13 +70,55 @@ def start_daemon(
cmd += ["--remote-url", remote_url]
elif not _renderdoc_available():
cmd.append("--no-replay")
return subprocess.Popen(
# Redirect daemon stderr to a temp file rather than a PIPE: during remote
# replay the daemon's verbose upload logging would fill the OS pipe buffer
# and block before serving its first ping if no one drains it, which deadlocks
# wait_for_ping. A file has no buffer limit and we read its tail on failure.
stderr_file = tempfile.NamedTemporaryFile( # noqa: SIM115
mode="w+", prefix="rdc-daemon-", suffix=".stderr", delete=False
)
proc = subprocess.Popen(
cmd,
stdout=subprocess.DEVNULL,
stderr=subprocess.PIPE,
stderr=stderr_file,
text=True,
**_platform.popen_flags(),
)
proc._rdc_stderr_path = stderr_file.name # type: ignore[attr-defined]
stderr_file.close()
return proc


def _read_daemon_stderr(proc: subprocess.Popen[str]) -> str:
"""Return the tail of the daemon's redirected stderr, then remove the file."""
path = getattr(proc, "_rdc_stderr_path", None)
if not path:
return ""
try:
text = Path(path).read_text(errors="replace")
except OSError:
return ""
finally:
try:
Path(path).unlink()
except OSError:
pass
return "\n".join(text.splitlines()[-20:]).strip()


def _discard_daemon_stderr(proc: subprocess.Popen[str]) -> None:
"""Best-effort removal of the daemon's stderr temp file after a successful start.

On POSIX the daemon keeps writing to the now-unlinked inode; on Windows the
file is still open so unlink may fail and is silently ignored (the OS reclaims
it on process exit).
"""
path = getattr(proc, "_rdc_stderr_path", None)
if path:
try:
Path(path).unlink()
except OSError:
pass


def wait_for_ping(
Expand Down Expand Up @@ -159,16 +202,17 @@ def open_session(
break
proc.terminate()
try:
_, stderr = proc.communicate(timeout=5)
proc.wait(timeout=5)
except subprocess.TimeoutExpired:
proc.kill()
proc.wait()
stderr = ""
if stderr and stderr.strip():
detail = stderr.strip()
stderr = _read_daemon_stderr(proc)
if stderr:
detail = stderr
else:
return False, f"error: daemon failed to start ({detail}) -- hint: run 'rdc doctor'"

_discard_daemon_stderr(proc)
create_session(
capture=str(capture),
host=host,
Expand Down Expand Up @@ -422,15 +466,16 @@ def listen_open_session(
if not ok:
proc.terminate()
try:
_, stderr = proc.communicate(timeout=5)
proc.wait(timeout=5)
except subprocess.TimeoutExpired:
proc.kill()
proc.wait()
stderr = ""
if stderr and stderr.strip():
detail = stderr.strip()
stderr = _read_daemon_stderr(proc)
if stderr:
detail = stderr
return False, f"error: daemon failed to start ({detail}) -- hint: run 'rdc doctor'"

_discard_daemon_stderr(proc)
create_session(
capture=capture,
host=connect_host,
Expand Down
35 changes: 35 additions & 0 deletions tests/unit/test_handlers_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from conftest import make_daemon_state

from rdc.daemon_server import cleanup_state
from rdc.handlers.core import _handle_shutdown, _handle_status
from rdc.handlers.texture import _handle_rt_overlay

Expand Down Expand Up @@ -75,6 +76,40 @@ def test_shutdown_remote_shuts_down_cap(self) -> None:
mock_cap.Shutdown.assert_called_once()


class TestCleanupState:
def test_remote_runs_close_and_shutdown_connection(self) -> None:
"""Shared helper used by both shutdown RPC and idle-timeout exit."""
state = _make_state(is_remote=True)
cleanup_state(state)
state.remote.CloseCapture.assert_called_once_with(state.adapter.controller)
state.remote.ShutdownConnection.assert_called_once()

def test_remote_does_not_call_adapter_shutdown(self) -> None:
state = _make_state(is_remote=True)
mock_adapter = MagicMock()
mock_adapter.controller = MagicMock()
state.adapter = mock_adapter # type: ignore[assignment]
cleanup_state(state)
mock_adapter.shutdown.assert_not_called()

def test_local_calls_adapter_shutdown_not_connection(self) -> None:
state = _make_state(is_remote=False)
mock_adapter = MagicMock()
mock_adapter.controller = MagicMock()
state.adapter = mock_adapter # type: ignore[assignment]
cleanup_state(state)
mock_adapter.shutdown.assert_called_once()

def test_remote_stops_ping_thread(self) -> None:
state = _make_state(is_remote=True)
stop_event = threading.Event()
state._ping_stop = stop_event
state._ping_thread = MagicMock()
cleanup_state(state)
assert stop_event.is_set()
state._ping_thread.join.assert_called_once_with(timeout=5.0)


class TestStatusRemote:
def test_remote_includes_remote_fields(self) -> None:
state = _make_state(is_remote=True)
Expand Down
31 changes: 23 additions & 8 deletions tests/unit/test_quickfix_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,37 +150,52 @@ def test_proc_wait_called_on_timeout(
mock_ping: MagicMock,
mock_check: MagicMock,
) -> None:
"""TC-2.1: proc.wait() called after communicate raises TimeoutExpired."""
"""TC-2.1: a hung terminate triggers kill()+wait() zombie cleanup."""
from rdc.services.session_service import open_session

mock_proc = MagicMock()
mock_proc.communicate.side_effect = subprocess.TimeoutExpired(cmd="rdc", timeout=5)

# wait(timeout=5) hangs on every attempt; the bare kill-path wait() returns.
def _wait(*_a: object, **kw: object) -> int:
if kw.get("timeout") is not None:
raise subprocess.TimeoutExpired(cmd="rdc", timeout=5)
return 0

mock_proc.wait.side_effect = _wait
mock_start.return_value = mock_proc

ok, msg = open_session(Path("test.rdc"))
assert not ok
assert mock_proc.wait.call_count >= 1
assert mock_proc.kill.call_count >= 1
assert mock_proc.wait.call_count >= 2

@patch("rdc.services.session_service._check_existing_session", return_value=(False, None))
@patch("rdc.services.session_service.wait_for_ping", return_value=(False, "timeout"))
@patch("rdc.services.session_service.start_daemon")
def test_normal_communicate_no_extra_wait(
def test_normal_failure_surfaces_stderr(
self,
mock_start: MagicMock,
mock_ping: MagicMock,
mock_check: MagicMock,
tmp_path: Path,
) -> None:
"""TC-2.2: normal communicate path does not call proc.wait()."""
"""TC-2.2: a clean (non-hung) daemon exit surfaces its stderr tail."""
from rdc.services.session_service import open_session

stderr_path = tmp_path / "daemon.stderr"
mock_proc = MagicMock()
mock_proc.communicate.return_value = ("", "some error\n")
mock_start.return_value = mock_proc
mock_proc.wait.return_value = 0
mock_proc._rdc_stderr_path = str(stderr_path)

def _start(*_a: object, **_kw: object) -> MagicMock:
stderr_path.write_text("some error\n")
return mock_proc

mock_start.side_effect = _start

ok, msg = open_session(Path("test.rdc"))
assert not ok
assert "some error" in msg
assert mock_proc.wait.call_count == 0


# ---------------------------------------------------------------------------
Expand Down
Loading
Loading