Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 83 additions & 19 deletions src/browser_harness/daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,23 +187,54 @@ def __init__(self):
self.stop = None # asyncio.Event, set inside start()

async def attach_first_page(self):
"""Attach to a real page (or any page). Sets self.session. Returns attached target or None."""
"""Attach to a real page (or any page). Sets self.session. Returns attached target or None.

Iterates real pages so a single wedged tab (busy renderer, sleeping tab,
unresponsive page) does not pin the daemon to a dead session — previously
the daemon picked pages[0] unconditionally, silently swallowed Page.enable
timeouts, and every later CDP call against that session hung forever.
Falls back to a fresh about:blank if every existing page is unresponsive.
"""
targets = (await self.cdp.send_raw("Target.getTargets"))["targetInfos"]
pages = [t for t in targets if is_real_page(t)]
if not pages:
# No real pages — create one instead of attaching to omnibox popup
tid = (await self.cdp.send_raw("Target.createTarget", {"url": "about:blank"}))["targetId"]
log(f"no real pages found, created about:blank ({tid})")
pages = [{"targetId": tid, "url": "about:blank", "type": "page"}]
self.session = (await self.cdp.send_raw(
"Target.attachToTarget", {"targetId": pages[0]["targetId"], "flatten": True}
))["sessionId"]
self.target_id = pages[0]["targetId"]
log(f"attached {pages[0]['targetId']} ({pages[0].get('url','')[:80]}) session={self.session}")
await self._enable_default_domains(self.session)
return pages[0]

async def _enable_default_domains(self, session_id):
for p in pages:
sid = await self._try_attach(p)
if sid:
self.session = sid
self.target_id = p["targetId"]
log(f"attached {p['targetId']} ({p.get('url','')[:80]}) session={sid}")
return p
log(f"target {p['targetId']} ({p.get('url','')[:80]}) unresponsive, trying next")
# Either no real pages, or every real page was unresponsive — create a fresh tab.
tid = (await self.cdp.send_raw("Target.createTarget", {"url": "about:blank"}))["targetId"]
fresh = {"targetId": tid, "url": "about:blank", "type": "page"}
sid = await self._try_attach(fresh)
if not sid:
raise RuntimeError(f"could not attach to any page (created about:blank {tid} but it also failed to enable)")
self.session = sid
self.target_id = tid
log(f"no responsive pages found, created+attached about:blank ({tid}) session={sid}")
return fresh

async def _try_attach(self, target):
"""Attach to a single target and enable Page/DOM/Runtime/Network.
Returns sessionId on success, None if attach or any enable hangs/fails
(target is unresponsive — caller should try the next one)."""
try:
sid = (await asyncio.wait_for(
self.cdp.send_raw("Target.attachToTarget", {"targetId": target["targetId"], "flatten": True}),
timeout=5,
))["sessionId"]
except Exception as e:
log(f"attach {target['targetId']} failed: {type(e).__name__}: {e}")
return None
if not await self._enable_default_domains(sid, strict=True):
try: await asyncio.wait_for(self.cdp.send_raw("Target.detachFromTarget", {"sessionId": sid}), timeout=2)
except Exception: pass
return None
return sid

async def _enable_default_domains(self, session_id, strict=False):
"""Enable Page/DOM/Runtime/Network on a CDP session.

Used by both initial attach and set_session (called after switch_tab/
Expand All @@ -216,16 +247,25 @@ async def _enable_default_domains(self, session_id):
bounded by a single CDP round trip rather than four sequential ones —
important on the set_session path, where the helper's IPC socket has
a 5s read timeout.

With strict=True (used during initial attach), returns False if any
enable failed so the caller can abandon the unresponsive target.
Without strict, returns True regardless — set_session has already
committed to the new target and partial enable is better than nothing.
"""
results = []
async def enable_one(d):
try:
await asyncio.wait_for(
self.cdp.send_raw(f"{d}.enable", session_id=session_id),
timeout=4,
)
results.append(True)
except Exception as e:
log(f"enable {d} on {session_id}: {e}")
log(f"enable {d} on {session_id}: {type(e).__name__}: {e}")
results.append(False)
await asyncio.gather(*(enable_one(d) for d in ("Page", "DOM", "Runtime", "Network")))
return all(results) if strict else True

async def start(self):
self.stop = asyncio.Event()
Expand Down Expand Up @@ -331,14 +371,38 @@ async def disable_old():
# Browser-level Target.* calls must not use a session (stale or otherwise).
# For everything else, explicit session in req wins; else default.
sid = None if method.startswith("Target.") else (req.get("session_id") or self.session)
# Per-call CDP timeout. A wedged renderer (sleeping tab, infinite JS,
# frozen iframe) would otherwise block the daemon's event loop forever,
# starving every other connected client. The IPC layer's 5s read timeout
# protects the *calling* client, but the daemon itself stays jammed
# without this. Override via BU_CDP_TIMEOUT for legitimately slow calls
# (large captureBeyondViewport screenshots, long Runtime.evaluate scripts).
cdp_timeout = float(os.environ.get("BU_CDP_TIMEOUT", "45"))
try:
return {"result": await self.cdp.send_raw(method, params, session_id=sid)}
return {"result": await asyncio.wait_for(
self.cdp.send_raw(method, params, session_id=sid),
timeout=cdp_timeout,
)}
except asyncio.TimeoutError:
return {"error": f"CDP {method} timed out after {cdp_timeout}s (session={sid}) — tab may be unresponsive; try ensure_real_tab() or restart_daemon()"}
except Exception as e:
msg = str(e)
if "Session with given id not found" in msg and sid == self.session and sid:
log(f"stale session {sid}, re-attaching")
if await self.attach_first_page():
return {"result": await self.cdp.send_raw(method, params, session_id=self.session)}
try:
# attach_first_page() iterates pages with per-call timeouts
# that stack — bound the whole recovery by cdp_timeout so a
# browser full of wedged tabs can't keep one client waiting
# past the contract.
if await asyncio.wait_for(self.attach_first_page(), timeout=cdp_timeout):
return {"result": await asyncio.wait_for(
self.cdp.send_raw(method, params, session_id=self.session),
timeout=cdp_timeout,
)}
except asyncio.TimeoutError:
return {"error": f"re-attach timed out after {cdp_timeout}s — every tab unresponsive; try restart_daemon()"}
except Exception as e2:
return {"error": f"re-attach failed: {e2}"}
return {"error": msg}


Expand Down
132 changes: 132 additions & 0 deletions tests/unit/test_daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,3 +243,135 @@ async def run():
f"first set_session must run 4 enables concurrently "
f"(observed peak = {peak}). No Network.disable should fire."
)


def test_attach_first_page_skips_unresponsive_targets():
"""If the first real page is wedged (Page.enable hangs forever), the
daemon must abandon it and try the next one — previously it pinned
self.session to the dead target and every later CDP call hung."""
class _SelectivelyWedgedCDP:
"""Page.enable hangs forever for target-WEDGED, succeeds for target-LIVE."""
def __init__(self):
self.calls = []

async def send_raw(self, method, params=None, session_id=None):
self.calls.append((method, params, session_id))
if method == "Target.getTargets":
return {"targetInfos": [
{"targetId": "target-WEDGED", "type": "page", "url": "https://wedged.example/"},
{"targetId": "target-LIVE", "type": "page", "url": "https://live.example/"},
]}
if method == "Target.attachToTarget":
tid = params["targetId"]
return {"sessionId": f"session-{tid}"}
if method == "Target.detachFromTarget":
return {}
if method.endswith(".enable") and session_id == "session-target-WEDGED":
# Hang forever — caller must use asyncio.wait_for to escape.
await asyncio.Event().wait()
return {}

async def run():
d = daemon.Daemon()
d.cdp = _SelectivelyWedgedCDP()
return await d.attach_first_page()

attached = asyncio.run(run())
assert attached["targetId"] == "target-LIVE", (
f"daemon must skip the wedged target and attach to the live one. "
f"Got: {attached}"
)


def test_attach_first_page_creates_blank_when_all_pages_unresponsive():
"""If every real page is wedged, fall back to a fresh about:blank rather
than hanging or dying. about:blank is guaranteed responsive."""
class _AllWedgedExceptBlankCDP:
def __init__(self):
self.calls = []

async def send_raw(self, method, params=None, session_id=None):
self.calls.append((method, params, session_id))
if method == "Target.getTargets":
return {"targetInfos": [
{"targetId": "wedged-1", "type": "page", "url": "https://a.example/"},
]}
if method == "Target.createTarget":
return {"targetId": "target-BLANK"}
if method == "Target.attachToTarget":
return {"sessionId": f"session-{params['targetId']}"}
if method == "Target.detachFromTarget":
return {}
if method.endswith(".enable") and session_id == "session-wedged-1":
await asyncio.Event().wait() # hang forever
return {}

async def run():
d = daemon.Daemon()
d.cdp = _AllWedgedExceptBlankCDP()
return await d.attach_first_page()

attached = asyncio.run(run())
assert attached["targetId"] == "target-BLANK"
assert attached["url"] == "about:blank"


def test_handle_times_out_wedged_cdp_call(monkeypatch):
"""A wedged CDP call must not pin the daemon's event loop forever.
BU_CDP_TIMEOUT (default 45s, lowered here for test speed) bounds it.
Returns a clean error with a recovery hint instead of hanging."""
monkeypatch.setenv("BU_CDP_TIMEOUT", "0.1")

class _WedgedCDP:
async def send_raw(self, method, params=None, session_id=None):
await asyncio.Event().wait() # hang forever

d = daemon.Daemon()
d.cdp = _WedgedCDP()
d.session = "s1"

resp = asyncio.run(d.handle({"method": "Runtime.evaluate", "params": {"expression": "1+1"}}))
assert "error" in resp
assert "timed out" in resp["error"]
assert "Runtime.evaluate" in resp["error"]


def test_handle_bounds_stale_session_recovery_by_cdp_timeout(monkeypatch):
"""Stale-session recovery (re-attach path) iterates pages with per-call
timeouts that stack — a browser full of wedged tabs could otherwise pin
one client past BU_CDP_TIMEOUT. The whole recovery must be bounded."""
monkeypatch.setenv("BU_CDP_TIMEOUT", "0.2")

class _StaleThenWedgedCDP:
"""First call raises 'Session with given id not found'; reattach path
then sees one real page whose Page.enable hangs forever."""
def __init__(self):
self.calls = []

async def send_raw(self, method, params=None, session_id=None):
self.calls.append((method, params, session_id))
if method == "Runtime.evaluate":
raise RuntimeError("Session with given id not found")
if method == "Target.getTargets":
return {"targetInfos": [
{"targetId": "wedged", "type": "page", "url": "https://wedged.example/"},
]}
if method == "Target.attachToTarget":
return {"sessionId": "session-wedged"}
if method == "Target.createTarget":
# If recovery is unbounded, it would fall through to creating
# about:blank (also wedged below) and keep hanging.
return {"targetId": "blank"}
if method.endswith(".enable"):
await asyncio.Event().wait() # every enable hangs
return {}

d = daemon.Daemon()
d.cdp = _StaleThenWedgedCDP()
d.session = "stale-session"

resp = asyncio.run(d.handle({"method": "Runtime.evaluate", "params": {"expression": "1+1"}}))
assert "error" in resp
assert "re-attach timed out" in resp["error"], (
f"recovery must surface a clean re-attach timeout, got: {resp['error']}"
)