diff --git a/percy/snapshot.py b/percy/snapshot.py index 5c75387..1bdc0d1 100644 --- a/percy/snapshot.py +++ b/percy/snapshot.py @@ -4,7 +4,7 @@ from contextlib import contextmanager from functools import lru_cache from time import sleep -from urllib.parse import urlparse, urljoin +from urllib.parse import urlparse import requests from selenium.webdriver import __version__ as SELENIUM_VERSION @@ -150,7 +150,11 @@ def iframe_context(driver, frame_element): driver.switch_to.parent_frame() def process_frame(driver, frame_element, options, percy_dom_script): - """Processes a single cross-origin frame to capture its snapshot.""" + """Processes a single cross-origin frame to capture its snapshot. + + Kept for backwards compatibility with existing tests/callers. New code paths + (nested CORS-iframe capture) go through ``process_frame_tree``. + """ frame_url = frame_element.get_attribute('src') or "unknown-src" with iframe_context(driver, frame_element): try: @@ -177,19 +181,474 @@ def process_frame(driver, frame_element, options, percy_dom_script): } -def _is_unsupported_iframe_src(frame_src): +# In-browser script that walks document.querySelectorAll('iframe') and returns +# metadata for each. Mirrors percy-nightwatch's enumerateIframesScript so the +# wire shape stays in sync. Selectors is a list[str] of CSS selectors that +# users want to opt out of CORS iframe capture for. 
+def enumerate_iframes_script(selectors): + selectors_json = json.dumps(list(selectors or [])) return ( - not frame_src or - frame_src == "about:blank" or - frame_src.startswith("javascript:") or - frame_src.startswith("data:") or - frame_src.startswith("vbscript:") + "var __percySelectors = " + selectors_json + ";" + "var __percyIframes = document.querySelectorAll('iframe');" + "var __percyResult = [];" + "for (var i = 0; i < __percyIframes.length; i++) {" + " var f = __percyIframes[i];" + " var matchesIgnore = false;" + " if (__percySelectors && __percySelectors.length) {" + " for (var j = 0; j < __percySelectors.length; j++) {" + " try { if (f.matches(__percySelectors[j])) { matchesIgnore = true; break; } }" + " catch (e) {}" + " }" + " }" + " __percyResult.push({" + " src: f.src || ''," + " srcdoc: f.getAttribute('srcdoc')," + " percyElementId: f.getAttribute('data-percy-element-id')," + " dataPercyIgnore: f.hasAttribute('data-percy-ignore')," + " matchesIgnoreSelector: matchesIgnore," + " index: i" + " });" + "}" + "return __percyResult;" + ) + + +def _should_skip_iframe(iframe, current_origin): + # pylint: disable=too-many-return-statements + """Mirror of nightwatch's shouldSkipIframe — pure on the enumerated metadata.""" + if iframe.get('dataPercyIgnore'): + log(f"Skipping iframe marked with data-percy-ignore: {iframe.get('src') or '(no src)'}", + "debug") + return True + if iframe.get('matchesIgnoreSelector'): + log(f"Skipping iframe matching ignoreIframeSelectors: " + f"{iframe.get('src') or '(no src)'}", "debug") + return True + # Check srcdoc BEFORE the src-emptiness check: a pure-srcdoc iframe has no + # src attribute, and we want it routed through the srcdoc-specific branch + # (where same-origin inlining handles it) rather than silently lumped under + # "unsupported src". 
+ if iframe.get('srcdoc'): + log(f"Skipping srcdoc iframe at index {iframe.get('index')}", "debug") + return True + src = iframe.get('src') or '' + if not src or is_unsupported_iframe_src(src): + if src: + log(f"Skipping unsupported iframe src: {src}", "debug") + return True + frame_origin = get_origin(src) + if not frame_origin: + log(f"Skipping iframe with invalid URL: {src}", "debug") + return True + if frame_origin == current_origin: + log(f"Skipping same-origin iframe: {src}", "debug") + return True + if not iframe.get('percyElementId'): + log(f"Skipping cross-origin iframe without data-percy-element-id: {src}", "debug") + return True + return False + + +def process_frame_tree(driver, iframe_meta, depth, ancestor_urls, ctx): + # pylint: disable=too-many-return-statements,too-many-statements + """Recursively capture a cross-origin iframe and any nested cross-origin + descendants. Bounded by ``ctx['max_frame_depth']`` to prevent runaway + recursion when pages link to each other in cycles. ``ancestor_urls`` is the + chain of frame URLs above this one — if the current frame's URL appears in + the chain we treat it as a cycle and stop descending. + """ + max_frame_depth = ctx['max_frame_depth'] + ignore_selectors = ctx['ignore_selectors'] + serialize_options = ctx['serialize_options'] + percy_dom_script = ctx['percy_dom_script'] + + if depth > max_frame_depth: + log(f"Reached max iframe nesting depth ({max_frame_depth}); " + f"stopping at {iframe_meta.get('src')}", "debug") + return [] + if ancestor_urls and iframe_meta.get('src') in ancestor_urls: + log(f"Skipping cyclic iframe ({iframe_meta.get('src')} appears in ancestor chain)", + "debug") + return [] + + collected = [] + switched_in = False + captured_error = None + # Track the post-switch URL so we can also detect cycles where a frame's + # static src differs from its resolved document.URL (redirect chains). 
+ inside_url = None + + try: + log(f"Processing cross-origin iframe (depth {depth}): {iframe_meta.get('src')}", + "debug") + + # Find the iframe element by its data-percy-element-id rather than by + # numeric index, which avoids drift if the DOM mutated between + # enumeration and switch. + find_script = ( + "return document.querySelector(" + "'iframe[data-percy-element-id=\"' + arguments[0] + '\"]'" + ");" + ) + iframe_element = driver.execute_script( + find_script, iframe_meta['percyElementId'] + ) + if not iframe_element: + log(f"Could not find iframe element with data-percy-element-id: " + f"{iframe_meta['percyElementId']}", "debug") + return [] + + driver.switch_to.frame(iframe_element) + switched_in = True + + # Post-switch URL re-check: a frame's src attribute may have pointed + # somewhere reachable but the actual loaded document can be about:blank + # or a net-error page. Read document.URL inside the frame and bail if + # unsupported. + try: + inside_url = driver.execute_script("return document.URL;") + except Exception: # pylint: disable=broad-except + inside_url = None + if is_unsupported_iframe_src(inside_url): + log(f"Skipping iframe (post-switch URL unsupported): {inside_url}", "debug") + return [] + # Second cycle check, on the resolved document.URL. A redirect chain + # (src=A → 30x → B) wouldn't trip the pre-switch guard because the + # static src doesn't appear in ancestor_urls — but the post-switch URL + # would. Catch the cycle here before we serialize and recurse. + if ancestor_urls and inside_url and inside_url in ancestor_urls: + log(f"Skipping cyclic iframe ({inside_url} appears in ancestor chain " + "via redirect resolution)", "debug") + return [] + + # Inject PercyDOM and serialize. enableJavaScript is forced to True so + # that the standard iframe serialization path is bypassed — we handle + # CORS iframe serialization manually here. 
+ driver.execute_script(percy_dom_script) + frame_options = {**serialize_options, 'enableJavaScript': True} + frame_result = driver.execute_script( + "return { snapshot: PercyDOM.serialize(" + json.dumps(frame_options) + ")," + " frameUrl: document.URL };" + ) + + if not frame_result or not frame_result.get('snapshot'): + log(f"Serialization returned empty result for frame: {iframe_meta.get('src')}", + "debug") + return [] + + frame_url = frame_result.get('frameUrl') or iframe_meta.get('src') or "unknown-src" + log(f"Captured cross-origin iframe (depth {depth}): {frame_url}", "debug") + + collected.append({ + "iframeData": {"percyElementId": iframe_meta['percyElementId']}, + "iframeSnapshot": frame_result['snapshot'], + "frameUrl": frame_url + }) + + # Look for cross-origin iframes nested inside this frame and recurse. + # Same-origin descendants are already inlined as srcdoc by + # PercyDOM.serialize above. Compare each nested-frame origin against + # this frame's origin (the immediate parent), not the page origin. + if depth < max_frame_depth: + current_origin = get_origin(frame_url) + try: + child_iframes_raw = driver.execute_script( + enumerate_iframes_script(ignore_selectors) + ) + except Exception as e: # pylint: disable=broad-except + log(f"Failed to enumerate nested iframes: {e}", "debug") + child_iframes_raw = [] + child_iframes = child_iframes_raw if isinstance(child_iframes_raw, list) else [] + next_ancestors = set(ancestor_urls or []) + next_ancestors.add(frame_url) + if iframe_meta.get('src'): + next_ancestors.add(iframe_meta['src']) + for child in child_iframes: + if _should_skip_iframe(child, current_origin): + continue + nested = process_frame_tree(driver, child, depth + 1, next_ancestors, ctx) + if nested: + collected.extend(nested) + + return collected + except PercyContextLost as err: + # Merge any partial capture from the inner level before propagating. 
+ if err.partial_capture: + collected.extend(err.partial_capture) + err.partial_capture = collected + raise + except Exception as error: # pylint: disable=broad-except + # Top-level (depth==1) failures mean a user-visible iframe didn't get + # captured. Surface those at info so users notice missing iframes; deeper + # nested failures stay at debug to avoid log spam in chatty pages. + failure_lvl = "info" if depth == 1 else "debug" + log(f"Failed to process cross-origin iframe {iframe_meta.get('src')}: {error}", + failure_lvl) + captured_error = error + return collected + finally: + if switched_in: + # Step up exactly one level so an outer recursion can continue from + # its own context. If parent_frame fails we have no reliable way to + # land in the correct parent — fall back to default_content and + # signal the caller to stop iterating siblings (whose enumeration + # was performed in a now-lost context). + try: + driver.switch_to.parent_frame() + except Exception as e: # pylint: disable=broad-except + log(f"Failed to switch back to parent frame: {e}", "debug") + try: + driver.switch_to.default_content() + except Exception: # pylint: disable=broad-except + pass + if depth > 1: + lost = PercyContextLost( + f"Lost parent frame context: {e}", + partial_capture=collected + ) + if captured_error is not None: + lost.__cause__ = captured_error + # pylint: disable=lost-exception + raise lost from e # noqa: B904 + + +def _capture_cors_iframes(driver, page_url, ctx): + """Top-level walk: enumerate page iframes, recurse into cross-origin ones.""" + try: + try: + iframe_info_raw = driver.execute_script( + enumerate_iframes_script(ctx['ignore_selectors']) + ) + except Exception as e: # pylint: disable=broad-except + log(f"Failed to enumerate top-level iframes: {e}", "debug") + return [] + iframe_info = iframe_info_raw if isinstance(iframe_info_raw, list) else [] + if not iframe_info: + return [] + + log(f"Found {len(iframe_info)} top-level iframe(s)", "debug") + page_origin = 
get_origin(page_url) + cors_iframes = [] + skipped = 0 + + for iframe in iframe_info: + if _should_skip_iframe(iframe, page_origin): + skipped += 1 + continue + try: + entries = process_frame_tree( + driver, iframe, 1, {page_url} if page_url else set(), ctx + ) + except PercyContextLost as err: + log("Aborting further nested CORS capture due to lost frame context", + "debug") + if err.partial_capture: + cors_iframes.extend(err.partial_capture) + break + if entries: + cors_iframes.extend(entries) + + log(f"Captured {len(cors_iframes)} cross-origin iframe(s) " + f"(top-level skipped: {skipped})", "debug") + return cors_iframes + except Exception as e: # pylint: disable=broad-except + log(f"Error capturing CORS iframes: {e}", "debug") + return [] + + +def expose_closed_shadow_roots(driver): + # pylint: disable=too-many-nested-blocks + """Use CDP to find every closed shadow root in the page and stash each + {host -> shadowRoot} pair in a WeakMap on ``window``. PercyDOM.serialize + reads from that map to capture closed-mode shadow DOM that would otherwise + be invisible to ordinary DOM traversal. Non-Chromium drivers will fail the + initial CDP call and we silently no-op. + """ + if not hasattr(driver, 'execute_cdp_cmd'): + return + try: + driver.execute_cdp_cmd("DOM.enable", {}) + except Exception as e: # pylint: disable=broad-except + log(f"CDP unavailable for closed shadow DOM capture: {e}", "debug") + return + try: + doc = driver.execute_cdp_cmd( + "DOM.getDocument", {"depth": -1, "pierce": True} + ) + root = doc.get("root") if isinstance(doc, dict) else None + closed_pairs = [] + + # Iterative walker. Recursive Python on a very deep DOM blows past + # CPython's recursion limit (~1000) and raises RecursionError, which + # the outer broad-except would silently swallow — meaning a deep page + # would just lose closed-shadow exposure with no diagnostic. A stack + # keeps memory bounded by tree breadth instead of tree depth. 
+ if root: + stack = [root] + while stack: + node = stack.pop() + # Skip nodes inside child frame documents — cross-frame closed + # shadow roots are not yet supported (their execution context + # lacks the WeakMap). + if not isinstance(node, dict) or node.get("contentDocument"): + continue + shadow_roots = node.get("shadowRoots") or [] + for sr in shadow_roots: + if sr.get("shadowRootType") == "closed": + closed_pairs.append({ + "hostBackendNodeId": node.get("backendNodeId"), + "shadowBackendNodeId": sr.get("backendNodeId") + }) + stack.append(sr) + for child in (node.get("children") or []): + stack.append(child) + + if not closed_pairs: + return + + log(f"Found {len(closed_pairs)} closed shadow root(s), exposing via CDP", + "debug") + + # Create the WeakMap on the page (same key as PercyDOM looks up). + driver.execute_script( + "window.__percyClosedShadowRoots = " + "window.__percyClosedShadowRoots || new WeakMap();" + ) + + for pair in closed_pairs: + try: + host_obj = driver.execute_cdp_cmd( + "DOM.resolveNode", {"backendNodeId": pair["hostBackendNodeId"]} + ) + shadow_obj = driver.execute_cdp_cmd( + "DOM.resolveNode", {"backendNodeId": pair["shadowBackendNodeId"]} + ) + host_id = (host_obj.get("object") or {}).get("objectId") + shadow_id = (shadow_obj.get("object") or {}).get("objectId") + if not host_id or not shadow_id: + continue + driver.execute_cdp_cmd("Runtime.callFunctionOn", { + "functionDeclaration": + "function(shadowRoot) {" + " window.__percyClosedShadowRoots.set(this, shadowRoot); }", + "objectId": host_id, + "arguments": [{"objectId": shadow_id}] + }) + except Exception as e: # pylint: disable=broad-except + log(f"Failed to expose a closed shadow root via CDP: {e}", "debug") + except Exception as e: # pylint: disable=broad-except + log(f"Could not expose closed shadow roots via CDP: {e}", "debug") + finally: + try: + driver.execute_cdp_cmd("DOM.disable", {}) + except Exception: # pylint: disable=broad-except + pass + + +# 
--------------------------------------------------------------------------- +# Inlined SDK helpers (mirrors @percy/sdk-utils used by Node SDKs). We do not +# bump a shared utils package — selenium-python ships these directly so that +# behavior stays in sync with percy-nightwatch / percy-webdriverio. +# --------------------------------------------------------------------------- + +DEFAULT_MAX_FRAME_DEPTH = 5 + + +def is_unsupported_iframe_src(frame_src): + """True if a frame's src cannot be navigated/loaded for serialization.""" + if not frame_src: + return True + unsupported_exact = ("about:blank", "about:srcdoc") + unsupported_prefixes = ( + "javascript:", "data:", "vbscript:", "blob:", + "chrome:", "chrome-extension:", "about:" ) + if frame_src in unsupported_exact: + return True + for prefix in unsupported_prefixes: + if frame_src.startswith(prefix): + return True + return False + + +# Backwards-compatible private alias kept for any external callers. +_is_unsupported_iframe_src = is_unsupported_iframe_src + + +def get_origin(url): + """Return scheme://netloc for a URL, or None when parsing fails.""" + try: + parsed = urlparse(url) + if not parsed.scheme or not parsed.netloc: + return None + return f"{parsed.scheme}://{parsed.netloc}" + except Exception: # pylint: disable=broad-except + return None def _get_origin(url): - parsed = urlparse(url) - return f"{parsed.scheme}://{parsed.netloc}" + """Compat shim: previous Feature 1 code expected a non-None string.""" + origin = get_origin(url) + return origin if origin is not None else "" + + +def clamp_frame_depth(value, default_max=DEFAULT_MAX_FRAME_DEPTH): + """Clamp a user-provided depth into [1, default_max].""" + try: + n = int(value) + except (TypeError, ValueError): + return default_max + if n < 1: + return 1 + if n > default_max: + return default_max + return n + + +def normalize_ignore_selectors(value): + """Accept str|list|None and return a clean list[str].""" + if value is None: + return [] + if 
isinstance(value, str): + return [value] if value.strip() else [] + if isinstance(value, (list, tuple)): + return [s for s in value if isinstance(s, str) and s.strip()] + return [] + + +def resolve_max_frame_depth(options, percy_config): + """Read maxIframeDepth from per-snapshot options or percy.config.snapshot.""" + options = options or {} + config = (percy_config or {}).get('snapshot', {}) if isinstance(percy_config, dict) else {} + raw = options.get('maxIframeDepth') + if raw is None: + raw = options.get('max_iframe_depth') + if raw is None: + raw = config.get('maxIframeDepth', DEFAULT_MAX_FRAME_DEPTH) + return clamp_frame_depth(raw) + + +def resolve_ignore_selectors(options, percy_config): + """Read ignoreIframeSelectors from per-snapshot options or percy.config.snapshot.""" + options = options or {} + config = (percy_config or {}).get('snapshot', {}) if isinstance(percy_config, dict) else {} + raw = options.get('ignoreIframeSelectors') + if raw is None: + raw = options.get('ignore_iframe_selectors') + if raw is None: + raw = config.get('ignoreIframeSelectors', []) + return normalize_ignore_selectors(raw) + + +class PercyContextLost(Exception): + """Raised when an iframe-context switch goes wrong mid-traversal. + + Carries any partial corsIframes capture already collected so the outer + caller can still emit a useful payload before bailing on the rest. + """ + def __init__(self, message, partial_capture=None): + super().__init__(message) + self.partial_capture = partial_capture or [] def _resolve_readiness_config(percy_config, kwargs): """Shallow-merge global (.percy.yml) readiness config with per-snapshot @@ -312,34 +771,21 @@ def get_serialized_dom(driver, cookies, percy_config=None, percy_dom_script=None # "gate ran, no notable diagnostics"). if readiness_diagnostics is not None and isinstance(dom_snapshot, dict): dom_snapshot['readiness_diagnostics'] = readiness_diagnostics - # 2. 
Process CORS IFrames - try: - page_origin = _get_origin(driver.current_url) - iframes = driver.find_elements("tag name", "iframe") - if iframes and percy_dom_script: - processed_frames = [] - for frame in iframes: - frame_src = frame.get_attribute('src') - if _is_unsupported_iframe_src(frame_src): - continue - - try: - frame_origin = _get_origin(urljoin(driver.current_url, frame_src)) - except Exception as e: - log(f"Skipping iframe \"{frame_src}\": {e}", "debug") - continue - - if frame_origin == page_origin: - continue - - result = process_frame(driver, frame, kwargs, percy_dom_script) - if result: - processed_frames.append(result) - - if processed_frames: - dom_snapshot['corsIframes'] = processed_frames - except Exception as e: - log(f"Failed to process cross-origin iframes: {e}", "debug") + # 2. Process CORS iframes (nested, depth-capped, cycle-guarded, ignore-aware) + if percy_dom_script: + ctx = { + 'max_frame_depth': resolve_max_frame_depth(kwargs, percy_config), + 'ignore_selectors': resolve_ignore_selectors(kwargs, percy_config), + 'serialize_options': dict(kwargs), + 'percy_dom_script': percy_dom_script, + } + try: + page_url = driver.current_url + except Exception: # pylint: disable=broad-except + page_url = None + cors_iframes = _capture_cors_iframes(driver, page_url, ctx) + if cors_iframes: + dom_snapshot['corsIframes'] = cors_iframes dom_snapshot['cookies'] = cookies return dom_snapshot @@ -457,22 +903,30 @@ def capture_responsive_dom(driver, cookies, config, percy_dom_script=None, **kwa log(f'Reloading page for width: {width}', 'debug') driver.refresh() driver.execute_script(percy_dom_script) + # Re-prime closed shadow roots after the page reload — the WeakMap + # on window was destroyed when navigation happened. 
+ expose_closed_shadow_roots(driver) _setup_resize_listener(driver) driver.execute_script("PercyDOM.waitForResize();") resize_count = 0 # Reset count because the listener just started fresh print(f'{width}x{height} ready, taking snapshot...') _responsive_sleep() dom_snapshot = get_serialized_dom( - driver, cookies, percy_config=config, - percy_dom_script=percy_dom_script, + driver, cookies, percy_dom_script=percy_dom_script, + percy_config=config, skip_readiness=True, readiness_diagnostics=responsive_readiness_diagnostics, **kwargs) dom_snapshot['width'] = width print(f'Taken snapshot for width: {width}, height: {height}') dom_snapshots.append(dom_snapshot) - with open("output_file.json", "w", encoding="utf-8") as file_handle: - json.dump(dom_snapshots, file_handle, indent=4) + # Optional debug dump — gated to avoid polluting the user's CWD on every CI run. + if PERCY_DEBUG: + try: + with open("output_file.json", "w", encoding="utf-8") as file_handle: + json.dump(dom_snapshots, file_handle, indent=4) + except Exception as e: # pylint: disable=broad-except + log(f"Could not write debug snapshot dump: {type(e).__name__}: {e}", "debug") change_window_dimension_and_wait(driver, current_width, current_height, resize_count + 1) return dom_snapshots @@ -498,6 +952,9 @@ def percy_snapshot(driver, name, **kwargs): # Inject the DOM serialization script percy_dom_script = fetch_percy_dom() driver.execute_script(percy_dom_script) + # Expose closed shadow roots via CDP before serialization so PercyDOM + # can find them through the WeakMap (Chromium-only; non-Chromium no-ops). 
+ expose_closed_shadow_roots(driver) cookies = driver.get_cookies() # Serialize and capture the DOM @@ -511,8 +968,8 @@ def percy_snapshot(driver, name, **kwargs): ) else: dom_snapshot = get_serialized_dom( - driver, cookies, percy_config=data.get('config'), - percy_dom_script=percy_dom_script, **kwargs) + driver, cookies, percy_dom_script=percy_dom_script, + percy_config=data.get('config'), **kwargs) # Strip SDK-local `readiness` from the snapshot POST body. The CLI # already has it via healthcheck; sending it again here risks future diff --git a/tests/test_snapshot.py b/tests/test_snapshot.py index f5800e7..0c05f7f 100644 --- a/tests/test_snapshot.py +++ b/tests/test_snapshot.py @@ -384,7 +384,12 @@ def test_posts_snapshots_to_the_local_percy_server_with_defer_and_responsive(sel self.assertEqual(httpretty.last_request().path, '/percy/snapshot') - s1 = httpretty.latest_requests()[2].parsed_body + snap_bodies = [ + r.parsed_body for r in httpretty.latest_requests() + if r.path == '/percy/snapshot' and r.method == 'POST' + and isinstance(r.parsed_body, dict) + ] + s1 = snap_bodies[0] self.assertEqual(s1['name'], 'Snapshot 1') self.assertEqual(s1['url'], 'http://localhost:8000/') self.assertEqual(s1['dom_snapshot'], expected_dom_snapshot) @@ -397,15 +402,20 @@ def test_posts_snapshots_to_the_local_percy_server_for_responsive_dom_chrome(sel driver = MockChrome.return_value # execute_script calls (reload=False): # [0] inject percy_dom [1] _setup_resize_listener [2] waitForResize - # [3] resize-check w375 [4] serialize w375 - # [5] resize-check w390 [6] serialize w390 [7] restore resize-check + # [3] resize-check w375 [4] serialize w375 [5] enumerate iframes w375 + # [6] resize-check w390 [7] serialize w390 [8] enumerate iframes w390 + # [9] restore resize-check driver.execute_script.side_effect = [ - '', '', None, 1, { 'html': 'some_dom' }, 2, { 'html': 'some_dom_1' }, 3 + '', '', None, + 1, { 'html': 'some_dom' }, [], + 2, { 'html': 'some_dom_1' }, [], + 3 ] 
driver.get_cookies.return_value = '' driver.execute_cdp_cmd.return_value = '' driver.get_window_size.return_value = { 'height': 400, 'width': 800 } - # Return empty iframe list so CORS-iframe code path is skipped + # find_elements is no longer used by the iframe path; left for any + # legacy callers. driver.find_elements.return_value = [] mock_logger() mock_healthcheck(widths = { "config": [375], "mobile": [390] }) @@ -431,12 +441,18 @@ def test_posts_snapshots_to_the_local_percy_server_for_responsive_dom_chrome(sel def test_has_a_backwards_compatible_function(self): mock_healthcheck() mock_snapshot() + mock_logger() percySnapshot(browser=self.driver, name='Snapshot') self.assertEqual(httpretty.last_request().path, '/percy/snapshot') - s1 = httpretty.latest_requests()[2].parsed_body + snap_bodies = [ + r.parsed_body for r in httpretty.latest_requests() + if r.path == '/percy/snapshot' and r.method == 'POST' + and isinstance(r.parsed_body, dict) + ] + s1 = snap_bodies[0] self.assertEqual(s1['name'], 'Snapshot') self.assertEqual(s1['url'], 'http://localhost:8000/') self.assertEqual(s1['dom_snapshot'], { @@ -1135,29 +1151,45 @@ def test_process_frame_returns_none_on_script_injection_failure(self): self.assertIsNone(result) + # ------------------------------------------------------------------ + # Helpers for nested-frame-tree tests. The new flow drives iframe + # discovery via driver.execute_script(enumerate_iframes_script(...)) + # which returns a list of metadata dicts (no real WebElements). 
+ # ------------------------------------------------------------------ + @staticmethod + def _meta(src, percy_id, *, srcdoc=None, ignore=False, matches=False, index=0): + return { + "src": src, + "srcdoc": srcdoc, + "percyElementId": percy_id, + "dataPercyIgnore": ignore, + "matchesIgnoreSelector": matches, + "index": index, + } + def test_get_serialized_dom_populates_cors_iframes(self): driver = Mock() + # execute_script call order: + # [0] main serialize [1] enumerate top-level iframes + # [2] querySelector (find iframe by id) + # [3] post-switch document.URL re-check + # [4] inject PercyDOM in frame + # [5] serialize the frame [6] enumerate nested iframes (empty) driver.execute_script.side_effect = [ - { - "html": '', - "resources": [{"url": "https://cdn/main.css", "content": "m"}]}, + {"html": '', + "resources": [{"url": "https://cdn/main.css", "content": "m"}]}, + [self._meta("http://main.example.com/inner", None), + self._meta("https://cross.example.com/page", "cid-1", index=1)], + Mock(name="iframe_element"), + "https://cross.example.com/page", None, - { - "html": "", - "resources": [{"url": "https://cdn/frame.css", "content": "f"}]}, + {"snapshot": {"html": "", + "resources": [{"url": "https://cdn/frame.css", "content": "f"}]}, + "frameUrl": "https://cross.example.com/page"}, + [], ] driver.current_url = "http://main.example.com/" - same_origin_frame = Mock() - same_origin_frame.get_attribute = lambda attr: ( - "http://main.example.com/inner" if attr == 'src' else None - ) - cross_origin_frame = Mock() - cross_origin_frame.get_attribute = lambda attr: ( - "https://cross.example.com/page" if attr == 'src' else "cid-1" - ) - driver.find_elements.return_value = [same_origin_frame, cross_origin_frame] - dom = local.get_serialized_dom(driver, [], percy_dom_script="some_script") self.assertIn("corsIframes", dom) @@ -1166,23 +1198,18 @@ def test_get_serialized_dom_populates_cors_iframes(self): self.assertEqual(entry["iframeData"]["percyElementId"], "cid-1") 
self.assertEqual(entry["iframeSnapshot"]["html"], "") self.assertEqual(entry["frameUrl"], "https://cross.example.com/page") - # HTML is left unchanged (no srcdoc injection here — core handles that) self.assertNotIn("srcdoc", dom["html"]) def test_get_serialized_dom_skips_blank_src_frames(self): """Frames with no src or src='about:blank' are not processed.""" driver = Mock() - driver.execute_script.return_value = { - "html": '' - } + driver.execute_script.side_effect = [ + {"html": ''}, + [self._meta("about:blank", None), + self._meta("", None, index=1)], + ] driver.current_url = "http://main.example.com/" - blank_frame = Mock() - blank_frame.get_attribute = lambda attr: ("about:blank" if attr == 'src' else None) - no_src_frame = Mock() - no_src_frame.get_attribute = lambda attr: (None if attr == 'src' else None) - driver.find_elements.return_value = [blank_frame, no_src_frame] - dom = local.get_serialized_dom(driver, [], percy_dom_script="some_script") self.assertNotIn("corsIframes", dom) @@ -1195,12 +1222,6 @@ def test_get_serialized_dom_no_cors_iframes_without_script(self): } driver.current_url = "http://main.example.com/" - cross_origin_frame = Mock() - cross_origin_frame.get_attribute = lambda attr: ( - "https://cross.example.com/page" if attr == 'src' else "cid-1" - ) - driver.find_elements.return_value = [cross_origin_frame] - dom = local.get_serialized_dom(driver, [], percy_dom_script=None) self.assertNotIn("corsIframes", dom) @@ -1208,71 +1229,62 @@ def test_get_serialized_dom_no_cors_iframes_without_script(self): def test_get_serialized_dom_cookies_always_attached(self): """Cookies are always added to the dom_snapshot regardless of iframes.""" driver = Mock() - driver.execute_script.return_value = {"html": ""} + driver.execute_script.side_effect = [{"html": ""}, []] driver.current_url = "http://main.example.com/" - driver.find_elements.return_value = [] cookies = [{"name": "session", "value": "abc"}] - dom = local.get_serialized_dom(driver, cookies) + dom 
= local.get_serialized_dom(driver, cookies, percy_dom_script="script") self.assertEqual(dom["cookies"], cookies) def test_get_serialized_dom_same_host_different_scheme_is_cross_origin(self): - """http://example.com and https://example.com differ in scheme → cross-origin. - Previously the netloc-only check would miss this; the origin-based check catches it.""" + """http://example.com vs https://example.com → cross-origin.""" driver = Mock() driver.execute_script.side_effect = [ {"html": ''}, - None, # percy_dom inject into frame - {"html": ""}, # frame serialize + [self._meta("https://main.example.com/widget", "percy-id-1")], + Mock(), + "https://main.example.com/widget", + None, + {"snapshot": {"html": ""}, "frameUrl": "https://main.example.com/widget"}, + [], ] driver.current_url = "http://main.example.com/" - # Same host, DIFFERENT scheme — should be treated as cross-origin - https_frame = Mock() - https_frame.get_attribute = lambda attr: ( - "https://main.example.com/widget" if attr == 'src' else "percy-id-1" - ) - driver.find_elements.return_value = [https_frame] - dom = local.get_serialized_dom(driver, [], percy_dom_script="script") self.assertIn("corsIframes", dom) self.assertEqual(dom["corsIframes"][0]["iframeSnapshot"]["html"], "") def test_get_serialized_dom_same_host_different_port_is_cross_origin(self): - """http://example.com:3000 and http://example.com:4000 differ in port → cross-origin.""" + """http://example.com:3000 vs http://example.com:4000 → cross-origin.""" driver = Mock() driver.execute_script.side_effect = [ {"html": ''}, + [self._meta("http://main.example.com:4000/widget", "percy-id-port")], + Mock(), + "http://main.example.com:4000/widget", None, - {"html": ""}, + {"snapshot": {"html": ""}, + "frameUrl": "http://main.example.com:4000/widget"}, + [], ] driver.current_url = "http://main.example.com:3000/" - diff_port_frame = Mock() - diff_port_frame.get_attribute = lambda attr: ( - "http://main.example.com:4000/widget" if attr == 'src' else 
"percy-id-port" - ) - driver.find_elements.return_value = [diff_port_frame] - dom = local.get_serialized_dom(driver, [], percy_dom_script="script") self.assertIn("corsIframes", dom) self.assertEqual(dom["corsIframes"][0]["iframeSnapshot"]["html"], "") def test_get_serialized_dom_same_origin_is_not_cross_origin(self): - """http://main.example.com/page1 and http://main.example.com/page2 share origin.""" + """Same-origin iframes are skipped before any frame switch.""" driver = Mock() - driver.execute_script.return_value = {"html": ""} + driver.execute_script.side_effect = [ + {"html": ""}, + [self._meta("http://main.example.com/inner.html", "percy-id-same")], + ] driver.current_url = "http://main.example.com/" - same_origin_frame = Mock() - same_origin_frame.get_attribute = lambda attr: ( - "http://main.example.com/inner.html" if attr == 'src' else "percy-id-same" - ) - driver.find_elements.return_value = [same_origin_frame] - dom = local.get_serialized_dom(driver, [], percy_dom_script="script") self.assertNotIn("corsIframes", dom) @@ -1318,15 +1330,16 @@ def test_get_serialized_dom_corsIframes_entry_has_correct_structure(self): driver = Mock() driver.execute_script.side_effect = [ {"html": dom_html, "resources": []}, + [self._meta("https://cross.example.com/page", "cid-1")], + Mock(), + "https://cross.example.com/page", None, - {"html": '

Frame

', "resources": [frame_resource]}, + {"snapshot": {"html": '

Frame

', + "resources": [frame_resource]}, + "frameUrl": "https://cross.example.com/page"}, + [], ] driver.current_url = "http://main.example.com/" - frame = Mock() - frame.get_attribute = lambda attr: ( - "https://cross.example.com/page" if attr == 'src' else "cid-1" - ) - driver.find_elements.return_value = [frame] dom = local.get_serialized_dom(driver, [], percy_dom_script="some_script") @@ -1341,30 +1354,23 @@ def test_get_serialized_dom_multiple_cross_origin_frames(self): """All cross-origin frames are collected; same-origin frames are skipped.""" driver = Mock() driver.execute_script.side_effect = [ - { - "html": '' - '' - '' - }, - None, {"html": ""}, - None, {"html": ""}, + {"html": '' + '' + ''}, + [self._meta("https://a.other.com/w1", "pid-1"), + self._meta("http://main.example.com/inner", "pid-same", index=1), + self._meta("https://b.other.com/w2", "pid-2", index=2)], + # frame 1 + Mock(), "https://a.other.com/w1", None, + {"snapshot": {"html": ""}, "frameUrl": "https://a.other.com/w1"}, + [], + # frame 2 + Mock(), "https://b.other.com/w2", None, + {"snapshot": {"html": ""}, "frameUrl": "https://b.other.com/w2"}, + [], ] driver.current_url = "http://main.example.com/" - frame1 = Mock() - frame1.get_attribute = lambda attr: ( - "https://a.other.com/w1" if attr == 'src' else "pid-1" - ) - frame2 = Mock() - frame2.get_attribute = lambda attr: ( - "https://b.other.com/w2" if attr == 'src' else "pid-2" - ) - same_origin = Mock() - same_origin.get_attribute = lambda attr: ( - "http://main.example.com/inner" if attr == 'src' else "pid-same" - ) - driver.find_elements.return_value = [frame1, same_origin, frame2] - dom = local.get_serialized_dom(driver, [], percy_dom_script="script") self.assertIn("corsIframes", dom) @@ -1374,13 +1380,15 @@ def test_get_serialized_dom_multiple_cross_origin_frames(self): self.assertIn("pid-2", pids) self.assertNotIn("pid-same", pids) - def test_get_serialized_dom_handles_find_elements_exception(self): - """If find_elements raises, the 
error is swallowed, cookies are still attached, - and DOM stitching is skipped.""" + def test_get_serialized_dom_handles_enumerate_exception(self): + """If iframe enumeration raises, the error is swallowed, cookies are still + attached, and CORS iframe stitching is skipped.""" driver = Mock() - driver.execute_script.return_value = {"html": ""} + driver.execute_script.side_effect = [ + {"html": ""}, + Exception("enumerate error"), + ] driver.current_url = "http://main.example.com/" - driver.find_elements.side_effect = Exception("find error") dom = local.get_serialized_dom(driver, [{"name": "k", "value": "v"}], percy_dom_script="script") @@ -1391,34 +1399,334 @@ def test_get_serialized_dom_handles_find_elements_exception(self): def test_get_serialized_dom_process_frame_failure_is_skipped(self): """If a cross-origin frame fails to process, it is omitted and the rest succeed.""" driver = Mock() - # Calls: main serialize, fail-frame inject (raises), ok-frame inject, ok-frame serialize driver.execute_script.side_effect = [ - { - "html": '' - '' - }, - Exception("inject failed"), # fail_frame injection raises - None, # ok_frame inject - {"html": ""}, # ok_frame serialize + {"html": '' + ''}, + [self._meta("https://fail.example.com/page", "pid-fail"), + self._meta("https://ok.example.com/page", "pid-ok", index=1)], + # fail frame: querySelector returns, document.URL ok, inject raises + Mock(), "https://fail.example.com/page", Exception("inject failed"), + # ok frame + Mock(), "https://ok.example.com/page", None, + {"snapshot": {"html": ""}, "frameUrl": "https://ok.example.com/page"}, + [], + ] + driver.current_url = "http://main.example.com/" + # switch_to.frame succeeds for both; parent_frame called in finally + dom = local.get_serialized_dom(driver, [], percy_dom_script="script") + + self.assertIn("corsIframes", dom) + self.assertEqual(len(dom["corsIframes"]), 1) + self.assertEqual(dom["corsIframes"][0]["iframeData"]["percyElementId"], "pid-ok") + 
self.assertEqual(dom["corsIframes"][0]["iframeSnapshot"]["html"], "") + + +class TestIframeHelpers(unittest.TestCase): + """Unit tests for the inlined sdk-utils helpers.""" + + def test_is_unsupported_iframe_src_truthy_cases(self): + for src in [None, '', 'about:blank', 'about:srcdoc', 'javascript:alert(1)', + 'data:text/html;base64,', 'blob:http://foo', 'chrome://settings', + 'vbscript:msg']: + self.assertTrue(local.is_unsupported_iframe_src(src), msg=src) + + def test_is_unsupported_iframe_src_falsy_cases(self): + for src in ['http://x', 'https://x.example.com/p', 'http://x.example.com:8080/']: + self.assertFalse(local.is_unsupported_iframe_src(src), msg=src) + + def test_clamp_frame_depth_bounds(self): + self.assertEqual(local.clamp_frame_depth(0), 1) + self.assertEqual(local.clamp_frame_depth(-3), 1) + self.assertEqual(local.clamp_frame_depth(1), 1) + self.assertEqual(local.clamp_frame_depth(3), 3) + self.assertEqual(local.clamp_frame_depth(5), 5) + self.assertEqual(local.clamp_frame_depth(99), local.DEFAULT_MAX_FRAME_DEPTH) + self.assertEqual(local.clamp_frame_depth("not-a-number"), local.DEFAULT_MAX_FRAME_DEPTH) + self.assertEqual(local.clamp_frame_depth(None), local.DEFAULT_MAX_FRAME_DEPTH) + + def test_normalize_ignore_selectors(self): + self.assertEqual(local.normalize_ignore_selectors(None), []) + self.assertEqual(local.normalize_ignore_selectors(''), []) + self.assertEqual(local.normalize_ignore_selectors(' '), []) + self.assertEqual(local.normalize_ignore_selectors('.x'), ['.x']) + self.assertEqual(local.normalize_ignore_selectors(['.x', '', None, '.y']), + ['.x', '.y']) + self.assertEqual(local.normalize_ignore_selectors(123), []) + + def test_resolve_max_frame_depth_precedence(self): + # options camel takes precedence + self.assertEqual(local.resolve_max_frame_depth( + {'maxIframeDepth': 2}, {'snapshot': {'maxIframeDepth': 4}}), 2) + # options snake works + self.assertEqual(local.resolve_max_frame_depth( + {'max_iframe_depth': 3}, None), 3) + # falls 
back to config + self.assertEqual(local.resolve_max_frame_depth( + {}, {'snapshot': {'maxIframeDepth': 4}}), 4) + # defaults when nothing + self.assertEqual(local.resolve_max_frame_depth({}, {}), + local.DEFAULT_MAX_FRAME_DEPTH) + + def test_resolve_ignore_selectors_precedence(self): + self.assertEqual(local.resolve_ignore_selectors( + {'ignoreIframeSelectors': '.a'}, None), ['.a']) + self.assertEqual(local.resolve_ignore_selectors( + {'ignore_iframe_selectors': ['.b', '.c']}, None), ['.b', '.c']) + self.assertEqual(local.resolve_ignore_selectors( + {}, {'snapshot': {'ignoreIframeSelectors': ['.d']}}), ['.d']) + self.assertEqual(local.resolve_ignore_selectors({}, {}), []) + + def test_enumerate_iframes_script_embeds_selectors(self): + script = local.enumerate_iframes_script(['.a', '.b']) + self.assertIn('".a"', script) + self.assertIn('".b"', script) + self.assertIn('querySelectorAll(\'iframe\')', script) + self.assertIn('data-percy-element-id', script) + self.assertIn("hasAttribute('data-percy-ignore')", script) + + def test_enumerate_iframes_script_with_no_selectors(self): + script = local.enumerate_iframes_script(None) + self.assertIn('= [];', script) + + +class TestIframeTreeBehavior(unittest.TestCase): + """Tests exercising the depth cap, cycle guard, ignore attrs, and selectors.""" + + @staticmethod + def _meta(src, percy_id, *, srcdoc=None, ignore=False, matches=False, index=0): + return { + "src": src, + "srcdoc": srcdoc, + "percyElementId": percy_id, + "dataPercyIgnore": ignore, + "matchesIgnoreSelector": matches, + "index": index, + } + + def test_data_percy_ignore_attribute_skips_frame(self): + """An iframe carrying data-percy-ignore is dropped before any switch.""" + driver = Mock() + driver.execute_script.side_effect = [ + {"html": ""}, + [self._meta("https://cross.example.com/p", "pid-1", ignore=True)], + ] + driver.current_url = "http://main.example.com/" + + dom = local.get_serialized_dom(driver, [], percy_dom_script="script") + 
self.assertNotIn("corsIframes", dom) + + def test_ignore_iframe_selectors_option_skips_matched_frame(self): + """A frame whose `matchesIgnoreSelector` was set by the enumerate + script is dropped. The selector list is forwarded into the JS.""" + driver = Mock() + driver.execute_script.side_effect = [ + {"html": ""}, + [self._meta("https://cross.example.com/p", "pid-1", matches=True)], ] driver.current_url = "http://main.example.com/" - fail_frame = Mock() - fail_frame.get_attribute = lambda attr: ( - "https://fail.example.com/page" if attr == 'src' else "pid-fail" + dom = local.get_serialized_dom( + driver, [], percy_dom_script="script", + ignoreIframeSelectors=['.ad', '.tracker'] ) - ok_frame = Mock() - ok_frame.get_attribute = lambda attr: ( - "https://ok.example.com/page" if attr == 'src' else "pid-ok" + self.assertNotIn("corsIframes", dom) + + # The selectors must have been baked into the enumerate JS the SDK ran. + enumerate_call = driver.execute_script.call_args_list[1][0][0] + self.assertIn('".ad"', enumerate_call) + self.assertIn('".tracker"', enumerate_call) + + def test_post_switch_url_recheck_drops_about_blank(self): + """If document.URL inside the frame becomes unsupported (about:blank, + net-error), drop the frame and do not serialize.""" + driver = Mock() + driver.execute_script.side_effect = [ + {"html": ""}, + [self._meta("https://cross.example.com/p", "pid-1")], + Mock(), # querySelector for the iframe element + "about:blank", # post-switch document.URL is unsupported + ] + driver.current_url = "http://main.example.com/" + + dom = local.get_serialized_dom(driver, [], percy_dom_script="script") + self.assertNotIn("corsIframes", dom) + # parent_frame called in finally + driver.switch_to.parent_frame.assert_called_once() + + def test_max_iframe_depth_caps_recursion(self): + """With maxIframeDepth=1, nested iframes are not entered.""" + driver = Mock() + # Top-level frame at depth 1 is captured; nested children are skipped + # because depth+1 > max. 
+ driver.execute_script.side_effect = [ + {"html": ""}, + [self._meta("https://a.example.com/", "pid-1")], + Mock(), + "https://a.example.com/", + None, + {"snapshot": {"html": ""}, "frameUrl": "https://a.example.com/"}, + ] + driver.current_url = "http://main.example.com/" + + dom = local.get_serialized_dom( + driver, [], percy_dom_script="script", maxIframeDepth=1 ) - driver.find_elements.return_value = [fail_frame, ok_frame] + self.assertEqual(len(dom["corsIframes"]), 1) + # Only 6 execute_script calls — no nested-enumerate call. + self.assertEqual(driver.execute_script.call_count, 6) + + def test_ancestor_cycle_guard_stops_descent(self): + """If a nested iframe's src appears in the ancestor chain, it is not + recursed into.""" + driver = Mock() + driver.execute_script.side_effect = [ + {"html": ""}, + # top-level + [self._meta("https://a.example.com/", "pid-a")], + # switch into a + Mock(), + "https://a.example.com/", + None, + {"snapshot": {"html": ""}, "frameUrl": "https://a.example.com/"}, + # nested enumeration inside a: cycles back to the page URL + [self._meta("http://main.example.com/", "pid-cycle")], + ] + driver.current_url = "http://main.example.com/" + + dom = local.get_serialized_dom(driver, [], percy_dom_script="script") + self.assertEqual(len(dom["corsIframes"]), 1) + self.assertEqual(dom["corsIframes"][0]["iframeData"]["percyElementId"], "pid-a") + + def test_percy_context_lost_preserves_partial_capture(self): + """If parent_frame() fails at depth > 1, raise PercyContextLost and have + the top-level walk include whatever was captured so far.""" + driver = Mock() + # First top-level frame captures successfully and recurses; the nested + # frame raises on parent_frame, which surfaces as PercyContextLost. 
+ driver.execute_script.side_effect = [ + {"html": ""}, + [self._meta("https://a.example.com/", "pid-a"), + self._meta("https://b.example.com/", "pid-b", index=1)], + # frame a + Mock(), "https://a.example.com/", None, + {"snapshot": {"html": ""}, "frameUrl": "https://a.example.com/"}, + # nested enumeration inside a + [self._meta("https://c.example.com/", "pid-c")], + # frame c (nested) + Mock(), "https://c.example.com/", None, + {"snapshot": {"html": ""}, "frameUrl": "https://c.example.com/"}, + [], + ] + driver.current_url = "http://main.example.com/" + # parent_frame fails on the *first* call (returning from c -> a). + call_count = {'n': 0} + def parent_frame_side_effect(): + call_count['n'] += 1 + if call_count['n'] == 1: + raise RuntimeError("lost context") + driver.switch_to.parent_frame.side_effect = parent_frame_side_effect dom = local.get_serialized_dom(driver, [], percy_dom_script="script") self.assertIn("corsIframes", dom) - self.assertEqual(len(dom["corsIframes"]), 1) - self.assertEqual(dom["corsIframes"][0]["iframeData"]["percyElementId"], "pid-ok") - self.assertEqual(dom["corsIframes"][0]["iframeSnapshot"]["html"], "") + # The partial capture from inside frame a (a + c) must be preserved. + pids = [e["iframeData"]["percyElementId"] for e in dom["corsIframes"]] + self.assertIn("pid-a", pids) + self.assertIn("pid-c", pids) + # frame b must NOT have been processed — sibling iteration aborted. 
+ self.assertNotIn("pid-b", pids) + + +class TestExposeClosedShadowRoots(unittest.TestCase): + def test_noop_on_driver_without_cdp(self): + """A driver without execute_cdp_cmd is a silent no-op.""" + class StubDriver: # pylint: disable=too-few-public-methods + # no execute_cdp_cmd + pass + # Should not raise + local.expose_closed_shadow_roots(StubDriver()) + + def test_noop_when_dom_enable_fails(self): + """If DOM.enable raises (non-Chromium), we exit silently before + anything else runs.""" + driver = Mock() + driver.execute_cdp_cmd.side_effect = Exception("cdp not supported") + local.expose_closed_shadow_roots(driver) + # only the failed DOM.enable call happens + driver.execute_cdp_cmd.assert_called_once_with("DOM.enable", {}) + + def test_exposes_closed_roots_via_weakmap(self): + """When CDP returns a closed shadow root, exposeClosedShadowRoots + creates the WeakMap and calls Runtime.callFunctionOn to populate it.""" + driver = Mock() + cdp_calls = [] + def cdp(cmd, params): # pylint: disable=too-many-return-statements + cdp_calls.append(cmd) + if cmd == "DOM.enable": + return {} + if cmd == "DOM.getDocument": + return { + "root": { + "backendNodeId": 1, + "shadowRoots": [], + "children": [{ + "backendNodeId": 2, + "shadowRoots": [{ + "backendNodeId": 3, + "shadowRootType": "closed", + "children": [] + }], + }] + } + } + if cmd == "DOM.resolveNode": + if params["backendNodeId"] == 2: + return {"object": {"objectId": "host-obj"}} + if params["backendNodeId"] == 3: + return {"object": {"objectId": "shadow-obj"}} + if cmd == "Runtime.callFunctionOn": + return {} + if cmd == "DOM.disable": + return {} + return {} + driver.execute_cdp_cmd.side_effect = cdp + + local.expose_closed_shadow_roots(driver) + + # WeakMap was created on the page + scripts = [c[0][0] for c in driver.execute_script.call_args_list] + self.assertTrue(any("__percyClosedShadowRoots" in s for s in scripts)) + self.assertIn("Runtime.callFunctionOn", cdp_calls) + self.assertIn("DOM.resolveNode", 
cdp_calls) + + def test_skips_content_document_subtrees(self): + """Closed shadow roots inside an iframe's contentDocument are not + exposed (their JS context is separate from the page main world).""" + driver = Mock() + def cdp(cmd, _params): + if cmd == "DOM.getDocument": + return { + "root": { + "backendNodeId": 1, + "children": [{ + "backendNodeId": 2, + "contentDocument": { + "backendNodeId": 100, + "shadowRoots": [{ + "backendNodeId": 101, + "shadowRootType": "closed", + }], + }, + }] + } + } + return {} + driver.execute_cdp_cmd.side_effect = cdp + local.expose_closed_shadow_roots(driver) + # No Runtime.callFunctionOn — the closed root inside contentDocument + # was skipped, and execute_script for the WeakMap was never run. + scripts = [c[0][0] for c in driver.execute_script.call_args_list] + self.assertFalse(any("__percyClosedShadowRoots" in s for s in scripts)) class TestCreateRegion(unittest.TestCase): @@ -1536,5 +1844,211 @@ def test_create_region_with_invalid_algorithm(self): self.assertEqual(result, expected_result) +class TestShouldSkipIframeOrdering(unittest.TestCase): + # pylint: disable=protected-access + """Regression tests for the srcdoc-vs-unsupported-src ordering fix.""" + + def test_pure_srcdoc_iframe_with_empty_src_takes_srcdoc_branch(self): + """A pure-srcdoc iframe has empty src; the srcdoc branch must run BEFORE + the unsupported-src check so we route it the same way percy-nightwatch + does (inlined via PercyDOM.serialize).""" + with patch.object(local, 'log') as mock_log: + iframe = { + 'src': '', + 'srcdoc': '

hello

', + 'percyElementId': 'pid-srcdoc', + 'dataPercyIgnore': False, + 'matchesIgnoreSelector': False, + 'index': 3, + } + self.assertTrue(local._should_skip_iframe( + iframe, "http://main.example.com")) + # The skip reason must be the srcdoc branch, not 'unsupported src'. + messages = [c.args[0] for c in mock_log.call_args_list] + self.assertTrue(any("Skipping srcdoc iframe" in m for m in messages), + f"Expected srcdoc skip reason, got: {messages}") + self.assertFalse(any("unsupported iframe src" in m for m in messages)) + + def test_srcdoc_with_supported_src_still_takes_srcdoc_branch(self): + """If a frame carries srcdoc AND a real src, srcdoc still wins — + the inlined HTML takes precedence over the cross-origin load.""" + with patch.object(local, 'log') as mock_log: + iframe = { + 'src': 'https://cross.example.com/page', + 'srcdoc': '

doc

', + 'percyElementId': 'pid-both', + 'dataPercyIgnore': False, + 'matchesIgnoreSelector': False, + 'index': 0, + } + self.assertTrue(local._should_skip_iframe( + iframe, "http://main.example.com")) + messages = [c.args[0] for c in mock_log.call_args_list] + self.assertTrue(any("Skipping srcdoc iframe" in m for m in messages)) + + +class TestRedirectCycleGuardOnFrameUrl(unittest.TestCase): + """A frame whose static src differs from its resolved document.URL still + has to be caught by the cycle guard.""" + + @staticmethod + def _meta(src, percy_id, *, srcdoc=None, ignore=False, matches=False, index=0): + return { + "src": src, "srcdoc": srcdoc, "percyElementId": percy_id, + "dataPercyIgnore": ignore, "matchesIgnoreSelector": matches, + "index": index, + } + + def test_redirect_chain_cycle_caught_on_frameUrl(self): + """page_url=B. Frame on page has src=A which redirects → document.URL=B. + The cycle must be detected via the post-switch frameUrl, not just src.""" + driver = Mock() + # Calls in order: + # 0: main serialize + # 1: enumerate top-level iframes + # 2: querySelector for the iframe element + # 3: post-switch document.URL → resolves to the page URL (cycle!) + driver.execute_script.side_effect = [ + {"html": ""}, + [self._meta("https://a.example.com/redirector", "pid-a")], + Mock(name="iframe_element"), + "http://main.example.com/", # resolved to page URL → cycle + ] + driver.current_url = "http://main.example.com/" + + dom = local.get_serialized_dom( + driver, [], percy_dom_script="some_script" + ) + + # Frame was switched into and back out of, but never serialized. + self.assertNotIn("corsIframes", dom) + # We did the post-switch URL read (call #3 above) and then bailed — + # no PercyDOM.serialize on this frame, no nested enumeration. 
+ self.assertEqual(driver.execute_script.call_count, 4) + driver.switch_to.parent_frame.assert_called_once() + + +class TestTopLevelIframeFailureLog(unittest.TestCase): + """When a depth==1 iframe fails to process, the user should see it at + info level (not buried at debug).""" + + @staticmethod + def _meta(src, percy_id): + return { + "src": src, "srcdoc": None, "percyElementId": percy_id, + "dataPercyIgnore": False, "matchesIgnoreSelector": False, + "index": 0, + } + + def test_depth1_failure_logs_at_info(self): + driver = Mock() + driver.execute_script.side_effect = [ + {"html": ""}, + [self._meta("https://cross.example.com/page", "pid-1")], + Mock(), # iframe element + "https://cross.example.com/page", # post-switch document.URL + Exception("dom inject blew up"), # injection of PercyDOM raises + ] + driver.current_url = "http://main.example.com/" + + with patch.object(local, 'log') as mock_log: + local.get_serialized_dom(driver, [], percy_dom_script="script") + + # One of the log calls must mention the failure AT info level. + failure_calls = [ + c for c in mock_log.call_args_list + if "Failed to process cross-origin iframe" in c.args[0] + ] + self.assertEqual(len(failure_calls), 1, "expected exactly one failure log") + # Second positional arg, if present, is the level. Default is 'info'. + lvl = failure_calls[0].args[1] if len(failure_calls[0].args) > 1 \ + else failure_calls[0].kwargs.get('lvl', 'info') + self.assertEqual(lvl, "info") + + def test_nested_failure_stays_at_debug(self): + """Depth > 1 failures stay at debug to avoid log spam on chatty pages.""" + driver = Mock() + driver.execute_script.side_effect = [ + {"html": ""}, + [self._meta("https://a.example.com/", "pid-a")], + Mock(), "https://a.example.com/", None, + {"snapshot": {"html": "
"}, "frameUrl": "https://a.example.com/"}, + # nested enumeration inside a returns a child + [self._meta("https://b.example.com/", "pid-b")], + # nested child blows up at injection + Mock(), "https://b.example.com/", + Exception("nested inject blew up"), + ] + driver.current_url = "http://main.example.com/" + + with patch.object(local, 'log') as mock_log: + local.get_serialized_dom(driver, [], percy_dom_script="script") + + nested_failures = [ + c for c in mock_log.call_args_list + if "Failed to process cross-origin iframe https://b.example.com" in c.args[0] + ] + self.assertEqual(len(nested_failures), 1) + lvl = nested_failures[0].args[1] if len(nested_failures[0].args) > 1 else "info" + self.assertEqual(lvl, "debug") + + +class TestIterativeShadowWalker(unittest.TestCase): + """The closed-shadow walker must handle very deep trees without + RecursionError, which the outer except would otherwise swallow.""" + + def test_deeply_nested_tree_does_not_raise_recursion_error(self): + # Build a deep linear chain well beyond CPython's default recursion + # limit (~1000). Recursive walker would RecursionError; iterative + # walker must finish cleanly. + deep = 3000 + leaf = {"backendNodeId": deep + 1, "shadowRoots": [], "children": []} + node = leaf + for i in range(deep, 0, -1): + node = { + "backendNodeId": i, + "shadowRoots": [], + "children": [node], + } + # Plant one closed shadow root deep in the tree so we can assert it + # was discovered. 
+ cursor = node + for _ in range(deep // 2): + cursor = cursor["children"][0] + cursor["shadowRoots"] = [{ + "backendNodeId": 999999, + "shadowRootType": "closed", + "children": [], + }] + + driver = Mock() + def cdp(cmd, params): + if cmd == "DOM.enable": + return {} + if cmd == "DOM.getDocument": + return {"root": node} + if cmd == "DOM.resolveNode": + return {"object": {"objectId": f"obj-{params.get('backendNodeId')}"}} + if cmd == "Runtime.callFunctionOn": + return {} + if cmd == "DOM.disable": + return {} + return {} + driver.execute_cdp_cmd.side_effect = cdp + + # If the walker were still recursive this would silently no-op (the + # broad except swallows RecursionError). With the iterative walker we + # must actually see the WeakMap script + the Runtime.callFunctionOn + # call for the planted closed shadow root. + local.expose_closed_shadow_roots(driver) + + scripts = [c.args[0] for c in driver.execute_script.call_args_list] + self.assertTrue(any("__percyClosedShadowRoots" in s for s in scripts), + "WeakMap script must have run — walker did not find " + "the deeply nested closed shadow root") + cdp_cmds = [c.args[0] for c in driver.execute_cdp_cmd.call_args_list] + self.assertIn("Runtime.callFunctionOn", cdp_cmds) + + if __name__ == '__main__': unittest.main()