# ---------- formatting helpers ----------


def format_duration(seconds: float) -> str:
    """Format a duration in seconds as a compact human-readable string.

    Durations that display as less than a minute keep one decimal
    ("12.3s"). Longer ones are rounded to whole seconds and rendered as
    "Xm Ys" or "Xh Ym Zs".

    The value is rounded *before* it is split into units so the seconds
    field can never carry over and display as 60 (119.6 renders as
    "2m 0s", not "1m 60s"; 59.96 renders as "1m 0s", not "60.0s").
    """
    if round(seconds, 1) < 60:
        return f"{seconds:.1f}s"
    minutes, secs = divmod(round(seconds), 60)
    if minutes < 60:
        return f"{minutes}m {secs}s"
    hours, minutes = divmod(minutes, 60)
    return f"{hours}h {minutes}m {secs}s"


def _parse_iso(iso: str) -> dt.datetime:
    """Parse an ISO-8601 timestamp; raises ValueError when it is malformed.

    ``datetime.fromisoformat`` only understands a trailing "Z" from Python
    3.11 onwards, so it is rewritten to the equivalent "+00:00" offset to
    behave the same on older interpreters.
    """
    if iso.endswith("Z"):
        iso = iso[:-1] + "+00:00"
    return dt.datetime.fromisoformat(iso)


def format_timestamp(iso: str) -> str:
    """Render an ISO-8601 timestamp for the report header.

    Returns "" for an empty input and the input unchanged when it cannot be
    parsed, so a malformed suite timestamp is still shown verbatim.
    """
    if not iso:
        return ""
    try:
        return _parse_iso(iso).strftime("%b %d, %Y · %H:%M:%S")
    except ValueError:
        return iso


def parse_iso_to_unix(iso: str) -> float | None:
    """Convert an ISO-8601 timestamp to Unix seconds.

    Returns None for an empty or unparseable input. A timestamp without a
    UTC offset is interpreted as UTC.
    """
    if not iso:
        return None
    try:
        parsed = _parse_iso(iso)
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=dt.timezone.utc)
        return parsed.timestamp()
    except ValueError:
        return None


# ---------- test name + objective resolution ----------


def split_path(test_name: str) -> tuple[str, str, str]:
    """Return (parent_dir, filename, full) for a test name.

    When the name carries a bracketed parameter ("test[/a/b/c.xml]") the
    bracket content is used as the path; otherwise the whole name is.
    Names without a "/" yield an empty parent directory.
    """
    bracketed = re.search(r"\[([^\]]+)\]", test_name)
    full = bracketed.group(1) if bracketed else test_name
    if "/" not in full:
        return "", full, full
    parent, fname = os.path.split(full)
    return parent, fname, full


_RE_EXAMPLE_WS = re.compile(
    r"^/__w/([^/]+)/\1/install/([^/]+)/share/\2/objectives/(.+)$"
)
_RE_OVERLAY = re.compile(r"^/opt/overlay_ws/install/([^/]+)/share/\1/objectives/(.+)$")

_EXAMPLE_WS_ROOTS = [
    Path.home() / "code" / "moveit_pro_example_ws",
]
# Also resolve via the renderer's CWD so the GitHub Actions render-report job
# (which checks out the repo at $GITHUB_WORKSPACE and runs the renderer from
# there) can find objective XMLs without needing the hardcoded local path above.
_EXAMPLE_WS_ROOTS.append(Path.cwd())
_MOVEIT_PRO_INSTALL_ROOT = Path.home() / "code" / "moveit_pro" / ".colcon" / "install"
_MOVEIT_PRO_LOCAL = Path.home() / "code" / "moveit_pro"
_CONTAINER_PREFIX = "/opt/overlay_ws"


def find_local_xml(ci_path: str) -> Path | None:
    """Map an objective XML path recorded on CI to a file on this machine.

    Two CI layouts are recognised:

    * ``/__w/<ws>/<ws>/install/<pkg>/share/<pkg>/objectives/<rel>`` is looked
      up as ``<root>/src/<pkg>/objectives/<rel>`` under each entry of
      ``_EXAMPLE_WS_ROOTS``.
    * ``/opt/overlay_ws/install/<pkg>/share/<pkg>/objectives/<rel>`` is looked
      up in the local MoveIt Pro install tree; a symlink pointing into the
      container prefix is re-rooted onto the local checkout.

    Returns None when the path matches neither layout or no file exists.
    """
    example = _RE_EXAMPLE_WS.match(ci_path)
    if example:
        pkg, rel = example.group(2), example.group(3)
        for root in _EXAMPLE_WS_ROOTS:
            candidate = root / "src" / pkg / "objectives" / rel
            if candidate.exists():
                return candidate
        return None

    overlay = _RE_OVERLAY.match(ci_path)
    if overlay:
        pkg, rel = overlay.group(1), overlay.group(2)
        install_link = (
            _MOVEIT_PRO_INSTALL_ROOT / pkg / "share" / pkg / "objectives" / rel
        )
        if install_link.is_symlink():
            # The symlink target is a container path; translate it to the
            # equivalent location under the local checkout.
            target = os.readlink(install_link)
            if target.startswith(_CONTAINER_PREFIX + "/"):
                candidate = _MOVEIT_PRO_LOCAL / target[len(_CONTAINER_PREFIX) + 1 :]
                if candidate.exists():
                    return candidate
        elif install_link.exists():
            return install_link
    return None


_OBJECTIVE_NAME_CACHE: dict[str, str | None] = {}


def extract_objective_name(ci_path: str) -> str | None:
    """Return the objective name declared in the XML behind ``ci_path``.

    Lookup order inside the XML: the root's ``main_tree_to_execute``
    attribute, then the first ``TreeNodesModel/SubTree`` with an ``ID``,
    then the first ``BehaviorTree`` with an ``ID``. Returns None when the
    file cannot be located, read, or parsed. Results (including None) are
    memoised per ``ci_path``.
    """
    if ci_path in _OBJECTIVE_NAME_CACHE:
        return _OBJECTIVE_NAME_CACHE[ci_path]
    local = find_local_xml(ci_path)
    name = None
    if local is not None:
        try:
            root = ET.parse(local).getroot()
            if "main_tree_to_execute" in root.attrib:
                name = root.attrib["main_tree_to_execute"]
            if name is None:
                for tnm in root.iter("TreeNodesModel"):
                    for sub in tnm.findall("SubTree"):
                        if "ID" in sub.attrib:
                            name = sub.attrib["ID"]
                            break
                    if name:
                        break
            if name is None:
                for bt in root.iter("BehaviorTree"):
                    if "ID" in bt.attrib:
                        name = bt.attrib["ID"]
                        break
        except (ET.ParseError, OSError):
            # Best effort: an unreadable objective just renders without a name.
            pass
    _OBJECTIVE_NAME_CACHE[ci_path] = name
    return name
# ---------- ROS log loading + slicing ----------

# launch.log line formats — two patterns. Order matters: the leveled form is
# more specific (requires `]:` after the second bracket) and must be tried
# first, otherwise the passthrough regex would mis-capture `[LEVEL]` as the
# process name.
#
# Leveled (launch's own events): `<ts> [LEVEL] [name]: msg`
_RE_LAUNCH_LEVELED = re.compile(
    r"^(?P<ts>\d+(?:\.\d+)?)\s+\[(?P<level>[A-Z]+)\]\s+\[(?P<name>[^\]]+)\]:\s+(?P<msg>.*)$"
)
# Passthrough (stdout/stderr from launched processes): `<ts> [name-N] msg`
_RE_LAUNCH_PASSTHROUGH = re.compile(
    r"^(?P<ts>\d+(?:\.\d+)?)\s+\[(?P<name>[^\]]+)\]\s+(?P<msg>.*)$"
)
# Strip the launch sequence suffix (`-14` → ``) so passthrough lines align
# with /rosout entries from the same node.
_RE_LAUNCH_SEQ_SUFFIX = re.compile(r"-\d+$")

# A passthrough line is the raw stdout/stderr of a launched process. ROS code
# often prints its own `[LEVEL]` bracket inside that text (sometimes with ANSI
# color codes around it). Extract that level when present so the renderer can
# style the entry correctly instead of dumping it as INFO. Any number of SGR
# sequences, including multi-parameter ones such as `ESC[1;31m`, may precede
# the bracket.
_RE_PASSTHROUGH_EMBED_LEVEL = re.compile(
    r"^\s*(?:\x1b\[[\d;]*m)*\[(DEBUG|INFO|WARN|WARNING|ERROR|FATAL)\]"
)

# When a passthrough line has no embedded level (stderr from a dying process,
# uncaught exception text, etc.) but its content matches one of these markers,
# treat it as ERROR. Without this, boot-crash diagnostics like
# `Aborted (Signal sent by tkill())` or `what(): Failed to load RobotModel`
# get dumped at INFO severity and disappear when the user filters to errors.
# Markers split into two groups:
# - LINE_START: anchored to the start of the message text. Avoids false
#   positives like "Operation Aborted by user" or a docstring mention of
#   "Traceback" being elevated to ERROR.
# - SUBSTRING: stable enough as substrings (highly specific, unlikely to
#   appear in benign log content).
_PASSTHROUGH_ERROR_LINE_START = (
    "Aborted",
    "Segmentation fault",
    "Stack trace",
    "Traceback",
    "terminate called",
)
_PASSTHROUGH_ERROR_SUBSTRING = (
    "SIGSEGV",
    "SIGABRT",
    "what():",
)


def _classify_passthrough(msg: str) -> str:
    """Infer a severity for a raw stdout/stderr line from a launched process.

    An embedded `[LEVEL]` prefix wins (WARNING is normalised to WARN);
    otherwise known crash markers map to ERROR; everything else is INFO.
    """
    embedded = _RE_PASSTHROUGH_EMBED_LEVEL.match(msg)
    if embedded:
        level = embedded.group(1).upper()
        return "WARN" if level == "WARNING" else level
    # str.startswith accepts a tuple, so one call covers every marker.
    if msg.lstrip().startswith(_PASSTHROUGH_ERROR_LINE_START):
        return "ERROR"
    if any(marker in msg for marker in _PASSTHROUGH_ERROR_SUBSTRING):
        return "ERROR"
    return "INFO"


def load_rosout_ndjson(ndjson_path: Path) -> list[tuple[float, str, str, str]]:
    """Read structured /rosout log entries from rosout.ndjson.

    The file is produced by lab_sim/test/conftest.py via a session-scoped
    subscriber to /rosout. Each line is one JSON object: {ts, level, node, msg}.
    Going through /rosout (a published rcl_interfaces/msg/Log topic with a
    stable schema) instead of regex-parsing rcutils file-logger output
    insulates the renderer from rcutils' textual format, which has no
    stability contract.

    Returns (ts, LEVEL, node, msg) tuples sorted by timestamp. Blank lines,
    lines that are not valid JSON, entries without a usable numeric ``ts``
    and entries whose ``ts`` is NaN/inf are skipped. A missing file or an
    I/O error yields an empty list.
    """
    import math  # local import: only needed for the finite-timestamp guard

    if not ndjson_path.is_file():
        return []
    out: list[tuple[float, str, str, str]] = []
    try:
        # errors="replace" matches load_launch_logs: an undecodable byte must
        # not raise UnicodeDecodeError (a ValueError, not an OSError) out of
        # the read loop and take the whole report down with it.
        with open(ndjson_path, encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                    ts = float(entry["ts"])
                    if not math.isfinite(ts):
                        # NaN/inf would break the sorted order that
                        # bisect-based slicing relies on.
                        continue
                    out.append(
                        (
                            ts,
                            str(entry.get("level", "")).upper(),
                            str(entry.get("node", "")),
                            str(entry.get("msg", "")),
                        )
                    )
                except (
                    json.JSONDecodeError,
                    AttributeError,
                    KeyError,
                    TypeError,
                    ValueError,
                ):
                    # Malformed or non-object line: skip it, keep the rest.
                    continue
    except OSError:
        return []
    out.sort(key=lambda x: x[0])
    return out


def _parse_launch_line(line: str) -> tuple[float, str, str, str] | None:
    """Parse one launch.log line into (ts, LEVEL, name, msg), or None."""
    leveled = _RE_LAUNCH_LEVELED.match(line)
    if leveled:
        return (
            float(leveled["ts"]),
            leveled["level"].upper(),
            leveled["name"],
            leveled["msg"].rstrip(),
        )
    passthrough = _RE_LAUNCH_PASSTHROUGH.match(line)
    if passthrough:
        msg = passthrough["msg"].rstrip()
        return (
            float(passthrough["ts"]),
            _classify_passthrough(msg),
            _RE_LAUNCH_SEQ_SUFFIX.sub("", passthrough["name"]),
            msg,
        )
    return None


def load_launch_logs(logs_dir: Path) -> list[tuple[float, str, str, str]]:
    """Read every `<logs_dir>/*/launch.log` file produced by ros2 launch.

    Secondary source alongside /rosout. Critical for diagnosing boot
    failures where a node crashes before publishing anything to /rosout —
    the stack trace and SIGSEGV/SIGABRT signal lines live in launch.log's
    stdout/stderr passthrough only. Format is best-effort; lines that
    don't match either regex (e.g. raw stderr without a timestamp prefix
    from a child process) are dropped.

    Entries are returned in file order (files visited in sorted path
    order), not globally sorted by timestamp. A file that fails mid-read
    keeps the entries parsed so far and the next file is tried.
    """
    if not logs_dir.is_dir():
        return []
    out: list[tuple[float, str, str, str]] = []
    for launch_log in sorted(logs_dir.glob("*/launch.log")):
        try:
            with open(launch_log, encoding="utf-8", errors="replace") as f:
                for line in f:
                    try:
                        parsed = _parse_launch_line(line)
                    except ValueError:
                        continue
                    if parsed is not None:
                        out.append(parsed)
        except OSError:
            continue
    return out


def slice_log(
    all_lines: list[tuple[float, str, str, str]],
    starts: list[float],
    t_start: float,
    t_end: float,
) -> list[tuple[float, str, str, str]]:
    """Return the entries of ``all_lines`` with t_start <= ts <= t_end.

    ``starts`` must hold the timestamp of each entry of ``all_lines`` at the
    same index, in ascending order, so the window can be located by binary
    search. Both bounds are inclusive.
    """
    lo = bisect.bisect_left(starts, t_start)
    hi = bisect.bisect_right(starts, t_end)
    return all_lines[lo:hi]


def classify(tc: ET.Element) -> str:
    """Map a JUnit <testcase> element to a status string.

    Child elements are checked in precedence order failure, error, skipped;
    a testcase with none of them is "passed".
    """
    for tag, status in (
        ("failure", "failed"),
        ("error", "error"),
        ("skipped", "skipped"),
    ):
        if tc.find(tag) is not None:
            return status
    return "passed"
pytest wrote any results will yield malformed/empty XML. The render-report + # job is `needs:` for the sticky-comment job, so a crash here loses the + # comment exactly when it would be most useful. Degrade gracefully instead. + try: + tree = ET.parse(in_path) + except ET.ParseError as exc: + print(f"render_report: could not parse {in_path}: {exc}", file=sys.stderr) + sys.exit(0) + root = tree.getroot() + suite = root.find("testsuite") if root.tag == "testsuites" else root + if suite is None: + print( + f"render_report: no in {in_path}; nothing to render", + file=sys.stderr, + ) + sys.exit(0) + + timestamp = suite.attrib.get("timestamp", "") + hostname = suite.attrib.get("hostname", "") + total_time = float(suite.attrib.get("time", 0)) + suite_unix = parse_iso_to_unix(timestamp) + + # Load ROS logs (file logger + launch-wrapped stdout) once. + # Primary source: structured /rosout NDJSON captured by conftest. Secondary + # source: launch.log stdout/stderr passthrough, critical for boot failures + # where a node SIGABRTs before it publishes anything to /rosout. + ros_lines = sorted( + load_rosout_ndjson(rosout_path) + load_launch_logs(rosout_path.parent), + key=lambda x: x[0], + ) + ros_starts = [entry[0] for entry in ros_lines] + + groups: dict[str, list[dict]] = defaultdict(list) + counts = {"passed": 0, "failed": 0, "error": 0, "skipped": 0} + cumulative_time = 0.0 + first_non_skipped_seen = False + + for tc in suite.findall("testcase"): + status = classify(tc) + counts[status] += 1 + parent, fname, full = split_path(tc.attrib.get("name", "")) + time_s = float(tc.attrib.get("time", 0)) + + # Compute per-test log window. Pad it generously — pytest's only covers the test call, not fixture setup/teardown, so the + # cumulative-time cursor drifts a second or two per test against the + # log file's wall-clock timestamps. Without padding, very-short tests + # (~0.5s failures from BT-load errors) miss their own error line. 
+ pad_before = 2.0 + pad_after = 5.0 + t_start = (suite_unix + cumulative_time) if suite_unix else None + t_end = (suite_unix + cumulative_time + time_s) if suite_unix else None + # Skipped tests don't actually consume sim time, so don't advance the + # cumulative cursor for them. + if status != "skipped": + cumulative_time += time_s + + # The first non-skipped test absorbs any pre-suite log entries + # (e.g., a backend node crashing during launch.bringup before the + # suite timestamp was emitted). Without this, boot-failure stack + # traces in launch.log fall outside every test's window and get + # rendered as "No ROS log lines in this test's time window." + extend_back_to: float | None = None + if ( + t_start is not None + and status != "skipped" + and not first_non_skipped_seen + and ros_starts + and ros_starts[0] < t_start + ): + extend_back_to = ros_starts[0] + if status != "skipped": + first_non_skipped_seen = True + + if t_start is not None and ros_lines: + # extend_back_to is a widen-only override: take whichever is earlier + # so the first-test window never gets narrower than any other test's. 
+ window_start = t_start - pad_before + if extend_back_to is not None: + window_start = min(window_start, extend_back_to) + log_slice = slice_log( + ros_lines, ros_starts, window_start, t_end + pad_after + ) + else: + log_slice = [] + + warn_n = sum(1 for _, lvl, _, _ in log_slice if lvl == "WARN") + err_n = sum(1 for _, lvl, _, _ in log_slice if lvl in ("ERROR", "FATAL")) + + groups[parent].append( + { + "status": status, + "fname": fname, + "full": full, + "objective_name": extract_objective_name(full), + "time": time_s, + "t_start": t_start, + "t_end": t_end, + "log_slice": log_slice, + "warn_n": warn_n, + "err_n": err_n, + } + ) + + status_rank = {"failed": 0, "error": 1, "passed": 2, "skipped": 3} + for tests in groups.values(): + tests.sort(key=lambda r: (status_rank[r["status"]], -r["time"], r["fname"])) + + def group_key(item): + parent, tests = item + fails = sum(1 for t in tests if t["status"] in ("failed", "error")) + return (-fails, parent) + + sorted_groups = sorted(groups.items(), key=group_key) + total = sum(counts.values()) + + executed_times = [ + t["time"] + for tests in groups.values() + for t in tests + if t["status"] != "skipped" + ] + avg_exec_time = ( + (sum(executed_times) / len(executed_times)) if executed_times else 0.0 + ) + + def status_badge(s: str) -> str: + symbols = {"passed": "✓", "failed": "✕", "error": "!", "skipped": "−"} + return f'{symbols[s]} {s}' + + def render_log_slice(log_slice: list[tuple[float, str, str, str]]) -> str: + if not log_slice: + return '
No ROS log lines in this test\'s time window.
' + # Fold every duplicate (level, node, msg) in the slice down to one + # entry, anchored at the first occurrence's timestamp, with a count + # badge for repeats. Strict-consecutive collapsing misses cases where + # a retry loop's messages interleave with other nodes' output — group + # collapsing handles those. + seen: dict[tuple[str, str, str], list] = {} + ordered_keys: list[tuple[str, str, str]] = [] + for ts, lvl, node, msg in log_slice: + key = (lvl, node, msg) + if key in seen: + seen[key][1] += 1 + else: + seen[key] = [ts, 1] + ordered_keys.append(key) + collapsed: list[tuple[float, str, str, str, int]] = [ + (seen[k][0], k[0], k[1], k[2], seen[k][1]) for k in ordered_keys + ] + + rows: list[str] = [] + anchor = collapsed[0][0] + for ts, lvl, node, msg, count in collapsed: + rel = ts - anchor + level_class = ( + lvl.lower() + if lvl in ("ERROR", "FATAL", "WARN", "INFO", "DEBUG") + else "info" + ) + count_badge = ( + f' ×{count}' if count > 1 else "" + ) + rows.append( + f'
' + f'+{rel:6.2f}s' + f'{lvl}' + f'{html.escape(node)}' + f'{html.escape(msg)}{count_badge}' + f"
" + ) + return f'
{"".join(rows)}
' + + section_parts: list[str] = [] + test_idx = 0 + for parent, tests in sorted_groups: + g_fails = sum(1 for t in tests if t["status"] in ("failed", "error")) + g_passes = sum(1 for t in tests if t["status"] == "passed") + g_skips = sum(1 for t in tests if t["status"] == "skipped") + chips: list[str] = [] + if g_fails: + chips.append(f'{g_fails} fail') + if g_passes: + chips.append(f'{g_passes} pass') + if g_skips: + chips.append(f'{g_skips} skip') + + rows_html: list[str] = [] + for r in tests: + has_logs = bool(r["log_slice"]) + details_html = ( + f'' + f'{render_log_slice(r["log_slice"])}' + ) + click = ( + f"onclick=\"document.getElementById('d{test_idx}').hidden = " + f"!document.getElementById('d{test_idx}').hidden\"" + ) + cursor = 'style="cursor: pointer"' + + # Replace the pytest-fail message with a log summary. + if has_logs: + pieces: list[str] = [] + if r["err_n"]: + pieces.append( + f'{r["err_n"]} error{"s" if r["err_n"] != 1 else ""}' + ) + if r["warn_n"]: + pieces.append( + f'{r["warn_n"]} warning{"s" if r["warn_n"] != 1 else ""}' + ) + info_n = sum(1 for _, lvl, _, _ in r["log_slice"] if lvl == "INFO") + if info_n: + pieces.append(f'{info_n} info') + msg_cell = " · ".join(pieces) + elif r["status"] == "skipped": + msg_cell = 'skipped' + else: + msg_cell = 'no logs' + + obj_cell = ( + f'{html.escape(r["objective_name"])}' + if r["objective_name"] + else '—' + ) + search_field = html.escape( + (r["objective_name"] or "") + " " + r["fname"] + ).lower() + rows_html.append( + f'' + f'{status_badge(r["status"])}' + f"{obj_cell}" + f'{html.escape(r["fname"])}' + f'{r["time"]:.1f}s' + f'{msg_cell}' + f"{details_html}" + ) + test_idx += 1 + + header_class = ( + "group-failed" + if g_fails + else ("group-skipped" if g_skips and not g_passes else "group-passed") + ) + section_parts.append( + f'
' + f'
' + f'
' + f'' + f'' + f'{html.escape(parent or "(no parent)")}' + f"
" + f'
{"".join(chips)}
' + f"
" + f'{"".join(rows_html)}
' + f"
" + ) + + pass_rate_denom = counts["passed"] + counts["failed"] + counts["error"] + pass_rate = (counts["passed"] / pass_rate_denom * 100) if pass_rate_denom else 0 + + html_out = f""" + + + +MoveIt Pro Objective Integration Tests + + + + +
+

MoveIt Pro Objective Integration Tests

+
+ {total} tests + {format_duration(total_time)} total + {html.escape(format_timestamp(timestamp))} + {html.escape(hostname)} + {len(ros_lines):,} log lines indexed (rosout + launch) +
+
+
+
+
{total}
Total objective tests
+
{counts["passed"]}
Objectives passed
+
{counts["failed"] + counts["error"]}
Objectives failed
+
{counts["skipped"]}
Objectives skipped
+
{avg_exec_time:.1f}s
Avg test time
+
{pass_rate:.0f}%
Objective pass rate
+
+
+ + + + + + +
+ {''.join(section_parts)} +
+ + + +""" + out_path.write_text(html_out, encoding="utf-8") + print(f"Wrote {out_path} ({len(html_out):,} bytes)") + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 3631feb6..7b38c1a3 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -23,15 +23,264 @@ jobs: with: image_tag: ${{ github.event_name == 'pull_request' && github.event.pull_request.base.ref || github.ref_name }} colcon_test_args: "--executor sequential" - runner: "picknik-16-amd64" - # Coarsen MuJoCo timestep on CI (default 0.002s = 500Hz) so the heavier 3.6.0 - # constraint solver stays at-or-under realtime on CI runners. See + # GPU runner + enable_gpu are required so MuJoCo's EGL offscreen + # rendering can attach to a real GPU instead of falling back to slow + # software rasterization on the CPU-only picknik-16-amd64 runner — the + # integration test exercises camera-bearing scenes (apriltag, perception). + runner: "picknik-16-amd64-gpu" + enable_gpu: true + # Re-assert MuJoCo timestep on CI as a backstop. Scene files in this repo + # are standardized to 0.003s, but this override catches any future scene + # whose include chain bypasses that standard, keeping the heavier 3.6.0 + # constraint solver at-or-under realtime on CI runners. See # PickNikRobotics/moveit_pro#18534 for the underlying flake history. mujoco_ci_timestep: "0.003" use_ccache: true secrets: moveit_license_key: ${{ secrets.STUDIO_CI_LICENSE_KEY }} + # Turn the test-results artifact into a single-file HTML report (status, + # timings, per-test ROS log slice with colors). Matrix on ros_distro because + # the upstream workflow uploads one artifact per distro. Runs whether the + # integration test passed, failed, or timed out -- the report is most useful + # for failure post-mortem. 
+ render-report: + needs: integration-test-in-studio-container + # Run on success and failure, not on cancelled or skipped — `!= cancelled` + # alone would also fire on skipped, where no artifact was uploaded. + if: always() && (needs.integration-test-in-studio-container.result == 'success' || needs.integration-test-in-studio-container.result == 'failure') + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + ros_distro: [humble, jazzy] + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + # The artifact name comes from workspace_integration_test.yaml — the + # double dash is the literal `test-results-${image_tag_suffix}-${distro}` + # template with an empty image_tag_suffix. + # continue-on-error so an early integration-test crash (no artifact + # uploaded) still lets render-report exit cleanly via the [-z XUNIT] + # guard below instead of hard-failing the matrix job. + - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + continue-on-error: true + with: + name: test-results--${{ matrix.ros_distro }} + path: artifacts/ + - name: Render HTML report + id: render + run: | + set -euo pipefail + # Pick the integration test xunit specifically. The artifact contains + # ~40 xunit files (lint_cmake / copyright / xmllint / etc., each with + # a single trivial testcase); `find ... | head -1` was grabbing one + # of those alphabetically and producing an empty-looking report. + XUNIT="" + if [ -d artifacts ]; then + XUNIT=$(find artifacts -name 'objectives_integration_test.xunit.xml' 2>/dev/null | head -1 || true) + fi + if [ -z "${XUNIT}" ]; then + echo "No objectives_integration_test.xunit.xml found in artifact; nothing to render." 
+ echo "rendered=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + echo "Rendering ${XUNIT}" + python3 .github/scripts/render_report.py "${XUNIT}" report.html + # Summary stats so publish-and-comment can pick the per-distro icon + # and decide whether to post a comment at all. + XUNIT="${XUNIT}" python3 - <<'PY' + import json, os, xml.etree.ElementTree as ET + try: + root = ET.parse(os.environ['XUNIT']).getroot() + suites = root.findall('testsuite') if root.tag == 'testsuites' else [root] + totals = {'tests': 0, 'failures': 0, 'errors': 0, 'skipped': 0} + for s in suites: + for k in totals: + totals[k] += int(s.attrib.get(k, '0') or '0') + # Clamp to 0 — xunit attrs from third-party runners occasionally + # report skipped > tests or similar inconsistencies, which would + # otherwise produce a negative "passed" count in the comment. + totals['passed'] = max(0, totals['tests'] - totals['failures'] - totals['errors'] - totals['skipped']) + totals['status'] = 'failed' if (totals['failures'] + totals['errors']) > 0 else 'passed' + except (ET.ParseError, OSError, ValueError) as e: + # report.html already rendered, so surface the parse failure via + # status=unknown rather than letting the step fail (and letting + # downstream treat the distro as wholly missing). + totals = {'status': 'unknown', 'error': str(e)} + with open('status.json', 'w') as f: + json.dump(totals, f) + print(totals) + PY + echo "rendered=true" >> "$GITHUB_OUTPUT" + - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + if: steps.render.outputs.rendered == 'true' + with: + name: integration-test-report-${{ matrix.ros_distro }} + path: | + report.html + status.json + retention-days: 15 + + # Publish per-distro reports to GitHub Pages under pr-/run-// + # and post a fresh PR comment per run (not sticky) with direct URLs to the HTML + # pages. Single job (not matrixed) so the two distro artifacts are deployed + # together — avoids gh-pages push races between parallel matrix jobs. 
+ publish-and-comment: + needs: render-report + if: github.event_name == 'pull_request' && always() && (needs.render-report.result == 'success' || needs.render-report.result == 'failure') + runs-on: ubuntu-22.04 + # Serialize gh-pages writes per PR so this job doesn't race with the + # cleanup-pr-reports workflow on `pull_request: closed`. cancel-in-progress + # stays false — we don't want a mid-flight publish silently killed by an + # untimely close event. + concurrency: + group: gh-pages-pr-${{ github.event.pull_request.number }} + cancel-in-progress: false + permissions: + pull-requests: write + # contents:write is required by peaceiris/actions-gh-pages below to push + # to the gh-pages branch. GitHub's permission model can't scope write + # access to a single branch, so the broader grant is unavoidable here. + contents: write + steps: + # Explicit per-distro downloads into named paths rather than `pattern:`. + # actions/download-artifact@v8 with pattern + single match extracts files + # directly to `path:` instead of `path://`, breaking the + # `reports/integration-test-report-/` layout the layout step + # expects. Per-distro downloads sidestep that ambiguity entirely. + # continue-on-error so a missing artifact (distro's render emitted + # rendered=false) doesn't fail the job — the layout step's Python flags + # the distro as status=missing for the comment. 
+ - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + continue-on-error: true + with: + name: integration-test-report-humble + path: reports/integration-test-report-humble/ + - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + continue-on-error: true + with: + name: integration-test-report-jazzy + path: reports/integration-test-report-jazzy/ + - name: Lay out per-distro reports + id: layout + run: | + set -euo pipefail + PR_NUM="${{ github.event.number }}" + RUN_ID="${{ github.run_id }}" + BASE="pr-${PR_NUM}/run-${RUN_ID}" + mkdir -p "publish/${BASE}" + # .nojekyll prevents Pages from running Jekyll, which would 404 on + # paths containing underscores (e.g. ros2_kortex). + touch publish/.nojekyll + # nullglob keeps the loop a no-op when zero artifacts were downloaded + # (e.g. render-report skipped entirely), instead of running once with + # the literal pattern and silently failing the -f check. + shopt -s nullglob + AVAILABLE=() + for d in reports/integration-test-report-*/; do + distro="${d#reports/integration-test-report-}" + distro="${distro%/}" + if [ -f "${d}report.html" ]; then + mkdir -p "publish/${BASE}/${distro}" + cp "${d}report.html" "publish/${BASE}/${distro}/report.html" + AVAILABLE+=("${distro}") + fi + done + # Build a per-distro status map for the comment step. Distros that + # were expected but produced no status.json (e.g. studio container + # crashed before the test ran) get status=missing so the comment can + # still surface them with a ❌. + STATUS_MAP=$(python3 - <<'PY' + import json, pathlib + # Keep in sync with strategy.matrix.ros_distro in the render-report + # job above. Used to detect distros that produced no artifact at all + # (container crash before xunit upload) so the comment can still flag + # them with ❌ instead of silently omitting them. 
+ expected = ['humble', 'jazzy'] + out = {} + for distro in expected: + p = pathlib.Path(f'reports/integration-test-report-{distro}/status.json') + if p.exists(): + out[distro] = json.loads(p.read_text()) + else: + out[distro] = {'status': 'missing'} + print(json.dumps(out)) + PY + ) + echo "Status map: ${STATUS_MAP}" + # any_failed=true iff at least one distro is not 'passed'. Drives + # whether the comment is posted at all (skip when both passed). + # Pass STATUS_MAP via env (not argv) so any future JSON value + # containing shell metacharacters (quotes, $, backticks) can't break + # the python invocation. + ANY_FAILED=$(STATUS_MAP="${STATUS_MAP}" python3 -c "import json,os; m=json.loads(os.environ['STATUS_MAP']); print('true' if any(v.get('status')!='passed' for v in m.values()) else 'false')") + echo "base=${BASE}" >> "$GITHUB_OUTPUT" + echo "distros=${AVAILABLE[*]}" >> "$GITHUB_OUTPUT" + echo "any_failed=${ANY_FAILED}" >> "$GITHUB_OUTPUT" + { + echo "status_map<> "$GITHUB_OUTPUT" + if [ ${#AVAILABLE[@]} -eq 0 ]; then + echo "any=false" >> "$GITHUB_OUTPUT" + else + echo "any=true" >> "$GITHUB_OUTPUT" + fi + - name: Deploy to GitHub Pages + if: steps.layout.outputs.any == 'true' + uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_branch: gh-pages + publish_dir: ./publish + # keep_files=true so each run only adds its own pr-/run-/ + # subtree instead of wiping prior reports. A cleanup workflow can + # prune pr-/ on PR close if the branch ever gets too big. + keep_files: true + - name: Post PR comment + # Skip the comment entirely when every distro's integration test + # passed — green runs don't need to spam the PR conversation. 
+ if: steps.layout.outputs.any == 'true' && steps.layout.outputs.any_failed == 'true' + uses: actions/github-script@d746ffe35508b1917358783b479e04febd2b8f71 # v9.0.0 + env: + BASE: ${{ steps.layout.outputs.base }} + STATUS_MAP: ${{ steps.layout.outputs.status_map }} + with: + script: | + const base = process.env.BASE; + const statusMap = JSON.parse(process.env.STATUS_MAP); + const owner = context.repo.owner; + const repo = context.repo.repo; + const runUrl = `https://github.com/${owner}/${repo}/actions/runs/${context.runId}`; + const pagesUrl = (distro) => + `https://${owner.toLowerCase()}.github.io/${repo.toLowerCase()}/${base}/${distro}/report.html`; + const lines = Object.entries(statusMap).map(([distro, s]) => { + if (s.status === 'missing') { + return `- ❌ **${distro}**: no report produced — see [run logs](${runUrl})`; + } + if (s.status === 'unknown') { + return `- ⚠️ **${distro}**: report parse failed — see [run logs](${runUrl})`; + } + const icon = s.status === 'passed' ? '✅' : '❌'; + const failed = (s.failures || 0) + (s.errors || 0); + const counts = failed > 0 + ? `${failed} failed, ${s.passed} passed` + : `${s.passed} passed`; + return `- ${icon} **${distro}**: view HTML report — ${counts}`; + }); + const body = [ + '### MoveIt Pro Example WS - Objectives Integration Test Report', + '', + lines.join('\n'), + ].join('\n'); + await github.rest.issues.createComment({ + owner, + repo, + issue_number: context.issue.number, + body, + }); + ensure-no-ssh-in-gitmodules: name: Ensure no SSH URLs in .gitmodules runs-on: ubuntu-22.04 diff --git a/.github/workflows/cleanup-pr-reports.yaml b/.github/workflows/cleanup-pr-reports.yaml new file mode 100644 index 00000000..6264a930 --- /dev/null +++ b/.github/workflows/cleanup-pr-reports.yaml @@ -0,0 +1,54 @@ +# Delete pr-/ from the gh-pages branch when a PR closes (merged or +# abandoned). Bounds gh-pages growth to roughly (open PRs × runs per PR) +# instead of accumulating every report ever produced. 
+# +# Fork PRs never reach gh-pages in the first place (their token is read-only +# and the publish-and-comment job's push fails), so this workflow is a no-op +# for them — the cleanup step exits early when pr-/ is absent. + +name: Cleanup gh-pages reports on PR close + +on: + pull_request: + types: [closed] + +jobs: + cleanup-reports: + runs-on: ubuntu-22.04 + # Match the publish-and-comment concurrency group in ci.yaml so an + # in-flight publish completes before cleanup runs (or vice versa). This + # avoids cleanup deleting pr-/ while publish is in the middle of + # writing a new run-/ subtree, which would result in a fast-forward + # failure or a partially-restored directory. + concurrency: + group: gh-pages-pr-${{ github.event.pull_request.number }} + cancel-in-progress: false + permissions: + # contents:write is required to push the removal commit to gh-pages. + # Same broad-scope caveat as the publish-and-comment job in ci.yaml: + # GitHub's permission model can't scope write access to a single branch. + contents: write + steps: + - name: Check out gh-pages + # gh-pages may not exist yet on a brand-new repo where no successful + # report deploy has happened. continue-on-error keeps the workflow + # from showing a red X on every early-PR close before the first deploy. + continue-on-error: true + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: gh-pages + fetch-depth: 1 + - name: Remove pr-/ subtree + env: + PR_NUM: ${{ github.event.number }} + run: | + set -euo pipefail + if [ ! -d "pr-${PR_NUM}" ]; then + echo "No pr-${PR_NUM}/ directory in gh-pages; nothing to clean up." 
+ exit 0 + fi + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git config user.name "github-actions[bot]" + git rm -rf "pr-${PR_NUM}" + git commit -m "chore(ci): remove gh-pages reports for closed PR #${PR_NUM}" + git push origin gh-pages diff --git a/src/lab_sim/CMakeLists.txt b/src/lab_sim/CMakeLists.txt index 9716c3ae..77bf23d3 100644 --- a/src/lab_sim/CMakeLists.txt +++ b/src/lab_sim/CMakeLists.txt @@ -44,10 +44,16 @@ if(BUILD_TESTING) find_package(ament_cmake_pytest REQUIRED) find_package(ament_lint_auto REQUIRED) ament_lint_auto_find_test_dependencies() + # Redirect ROS node logs into the test_results tree so the test-results CI + # artifact ships them back too. Default would be ~/.ros/log/, which lives + # on the doomed container filesystem and never gets uploaded -- making + # post-mortem of objective failures impossible. ament_add_pytest_test( objectives_integration_test test/objectives_integration_test.py TIMEOUT 600 - ENV MOVEIT_CONFIG_PACKAGE=lab_sim MOVEIT_HOST_USER_WORKSPACE=${CMAKE_SOURCE_DIR}) + ENV MOVEIT_CONFIG_PACKAGE=lab_sim + MOVEIT_HOST_USER_WORKSPACE=${CMAKE_SOURCE_DIR} + ROS_LOG_DIR=${CMAKE_CURRENT_BINARY_DIR}/test_results/${PROJECT_NAME}/ros_logs) endif() ament_package() diff --git a/src/lab_sim/package.xml b/src/lab_sim/package.xml index bcd0f79a..fab9e211 100644 --- a/src/lab_sim/package.xml +++ b/src/lab_sim/package.xml @@ -41,6 +41,8 @@ ament_cmake_lint_cmake picknik_ament_copyright ament_flake8 + rclpy + rcl_interfaces ament_cmake diff --git a/src/lab_sim/test/conftest.py b/src/lab_sim/test/conftest.py new file mode 100644 index 00000000..1cbd75ab --- /dev/null +++ b/src/lab_sim/test/conftest.py @@ -0,0 +1,158 @@ +# Copyright 2026 PickNik Inc. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of the PickNik Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +"""Live per-objective progress + structured /rosout capture for the +objectives integration test. + +Pytest's default ``--capture=fd`` redirects fds 1 and 2, so when CTest kills +the test on timeout (TIMEOUT 600 in CMakeLists.txt) all per-test output is +lost and the CI log shows nothing past pytest's "collected N items" header. 
+ +The runtest hooks write directly to fd 2, bypassing the capture, so the CI +log always shows which objective was running when the timeout fired and how +long each completed objective took. + +The ``capture_rosout`` fixture mirrors every /rosout message into +``$ROS_LOG_DIR/rosout.ndjson`` as one JSON object per line. Going through +the published /rosout topic (rcl_interfaces/msg/Log — a stable schema) +instead of regex-parsing rcutils file-logger output insulates the HTML +report renderer from rcutils' textual output format, which has no +stability contract. +""" + +import json +import os +import threading +import time +from pathlib import Path + +import pytest +import rclpy +from rcl_interfaces.msg import Log +from rclpy.executors import SingleThreadedExecutor +from rclpy.qos import DurabilityPolicy, HistoryPolicy, QoSProfile, ReliabilityPolicy + +_started_at: dict[str, float] = {} + +# Log message severity byte → ROS log level name. Use literal ints rather +# than `Log.DEBUG` etc. — the message constants are numpy uint8 in some +# rclpy builds, which hash differently from Python int and miss the dict +# lookup, leaving levels in the NDJSON as bare numbers like "30". 
_LEVEL_NAMES = {10: "DEBUG", 20: "INFO", 30: "WARN", 40: "ERROR", 50: "FATAL"}


def pytest_runtest_logstart(nodeid, location):
    """Record when a test starts and announce it on the raw stderr descriptor.

    The line is written with ``os.write(2, ...)`` rather than ``print`` so it
    bypasses pytest's output capture and is visible even if the process is
    killed before the test finishes.

    Args:
        nodeid: pytest node id of the test that is about to run.
        location: ``(filename, lineno, testname)`` tuple supplied by pytest;
            unused here.
    """
    _started_at[nodeid] = time.monotonic()
    os.write(2, f" START {nodeid}\n".encode())


def _report_reason(report):
    """Return a one-line (max 200 chars) reason for a failed/skipped report.

    Returns an empty string when the report carries no usable text, so the
    caller never has to index into an empty ``splitlines()`` result.
    """
    longrepr = report.longrepr
    if not longrepr:
        return ""
    if report.failed:
        text = str(longrepr)
    elif report.skipped and isinstance(longrepr, tuple):
        # Skip reports carry a (path, lineno, message) tuple; the message is
        # the only part worth showing.
        text = str(longrepr[-1])
    else:
        return ""
    text_lines = text.splitlines()
    return text_lines[-1][:200] if text_lines else ""


def pytest_runtest_logreport(report):
    """Write a one-line verdict for a test to the raw stderr descriptor.

    The "call" phase is always reported. The "setup" and "teardown" phases
    are reported only when they did not pass: skip markers and fixture errors
    are raised during setup and never reach "call", so ignoring those phases
    would leave such tests with a START line and no verdict at all.

    Args:
        report: the ``TestReport`` pytest produced for one phase of a test.
    """
    if report.when != "call" and report.passed:
        return
    nodeid = report.nodeid
    start = _started_at.get(nodeid)
    elapsed_str = f"{time.monotonic() - start:.1f}s" if start is not None else "n/a"
    outcome = report.outcome.upper()  # PASSED / FAILED / SKIPPED
    line = f" {outcome:7s} {nodeid} ({elapsed_str})"
    if report.when != "call":
        # Make it obvious the verdict came from a fixture, not the test body.
        line += f" [{report.when}]"
    reason = _report_reason(report)
    if reason:
        line += f"\n └─ {reason}"
    os.write(2, (line + "\n").encode())


# Match the /rosout publisher's QoS exactly so a late-joining subscriber gets
# replayed history (TRANSIENT_LOCAL) up to the publisher's depth. ROS 2's rcl
# logging handler publishes /rosout with depth=1000 / RELIABLE / TRANSIENT_LOCAL /
# KEEP_LAST. If the subscriber is VOLATILE, no historical replay happens even
# if the publisher stores history — both ends must opt in.
_ROSOUT_QOS = QoSProfile(
    depth=1000,
    history=HistoryPolicy.KEEP_LAST,
    reliability=ReliabilityPolicy.RELIABLE,
    durability=DurabilityPolicy.TRANSIENT_LOCAL,
)


@pytest.fixture(scope="session", autouse=True)
def capture_rosout():
    """Write every /rosout message to $ROS_LOG_DIR/rosout.ndjson.

    Falls back to the current directory when ROS_LOG_DIR is unset (e.g.
    local pytest invocations outside CTest). Uses a dedicated rclpy
    Context + SingleThreadedExecutor so the subscriber doesn't interact
    with whatever context/executor the integration test's own rclpy code
    runs on. TRANSIENT_LOCAL durability ensures messages published
    before this fixture attached (e.g. backend boot logs) are still
    replayed to us, up to the publisher's depth=1000 history.

    The output file and the rclpy context are released even when setting up
    the node, subscription or executor fails part-way.
    """
    out_dir = Path(os.environ.get("ROS_LOG_DIR", "."))
    out_dir.mkdir(parents=True, exist_ok=True)
    out_path = out_dir / "rosout.ndjson"
    write_lock = threading.Lock()

    # The `with` block closes the file last, after every rclpy resource has
    # been torn down, and also on any setup failure below.
    with open(out_path, "w", encoding="utf-8") as out_file:

        def on_log(msg: Log) -> None:
            ts = msg.stamp.sec + msg.stamp.nanosec / 1e9
            entry = {
                "ts": ts,
                "level": _LEVEL_NAMES.get(msg.level, str(msg.level)),
                "node": msg.name,
                "msg": msg.msg,
            }
            with write_lock:
                out_file.write(json.dumps(entry, ensure_ascii=False) + "\n")
                out_file.flush()  # don't lose the last lines on a crash

        # Dedicated context: isolates our subscriber and executor from the
        # default rclpy context the integration test uses.
        context = rclpy.Context()
        rclpy.init(context=context)
        try:
            node = rclpy.create_node("test_rosout_capture", context=context)
            node.create_subscription(Log, "/rosout", on_log, _ROSOUT_QOS)
            executor = SingleThreadedExecutor(context=context)
            executor.add_node(node)
            spinner = threading.Thread(target=executor.spin, daemon=True)
            spinner.start()

            try:
                yield
            finally:
                # Clean teardown: stop the executor so spin() returns, wait
                # for the spinner thread to fully drain, then destroy and
                # close. Order matters — closing the file while the spinner
                # is still draining would race on out_file.write(). Use an
                # unbounded join because executor.shutdown() signals spin()
                # to return via a guard condition, so the thread will exit
                # promptly; a timed join would leave a window where on_log()
                # runs after out_file.close().
                executor.shutdown()
                spinner.join()
                node.destroy_node()
        finally:
            rclpy.shutdown(context=context)


# ---------------------------------------------------------------------------
# NOTE: the rest of this physical line in the collapsed diff belonged to the
# next file (src/lab_sim/test/objectives_integration_test.py), not to
# conftest.py. It is preserved verbatim below so no content is lost.
#
# diff --git a/src/lab_sim/test/objectives_integration_test.py b/src/lab_sim/test/objectives_integration_test.py
# index ef849b0c..af274bcc 100644
# --- a/src/lab_sim/test/objectives_integration_test.py
# +++ b/src/lab_sim/test/objectives_integration_test.py
# @@ -66,6 +66,7 @@
#      "MPC Pose Tracking",
#      "MPC Pose Tracking With Point Cloud Avoidance",
#      "Octomap Example",  # Requires user input to clear the octomap.
# +    "Pick 1 Pill Bottle with ML",
#      "Pick All Bottles with AprilTags",
#      "Pick All Pill Bottles",
#      "Pick up Object",