From dd3b7fbb82d4eedeaeac04177cb58d131b7a8d47 Mon Sep 17 00:00:00 2001 From: David Stern Date: Tue, 5 May 2026 18:42:16 -0400 Subject: [PATCH] Fail closed on GitHub API errors during the sync workflow. --- src/repo_sync/stack/gh_ops.py | 24 +++++++++--------- src/repo_sync/stack/loop_detection.py | 8 +++++- src/repo_sync/workflows/sync.py | 35 +++++++++++++++++++++++---- tests/test_loop_detection.py | 19 +++++++++++++++ 4 files changed, 69 insertions(+), 17 deletions(-) diff --git a/src/repo_sync/stack/gh_ops.py b/src/repo_sync/stack/gh_ops.py index 254b266..e9969c6 100644 --- a/src/repo_sync/stack/gh_ops.py +++ b/src/repo_sync/stack/gh_ops.py @@ -291,18 +291,20 @@ def get_pr_for_commit(self, sha: str) -> PullRequest | None: lookup, avoiding false-positive text matches from gh pr list --search. Filters for merged PRs to avoid picking up open/closed PRs that happen to include the same commit. + + Raises subprocess.CalledProcessError if the GitHub API call fails. + Callers in non-critical paths should catch this and degrade gracefully; + callers in safety-critical paths (e.g. loop detection) must let it + propagate to abort the workflow. """ - try: - output = self._run( - [ - "api", - f"repos/{self.repo}/commits/{sha}/pulls", - "--jq", - '[.[] | select(.merged_at != null)] | .[0]', - ], - ) - except subprocess.CalledProcessError: - return None + output = self._run( + [ + "api", + f"repos/{self.repo}/commits/{sha}/pulls", + "--jq", + '[.[] | select(.merged_at != null)] | .[0]', + ], + ) if not output or output == "null": return None pr = json.loads(output) diff --git a/src/repo_sync/stack/loop_detection.py b/src/repo_sync/stack/loop_detection.py index 32a7492..ebf83db 100644 --- a/src/repo_sync/stack/loop_detection.py +++ b/src/repo_sync/stack/loop_detection.py @@ -39,9 +39,15 @@ def is_sync_originated( return False # Check 2: was the commit merged from a repo-sync/ branch? + # + # get_pr_for_commit raises on API failure, which intentionally aborts + # the sync run. We must not fail-open here: if we cannot verify the + # PR branch, we cannot safely determine whether this commit is + # sync-originated. pr = gh.get_pr_for_commit(commit_sha) if pr is None: - # No PR found -- this could be a direct push with a spoofed trailer. + # API succeeded but no merged PR was found — this could be a + # direct push with a spoofed trailer. return False if not is_sync_branch(pr.head_branch): diff --git a/src/repo_sync/workflows/sync.py b/src/repo_sync/workflows/sync.py index ead30f5..1280ff3 100644 --- a/src/repo_sync/workflows/sync.py +++ b/src/repo_sync/workflows/sync.py @@ -12,6 +12,7 @@ from __future__ import annotations import logging +import subprocess from dataclasses import dataclass from repo_sync.stack.branches import check_idempotency @@ -94,9 +95,17 @@ def enumerate_unsynced_commits( # Filter out sync-originated commits. result = [] for sha in all_commits: - if is_sync_originated(source_git, source_gh, sha): - logger.info("Skipping sync-originated commit %s.", sha[:12]) - continue + try: + if is_sync_originated(source_git, source_gh, sha): + logger.info("Skipping sync-originated commit %s.", sha[:12]) + continue + except subprocess.CalledProcessError: + logger.error( + "GitHub API failure during loop detection for commit %s. " + "Aborting sync to avoid creating an infinite loop.", + sha[:12], + ) + raise result.append(sha) return result @@ -146,7 +155,15 @@ def build_public_to_private_description( direct pushes. """ source_repo_name = source_repo.split("/")[-1] - source_pr = source_gh.get_pr_for_commit(source_sha) + try: + source_pr = source_gh.get_pr_for_commit(source_sha) + except subprocess.CalledProcessError: + logger.warning( + "GitHub API error looking up PR for commit %s; " + "falling back to commit-based description.", + source_sha[:12], + ) + source_pr = None if source_pr is not None: return public_to_private_from_pr( @@ -180,7 +197,15 @@ def determine_sync_reviewer( Tries the merger of the source PR first, then the commit author (via GitHub API), then the fallback team. """ - source_pr = source_gh.get_pr_for_commit(source_sha) + try: + source_pr = source_gh.get_pr_for_commit(source_sha) + except subprocess.CalledProcessError: + logger.warning( + "GitHub API error looking up PR for commit %s; " + "falling back to commit author or team for reviewer.", + source_sha[:12], + ) + source_pr = None pr_number = source_pr.number if source_pr else None # For direct pushes (no source PR), look up the commit author via the API. diff --git a/tests/test_loop_detection.py b/tests/test_loop_detection.py index 80f8e37..180b462 100644 --- a/tests/test_loop_detection.py +++ b/tests/test_loop_detection.py @@ -9,8 +9,11 @@ from __future__ import annotations +import subprocess from unittest.mock import MagicMock +import pytest + from repo_sync.stack.gh_ops import GhOps, PullRequest from repo_sync.stack.git_ops import GitOps from repo_sync.stack.loop_detection import is_sync_originated @@ -99,6 +102,22 @@ def test_trailer_present_but_no_pr_found(self) -> None: assert is_sync_originated(git, gh, "directpush") is False + def test_api_failure_propagates_when_trailer_present(self) -> None: + """An API failure during PR lookup aborts rather than failing-open.""" + git = MagicMock(spec=GitOps) + git.commit_message.return_value = ( + "sync commit\n\n" + "Repo-Sync-Origin: warpdotdev/warp-internal@abc123" + ) + + gh = MagicMock(spec=GhOps) + gh.get_pr_for_commit.side_effect = subprocess.CalledProcessError( + 1, ["gh", "api", "repos/org/repo/commits/abc123/pulls"] + ) + + with pytest.raises(subprocess.CalledProcessError): + is_sync_originated(git, gh, "abc123") + def test_public_to_private_sync_branch_recognized(self) -> None: """Public-to-private sync branches are also recognized.""" git = MagicMock(spec=GitOps)