From 81d2ed206aa55c6f556ab50274f1282e36f741c4 Mon Sep 17 00:00:00 2001 From: lselvar Date: Tue, 23 Jun 2026 17:36:04 -0400 Subject: [PATCH 1/8] fix: resolve GitHub release asset API URL for private repo bundle downloads For private/SSO-protected GitHub repos, browser release download URLs (https://github.com///releases/download//) redirect to an HTML/SSO page instead of delivering the asset, causing bundle manifest downloads to fail. Extends the pattern from #2855 (presets/workflows) to cover the bundle manifest download path in _download_remote_manifest: - Resolves browser release URLs to GitHub REST API asset URLs via resolve_github_release_asset_api_url before downloading - Direct REST API asset URLs (api.github.com/repos/.../releases/assets/) are passed through directly - Both cases use Accept: application/octet-stream so the API returns the binary payload rather than JSON metadata - The original catalog URL is used to determine artifact format (.zip vs YAML) since the resolved API URL does not carry the file extension Adds two CLI-level contract tests: - bundle info resolves browser release URL via GitHub tags API - bundle info passes direct API asset URL through with octet-stream Co-Authored-By: Claude Sonnet 4.6 --- src/specify_cli/commands/bundle/__init__.py | 25 +++- tests/contract/test_bundle_cli.py | 120 ++++++++++++++++++++ 2 files changed, 143 insertions(+), 2 deletions(-) diff --git a/src/specify_cli/commands/bundle/__init__.py b/src/specify_cli/commands/bundle/__init__.py index 185e00acf6..8816f03e3e 100644 --- a/src/specify_cli/commands/bundle/__init__.py +++ b/src/specify_cli/commands/bundle/__init__.py @@ -794,22 +794,43 @@ def _download_remote_manifest(entry_id: str, url: str): import tempfile from ...authentication.http import open_url + from ..._github_http import resolve_github_release_asset_api_url def _validate_redirect(old_url: str, new_url: str) -> None: _require_https(f"bundle '{entry_id}'", new_url) _require_https(f"bundle '{entry_id}'", url) + + # For private/SSO-protected GitHub repos, browser release download URLs + # (https://github.com///releases/download//) + # redirect to an HTML/SSO page instead of delivering the asset. Resolve + # such URLs to the GitHub REST API asset URL so the authenticated client + # can download the actual file. + extra_headers = None + effective_url = url + resolved = resolve_github_release_asset_api_url(url, open_url, timeout=30) + if resolved: + effective_url = resolved + extra_headers = {"Accept": "application/octet-stream"} + try: - with open_url(url, timeout=30, redirect_validator=_validate_redirect) as resp: + with open_url( + effective_url, + timeout=30, + redirect_validator=_validate_redirect, + extra_headers=extra_headers, + ) as resp: _require_https(f"bundle '{entry_id}'", resp.geturl()) raw = resp.read() except BundlerError: raise except Exception as exc: # noqa: BLE001 - raise BundlerError(f"Failed to download bundle '{entry_id}' from {url}: {exc}") from exc + raise BundlerError(f"Failed to download bundle '{entry_id}' from {effective_url}: {exc}") from exc # A .zip artifact is written to a temp file and parsed via the local-source # path (which extracts bundle.yml); any other payload is treated as YAML. + # Use the original catalog URL (``url``) to determine the artifact format + # since the resolved API URL does not carry the file extension. if url.lower().endswith(".zip"): with tempfile.TemporaryDirectory() as tmp: artifact = Path(tmp) / "bundle.zip" diff --git a/tests/contract/test_bundle_cli.py b/tests/contract/test_bundle_cli.py index 018b2bbec1..d5867acba7 100644 --- a/tests/contract/test_bundle_cli.py +++ b/tests/contract/test_bundle_cli.py @@ -8,6 +8,7 @@ import json from pathlib import Path +from unittest.mock import patch import pytest import yaml @@ -389,3 +390,122 @@ def test_install_integration_override_cannot_bypass_clash_guard(project: Path): ) assert result.exit_code == 1 assert "claude" in result.output and "copilot" in result.output + + +# ===== Private GitHub release asset URL resolution ===== + + +class FakeBundleResponse: + """Minimal context-manager response stub for open_url fakes.""" + + def __init__(self, data: bytes, url: str = "https://api.github.com/repos/org/repo/releases/assets/99"): + self._data = data + self._url = url + + def read(self) -> bytes: + return self._data + + def geturl(self) -> str: + return self._url + + def __enter__(self): + return self + + def __exit__(self, *_): + return False + + +def _make_catalog_config(catalog_path: Path, project: Path) -> None: + """Write a bundle-catalogs.yml pointing at *catalog_path* in *project*.""" + config = { + "schema_version": "1.0", + "catalogs": [ + { + "id": "test", + "url": str(catalog_path), + "priority": 1, + "install_policy": "install-allowed", + } + ], + } + (project / ".specify" / "bundle-catalogs.yml").write_text( + yaml.safe_dump(config), encoding="utf-8" + ) + + +def test_bundle_info_resolves_github_browser_release_url(project: Path): + """bundle info resolves a private-repo browser release URL via the GitHub API.""" + browser_url = "https://github.com/org/repo/releases/download/v1.0/bundle.yml" + api_asset_url = "https://api.github.com/repos/org/repo/releases/assets/99" + + captured = [] + manifest_yaml = yaml.safe_dump(valid_manifest_dict()).encode() + + def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None): + captured.append((url, extra_headers)) + if "releases/tags/" in url: + # GitHub API release-tags lookup — return asset list + return FakeBundleResponse( + json.dumps({ + "assets": [{"name": "bundle.yml", "url": api_asset_url}] + }).encode(), + url=url, + ) + # Actual asset download + return FakeBundleResponse(manifest_yaml, url=api_asset_url) + + catalog = project / "catalog.json" + write_catalog_file( + catalog, + {"demo-bundle": catalog_entry_dict("demo-bundle", download_url=browser_url)}, + ) + _make_catalog_config(catalog, project) + + with patch("specify_cli.authentication.http.open_url", side_effect=fake_open_url): + result = runner.invoke(app, ["bundle", "info", "demo-bundle", "--json"]) + + assert result.exit_code == 0, result.output + + # The browser release URL must have been resolved via the GitHub tags API + tag_calls = [url for url, _ in captured if "releases/tags/" in url] + assert len(tag_calls) == 1, f"Expected exactly one tags API call; got {captured}" + assert "releases/tags/v1.0" in tag_calls[0] + + # The actual download must use the resolved API asset URL with octet-stream + asset_calls = [(url, h) for url, h in captured if "releases/assets/" in url] + assert len(asset_calls) >= 1 + assert asset_calls[0][1] == {"Accept": "application/octet-stream"} + + +def test_bundle_info_passes_through_api_asset_url(project: Path): + """bundle info passes a direct GitHub API asset URL through with octet-stream.""" + api_asset_url = "https://api.github.com/repos/org/repo/releases/assets/77" + + captured = [] + manifest_yaml = yaml.safe_dump(valid_manifest_dict()).encode() + + def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None): + captured.append((url, extra_headers)) + return FakeBundleResponse(manifest_yaml, url=api_asset_url) + + catalog = project / "catalog.json" + write_catalog_file( + catalog, + {"demo-bundle": catalog_entry_dict("demo-bundle", download_url=api_asset_url)}, + ) + _make_catalog_config(catalog, project) + + with patch("specify_cli.authentication.http.open_url", side_effect=fake_open_url): + result = runner.invoke(app, ["bundle", "info", "demo-bundle", "--json"]) + + assert result.exit_code == 0, result.output + + # No tags API call — URL was already a REST asset URL + tag_calls = [url for url, _ in captured if "releases/tags/" in url] + assert len(tag_calls) == 0 + + # Exactly one download call to the asset URL with octet-stream + asset_calls = [(url, h) for url, h in captured if "releases/assets/" in url] + assert len(asset_calls) == 1 + assert asset_calls[0][0] == api_asset_url + assert asset_calls[0][1] == {"Accept": "application/octet-stream"} From 29e043f3bcaade657e05ab432b2a4791682d145d Mon Sep 17 00:00:00 2001 From: lselvar Date: Thu, 25 Jun 2026 22:16:53 -0400 Subject: [PATCH 2/8] fix: detect ZIP payload by magic bytes; add zip and API-asset tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Copilot review feedback on PR #3136: 1. Detect ZIP payloads by magic bytes (PK\x03\x04) in addition to the '.zip' URL suffix so that direct GitHub REST asset URLs — which carry no file extension — are correctly routed through the ZIP extraction path when the asset is a ZIP bundle artifact. 2. Add two new contract tests: - test_bundle_info_resolves_github_browser_release_url_zip: exercises the '.zip' browser release URL path end-to-end, verifying the tags API lookup fires, octet-stream header is used, and bundle.yml is successfully extracted from the ZIP payload. - test_bundle_info_api_asset_url_zip_detected_by_magic_bytes: verifies that a direct REST asset URL returning ZIP bytes is detected by magic and parsed correctly without a tags API call. Co-Authored-By: Claude Sonnet 4.6 --- src/specify_cli/commands/bundle/__init__.py | 8 +- tests/contract/test_bundle_cli.py | 99 +++++++++++++++++++++ 2 files changed, 104 insertions(+), 3 deletions(-) diff --git a/src/specify_cli/commands/bundle/__init__.py b/src/specify_cli/commands/bundle/__init__.py index 8816f03e3e..e752daf9b2 100644 --- a/src/specify_cli/commands/bundle/__init__.py +++ b/src/specify_cli/commands/bundle/__init__.py @@ -829,9 +829,11 @@ def _validate_redirect(old_url: str, new_url: str) -> None: # A .zip artifact is written to a temp file and parsed via the local-source # path (which extracts bundle.yml); any other payload is treated as YAML. - # Use the original catalog URL (``url``) to determine the artifact format - # since the resolved API URL does not carry the file extension. - if url.lower().endswith(".zip"): + # Detection uses the original catalog URL's extension when available (browser + # release URLs carry the filename), and falls back to the ZIP magic bytes for + # direct REST API asset URLs which have no file extension in their path. + _ZIP_MAGIC = b"PK\x03\x04" + if url.lower().endswith(".zip") or raw[:4] == _ZIP_MAGIC: with tempfile.TemporaryDirectory() as tmp: artifact = Path(tmp) / "bundle.zip" artifact.write_bytes(raw) diff --git a/tests/contract/test_bundle_cli.py b/tests/contract/test_bundle_cli.py index d5867acba7..c955b45034 100644 --- a/tests/contract/test_bundle_cli.py +++ b/tests/contract/test_bundle_cli.py @@ -509,3 +509,102 @@ def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None assert len(asset_calls) == 1 assert asset_calls[0][0] == api_asset_url assert asset_calls[0][1] == {"Accept": "application/octet-stream"} + + +def test_bundle_info_resolves_github_browser_release_url_zip(project: Path, tmp_path: Path): + """bundle info resolves a browser release URL for a .zip artifact and extracts bundle.yml.""" + import io + import zipfile + + browser_url = "https://github.com/org/repo/releases/download/v2.0/bundle.zip" + api_asset_url = "https://api.github.com/repos/org/repo/releases/assets/88" + + # Build a minimal in-memory ZIP containing bundle.yml + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as zf: + zf.writestr("bundle.yml", yaml.safe_dump(valid_manifest_dict())) + zip_bytes = buf.getvalue() + + captured = [] + + def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None): + captured.append((url, extra_headers)) + if "releases/tags/" in url: + return FakeBundleResponse( + json.dumps({ + "assets": [{"name": "bundle.zip", "url": api_asset_url}] + }).encode(), + url=url, + ) + return FakeBundleResponse(zip_bytes, url=api_asset_url) + + catalog = project / "catalog.json" + write_catalog_file( + catalog, + {"demo-bundle": catalog_entry_dict("demo-bundle", download_url=browser_url)}, + ) + _make_catalog_config(catalog, project) + + with patch("specify_cli.authentication.http.open_url", side_effect=fake_open_url): + result = runner.invoke(app, ["bundle", "info", "demo-bundle", "--json"]) + + assert result.exit_code == 0, result.output + + # tags API lookup must have fired + tag_calls = [url for url, _ in captured if "releases/tags/" in url] + assert len(tag_calls) == 1 + assert "releases/tags/v2.0" in tag_calls[0] + + # Asset download uses octet-stream + asset_calls = [(url, h) for url, h in captured if "releases/assets/" in url] + assert len(asset_calls) >= 1 + assert asset_calls[0][1] == {"Accept": "application/octet-stream"} + + # Manifest was successfully parsed from the ZIP + payload = json.loads(result.output) + assert payload["id"] == "demo-bundle" + + +def test_bundle_info_api_asset_url_zip_detected_by_magic_bytes(project: Path): + """bundle info correctly handles a direct API asset URL that serves ZIP bytes.""" + import io + import zipfile + + api_asset_url = "https://api.github.com/repos/org/repo/releases/assets/55" + + # Build a minimal in-memory ZIP containing bundle.yml + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as zf: + zf.writestr("bundle.yml", yaml.safe_dump(valid_manifest_dict())) + zip_bytes = buf.getvalue() + + captured = [] + + def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None): + captured.append((url, extra_headers)) + return FakeBundleResponse(zip_bytes, url=api_asset_url) + + catalog = project / "catalog.json" + write_catalog_file( + catalog, + {"demo-bundle": catalog_entry_dict("demo-bundle", download_url=api_asset_url)}, + ) + _make_catalog_config(catalog, project) + + with patch("specify_cli.authentication.http.open_url", side_effect=fake_open_url): + result = runner.invoke(app, ["bundle", "info", "demo-bundle", "--json"]) + + assert result.exit_code == 0, result.output + + # No tags API call — URL was already a REST asset URL + tag_calls = [url for url, _ in captured if "releases/tags/" in url] + assert len(tag_calls) == 0 + + # Download used octet-stream header + asset_calls = [(url, h) for url, h in captured if "releases/assets/" in url] + assert len(asset_calls) == 1 + assert asset_calls[0][1] == {"Accept": "application/octet-stream"} + + # ZIP bytes were detected by magic and bundle.yml extracted correctly + payload = json.loads(result.output) + assert payload["id"] == "demo-bundle" From 684893dd23e749e0ff15b6419b18c1eca9127b06 Mon Sep 17 00:00:00 2001 From: lselvar Date: Fri, 26 Jun 2026 14:57:49 -0400 Subject: [PATCH 3/8] fix: improve error message, broaden ZIP magic, drop unused tmp_path Address second-round Copilot review feedback on PR #3136: - Error message: when the download fails, report the original catalog download_url so the user knows which entry to fix; include the resolved REST API URL when it differs for easier debugging. - ZIP detection: broaden the magic-bytes check from PK\x03\x04 to raw[:2] == b"PK", covering all valid ZIP variants (local-file header PK\x03\x04, empty-archive PK\x05\x06, spanned/split PK\x07\x08). - Tests: remove the unused tmp_path parameter from test_bundle_info_resolves_github_browser_release_url_zip. Co-Authored-By: Claude Sonnet 4.6 --- src/specify_cli/commands/bundle/__init__.py | 16 +++++++++++----- tests/contract/test_bundle_cli.py | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/specify_cli/commands/bundle/__init__.py b/src/specify_cli/commands/bundle/__init__.py index e752daf9b2..fc6c22eebb 100644 --- a/src/specify_cli/commands/bundle/__init__.py +++ b/src/specify_cli/commands/bundle/__init__.py @@ -825,15 +825,21 @@ def _validate_redirect(old_url: str, new_url: str) -> None: except BundlerError: raise except Exception as exc: # noqa: BLE001 - raise BundlerError(f"Failed to download bundle '{entry_id}' from {effective_url}: {exc}") from exc + # Report the original catalog URL so users know which entry to fix, + # and include the resolved URL when it differs for easier debugging. + if effective_url != url: + msg = f"Failed to download bundle '{entry_id}' from {url} (resolved to {effective_url}): {exc}" + else: + msg = f"Failed to download bundle '{entry_id}' from {url}: {exc}" + raise BundlerError(msg) from exc # A .zip artifact is written to a temp file and parsed via the local-source # path (which extracts bundle.yml); any other payload is treated as YAML. # Detection uses the original catalog URL's extension when available (browser - # release URLs carry the filename), and falls back to the ZIP magic bytes for - # direct REST API asset URLs which have no file extension in their path. - _ZIP_MAGIC = b"PK\x03\x04" - if url.lower().endswith(".zip") or raw[:4] == _ZIP_MAGIC: + # release URLs carry the filename), and falls back to the ZIP magic bytes + # (``PK`` prefix) for direct REST API asset URLs which have no file extension. + # All valid ZIP variants start with ``PK`` (local-file, empty-archive, split). + if url.lower().endswith(".zip") or raw[:2] == b"PK": with tempfile.TemporaryDirectory() as tmp: artifact = Path(tmp) / "bundle.zip" artifact.write_bytes(raw) diff --git a/tests/contract/test_bundle_cli.py b/tests/contract/test_bundle_cli.py index c955b45034..83a80496da 100644 --- a/tests/contract/test_bundle_cli.py +++ b/tests/contract/test_bundle_cli.py @@ -511,7 +511,7 @@ def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None assert asset_calls[0][1] == {"Accept": "application/octet-stream"} -def test_bundle_info_resolves_github_browser_release_url_zip(project: Path, tmp_path: Path): +def test_bundle_info_resolves_github_browser_release_url_zip(project: Path): """bundle info resolves a browser release URL for a .zip artifact and extracts bundle.yml.""" import io import zipfile From 4c8281cd52be8253f76b4182e647107a4e17c4e1 Mon Sep 17 00:00:00 2001 From: lselvar Date: Fri, 26 Jun 2026 19:54:46 -0400 Subject: [PATCH 4/8] fix: use full 4-byte ZIP signatures instead of 2-byte PK prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Copilot feedback: raw[:2] == b"PK" is too broad and could misclassify any payload starting with ASCII "PK" as a ZIP, producing a confusing "not a valid bundle" error. Use the three specific 4-byte ZIP magic signatures instead: PK\x03\x04 — local file header (standard ZIP) PK\x05\x06 — end-of-central-directory (empty archive) PK\x07\x08 — data descriptor / spanning marker Co-Authored-By: Claude Sonnet 4.6 --- src/specify_cli/commands/bundle/__init__.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/specify_cli/commands/bundle/__init__.py b/src/specify_cli/commands/bundle/__init__.py index fc6c22eebb..58db3928cd 100644 --- a/src/specify_cli/commands/bundle/__init__.py +++ b/src/specify_cli/commands/bundle/__init__.py @@ -836,10 +836,15 @@ def _validate_redirect(old_url: str, new_url: str) -> None: # A .zip artifact is written to a temp file and parsed via the local-source # path (which extracts bundle.yml); any other payload is treated as YAML. # Detection uses the original catalog URL's extension when available (browser - # release URLs carry the filename), and falls back to the ZIP magic bytes - # (``PK`` prefix) for direct REST API asset URLs which have no file extension. - # All valid ZIP variants start with ``PK`` (local-file, empty-archive, split). - if url.lower().endswith(".zip") or raw[:2] == b"PK": + # release URLs carry the filename), and falls back to the 4-byte ZIP magic + # for direct REST API asset URLs which have no file extension. The three + # recognised signatures cover all valid ZIP variants without the false-positive + # risk of a 2-byte ``PK`` prefix check: + # PK\x03\x04 — local file header (standard ZIP) + # PK\x05\x06 — end-of-central-directory (empty archive) + # PK\x07\x08 — data descriptor / spanning marker + _ZIP_SIGNATURES = (b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08") + if url.lower().endswith(".zip") or raw[:4] in _ZIP_SIGNATURES: with tempfile.TemporaryDirectory() as tmp: artifact = Path(tmp) / "bundle.zip" artifact.write_bytes(raw) From ee5e8b6f08c1da6a59a926e62f6be43ea25865a8 Mon Sep 17 00:00:00 2001 From: lselvar Date: Tue, 30 Jun 2026 21:16:56 -0400 Subject: [PATCH 5/8] fix: harden _download_remote_manifest parsing and tighten tests - Promote _ZIP_SIGNATURES to module-level constant (was redefined per call) - Use PurePosixPath for URL path suffix extraction so query strings and fragments are ignored and URL paths are treated as POSIX on all OSes - Move yaml/BundleManifest imports to function top to flatten the previously nested try/except into a single handler with explicit except _yaml.YAMLError and except Exception clauses - Re-add None guard on _local_manifest_source return: the function is typed Optional[BundleManifest] and without the guard a None return propagates silently to callers that degrade gracefully rather than raising an actionable error; comment explains it is defensive not dead - Assert exact resolved asset URL in browser-URL download tests, not just the Accept header, so a regression where download uses the original URL instead of the resolved one would be caught - Add resolution-failure test: when tags API finds no matching asset the code falls back to the original URL and exits non-zero with Error: Co-Authored-By: Claude Sonnet 4.6 --- src/specify_cli/commands/bundle/__init__.py | 72 ++++++++++++++------- tests/contract/test_bundle_cli.py | 52 ++++++++++++++- 2 files changed, 96 insertions(+), 28 deletions(-) diff --git a/src/specify_cli/commands/bundle/__init__.py b/src/specify_cli/commands/bundle/__init__.py index 58db3928cd..fdbeeed325 100644 --- a/src/specify_cli/commands/bundle/__init__.py +++ b/src/specify_cli/commands/bundle/__init__.py @@ -629,6 +629,14 @@ def catalog_remove( console.print(f"[green]✓[/green] Removed catalog source '{removed}'.") +# ZIP magic-byte signatures used to detect .zip payloads from REST API asset +# URLs, which carry no file extension. The three signatures cover all valid +# ZIP variants (PK\x03\x04 = local file header, PK\x05\x06 = empty archive, +# PK\x07\x08 = spanning marker) without the false-positive risk of checking +# only the 2-byte "PK" prefix. +_ZIP_SIGNATURES = (b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08") + + # ===== internal helpers ===== @@ -792,9 +800,14 @@ def _download_remote_manifest(entry_id: str, url: str): """Fetch a remote bundle artifact over HTTPS and extract its manifest.""" import io import tempfile + from pathlib import PurePosixPath + from urllib.parse import urlparse as _urlparse + + import yaml as _yaml from ...authentication.http import open_url from ..._github_http import resolve_github_release_asset_api_url + from ...bundler.models.manifest import BundleManifest def _validate_redirect(old_url: str, new_url: str) -> None: _require_https(f"bundle '{entry_id}'", new_url) @@ -811,6 +824,7 @@ def _validate_redirect(old_url: str, new_url: str) -> None: resolved = resolve_github_release_asset_api_url(url, open_url, timeout=30) if resolved: effective_url = resolved + _require_https(f"bundle '{entry_id}'", effective_url) extra_headers = {"Accept": "application/octet-stream"} try: @@ -835,32 +849,40 @@ def _validate_redirect(old_url: str, new_url: str) -> None: # A .zip artifact is written to a temp file and parsed via the local-source # path (which extracts bundle.yml); any other payload is treated as YAML. - # Detection uses the original catalog URL's extension when available (browser - # release URLs carry the filename), and falls back to the 4-byte ZIP magic - # for direct REST API asset URLs which have no file extension. The three - # recognised signatures cover all valid ZIP variants without the false-positive - # risk of a 2-byte ``PK`` prefix check: - # PK\x03\x04 — local file header (standard ZIP) - # PK\x05\x06 — end-of-central-directory (empty archive) - # PK\x07\x08 — data descriptor / spanning marker - _ZIP_SIGNATURES = (b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08") - if url.lower().endswith(".zip") or raw[:4] in _ZIP_SIGNATURES: - with tempfile.TemporaryDirectory() as tmp: - artifact = Path(tmp) / "bundle.zip" - artifact.write_bytes(raw) - manifest = _local_manifest_source(str(artifact)) - if manifest is None: - raise BundlerError( - f"Downloaded artifact for bundle '{entry_id}' is not a valid bundle." - ) - return manifest - - import yaml as _yaml - - from ...bundler.models.manifest import BundleManifest + # Detection uses the path component of the original catalog URL (via + # PurePosixPath so query strings and fragments are ignored, and URL paths + # are always treated as POSIX regardless of host OS), falling back to the + # module-level _ZIP_SIGNATURES magic-byte check for direct REST API asset + # URLs which carry no file extension. + _url_ext = PurePosixPath(_urlparse(url).path).suffix.lower() + try: + if _url_ext == ".zip" or raw[:4] in _ZIP_SIGNATURES: + with tempfile.TemporaryDirectory() as tmp: + artifact = Path(tmp) / "bundle.zip" + artifact.write_bytes(raw) + manifest = _local_manifest_source(str(artifact)) + # _local_manifest_source returns None only when the file does + # not exist; since we just wrote *artifact* that cannot happen + # here. The explicit guard ensures callers never receive None + # and silently degrade instead of raising a clear error. + if manifest is None: + raise BundlerError( + f"Downloaded artifact for bundle '{entry_id}' is not a valid bundle." + ) + return manifest - data = _yaml.safe_load(io.BytesIO(raw)) - return BundleManifest.from_dict(data) + data = _yaml.safe_load(io.BytesIO(raw)) + return BundleManifest.from_dict(data) + except BundlerError: + raise + except _yaml.YAMLError as exc: + raise BundlerError( + f"Downloaded content for bundle '{entry_id}' is not valid YAML: {exc}" + ) from exc + except Exception as exc: # noqa: BLE001 + raise BundlerError( + f"Failed to parse downloaded bundle '{entry_id}': {exc}" + ) from exc def register(app: typer.Typer) -> None: diff --git a/tests/contract/test_bundle_cli.py b/tests/contract/test_bundle_cli.py index 83a80496da..7ba38fa940 100644 --- a/tests/contract/test_bundle_cli.py +++ b/tests/contract/test_bundle_cli.py @@ -473,7 +473,8 @@ def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None # The actual download must use the resolved API asset URL with octet-stream asset_calls = [(url, h) for url, h in captured if "releases/assets/" in url] - assert len(asset_calls) >= 1 + assert len(asset_calls) == 1 + assert asset_calls[0][0] == api_asset_url assert asset_calls[0][1] == {"Accept": "application/octet-stream"} @@ -555,9 +556,10 @@ def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None assert len(tag_calls) == 1 assert "releases/tags/v2.0" in tag_calls[0] - # Asset download uses octet-stream + # Asset download uses the resolved API URL with octet-stream asset_calls = [(url, h) for url, h in captured if "releases/assets/" in url] - assert len(asset_calls) >= 1 + assert len(asset_calls) == 1 + assert asset_calls[0][0] == api_asset_url assert asset_calls[0][1] == {"Accept": "application/octet-stream"} # Manifest was successfully parsed from the ZIP @@ -608,3 +610,47 @@ def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None # ZIP bytes were detected by magic and bundle.yml extracted correctly payload = json.loads(result.output) assert payload["id"] == "demo-bundle" + + +def test_bundle_info_github_release_url_resolution_failure_falls_back_and_errors(project: Path): + """When the GitHub tags API lookup finds no matching asset, fall back to the + original browser URL and surface a meaningful error (not a raw traceback).""" + browser_url = "https://github.com/org/repo/releases/download/v3.0/bundle.yml" + + captured = [] + + def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None): + captured.append((url, extra_headers)) + if "releases/tags/" in url: + # Tags API responds but the asset list doesn't include our file + return FakeBundleResponse( + json.dumps({"assets": []}).encode(), + url=url, + ) + # Fallback download: GitHub serves HTML (SSO redirect) instead of YAML + return FakeBundleResponse(b"SSO login required", url=url) + + catalog = project / "catalog.json" + write_catalog_file( + catalog, + {"demo-bundle": catalog_entry_dict("demo-bundle", download_url=browser_url)}, + ) + _make_catalog_config(catalog, project) + + with patch("specify_cli.authentication.http.open_url", side_effect=fake_open_url): + result = runner.invoke(app, ["bundle", "info", "demo-bundle", "--json"]) + + # Must exit non-zero — the HTML body is not a valid bundle manifest + assert result.exit_code == 1 + + # The tags API lookup must have fired + tag_calls = [url for url, _ in captured if "releases/tags/" in url] + assert len(tag_calls) == 1 + + # The fallback download should use the original browser URL (no octet-stream) + fallback_calls = [(url, h) for url, h in captured if url == browser_url] + assert len(fallback_calls) == 1 + assert fallback_calls[0][1] is None # no Accept header on the original URL + + # Error output must be actionable (not a raw traceback) + assert "Error:" in result.output From a4ce981782b2e57d2aaf5b767129dc1bfbfda97a Mon Sep 17 00:00:00 2001 From: lselvar Date: Tue, 30 Jun 2026 22:00:14 -0400 Subject: [PATCH 6/8] fix(bundle): pass github_provider_hosts() for GHES private release downloads Extends the GHES support pattern from extensions and presets (#2855, #3157) to the bundle manifest download path: resolve_github_release_asset_api_url now receives github_hosts=github_provider_hosts() so browser release URLs from GitHub Enterprise Server instances are resolved via /api/v3 rather than falling back to the unauthenticated download path. Also adds a contract test covering the GHES resolution path for _download_remote_manifest (analogous to the existing github.com tests). Co-Authored-By: Claude Sonnet 4.6 --- src/specify_cli/commands/bundle/__init__.py | 6 ++- tests/contract/test_bundle_cli.py | 55 +++++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/src/specify_cli/commands/bundle/__init__.py b/src/specify_cli/commands/bundle/__init__.py index 94b1487c43..63d30930e8 100644 --- a/src/specify_cli/commands/bundle/__init__.py +++ b/src/specify_cli/commands/bundle/__init__.py @@ -807,7 +807,7 @@ def _download_remote_manifest(entry_id: str, url: str): import yaml as _yaml - from ...authentication.http import open_url + from ...authentication.http import github_provider_hosts, open_url from ..._github_http import resolve_github_release_asset_api_url from ...bundler.models.manifest import BundleManifest @@ -823,7 +823,9 @@ def _validate_redirect(old_url: str, new_url: str) -> None: # can download the actual file. extra_headers = None effective_url = url - resolved = resolve_github_release_asset_api_url(url, open_url, timeout=30) + resolved = resolve_github_release_asset_api_url( + url, open_url, timeout=30, github_hosts=github_provider_hosts() + ) if resolved: effective_url = resolved _require_https(f"bundle '{entry_id}'", effective_url) diff --git a/tests/contract/test_bundle_cli.py b/tests/contract/test_bundle_cli.py index 4853df1d36..2cc2f2134c 100644 --- a/tests/contract/test_bundle_cli.py +++ b/tests/contract/test_bundle_cli.py @@ -669,3 +669,58 @@ def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None # Error output must be actionable (not a raw traceback) assert "Error:" in result.output + + +def test_bundle_info_resolves_ghes_browser_release_url(project: Path): + """bundle info resolves a GHES private-repo browser release URL via /api/v3.""" + ghes_host = "ghes.example" + browser_url = f"https://{ghes_host}/org/repo/releases/download/v1.0/bundle.yml" + api_asset_url = f"https://{ghes_host}/api/v3/repos/org/repo/releases/assets/42" + + captured = [] + manifest_yaml = yaml.safe_dump(valid_manifest_dict()).encode() + + def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None): + captured.append((url, extra_headers)) + if "/api/v3/repos/" in url and "releases/tags/" in url: + return FakeBundleResponse( + json.dumps({ + "assets": [{"name": "bundle.yml", "url": api_asset_url}] + }).encode(), + url=url, + ) + return FakeBundleResponse(manifest_yaml, url=api_asset_url) + + catalog = project / "catalog.json" + write_catalog_file( + catalog, + {"demo-bundle": catalog_entry_dict("demo-bundle", download_url=browser_url)}, + ) + _make_catalog_config(catalog, project) + + ghes_entry = { + "hosts": [ghes_host], + "provider": "github", + "auth": "bearer", + "token": "ghes-test-token", + } + + with patch("specify_cli.authentication.http.open_url", side_effect=fake_open_url), \ + patch("specify_cli.authentication.http.github_provider_hosts", return_value=(ghes_host,)): + result = runner.invoke(app, ["bundle", "info", "demo-bundle", "--json"]) + + assert result.exit_code == 0, result.output + + # The GHES /api/v3 tags lookup must have fired + tag_calls = [url for url, _ in captured if "releases/tags/" in url] + assert len(tag_calls) == 1 + assert f"{ghes_host}/api/v3/repos/org/repo/releases/tags/v1.0" in tag_calls[0] + + # Asset download must use the resolved GHES API URL with octet-stream + asset_calls = [(url, h) for url, h in captured if "releases/assets/" in url] + assert len(asset_calls) == 1 + assert asset_calls[0][0] == api_asset_url + assert asset_calls[0][1] == {"Accept": "application/octet-stream"} + + payload = json.loads(result.output) + assert payload["id"] == "demo-bundle" From 2aa95717e78bff35e3849eb703c028fc5abcfdb3 Mon Sep 17 00:00:00 2001 From: lselvar Date: Wed, 1 Jul 2026 15:02:02 -0400 Subject: [PATCH 7/8] test(bundle): remove unused ghes_entry variable from GHES contract test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dict was defined but never consumed — the test drives GHES host recognition entirely through the github_provider_hosts() patch. Co-Authored-By: Claude Sonnet 4.6 --- tests/contract/test_bundle_cli.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/contract/test_bundle_cli.py b/tests/contract/test_bundle_cli.py index 2cc2f2134c..1705c5945d 100644 --- a/tests/contract/test_bundle_cli.py +++ b/tests/contract/test_bundle_cli.py @@ -698,13 +698,6 @@ def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None ) _make_catalog_config(catalog, project) - ghes_entry = { - "hosts": [ghes_host], - "provider": "github", - "auth": "bearer", - "token": "ghes-test-token", - } - with patch("specify_cli.authentication.http.open_url", side_effect=fake_open_url), \ patch("specify_cli.authentication.http.github_provider_hosts", return_value=(ghes_host,)): result = runner.invoke(app, ["bundle", "info", "demo-bundle", "--json"]) From b41a2fe7708dfc6f6a034829e4d70ed43f61a81b Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Wed, 1 Jul 2026 16:21:09 -0500 Subject: [PATCH 8/8] fix(bundle): include source URL in remote manifest parse errors Thread the catalog URL (and resolved API URL when it differs) into the YAML parse, generic parse, and ZIP-extraction error paths of _download_remote_manifest so failures point at the offending source instead of an opaque temp path. Addresses PR review feedback. Assisted-by: GitHub Copilot (model: Claude Opus 4.8, autonomous) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/specify_cli/commands/bundle/__init__.py | 37 ++++++++++++++++----- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/src/specify_cli/commands/bundle/__init__.py b/src/specify_cli/commands/bundle/__init__.py index 63d30930e8..b3c84c6ba7 100644 --- a/src/specify_cli/commands/bundle/__init__.py +++ b/src/specify_cli/commands/bundle/__init__.py @@ -831,6 +831,14 @@ def _validate_redirect(old_url: str, new_url: str) -> None: _require_https(f"bundle '{entry_id}'", effective_url) extra_headers = {"Accept": "application/octet-stream"} + # Human-readable description of where the bytes came from, reused across + # all post-download error messages so failures point at the catalog URL + # (and resolved API URL, if any) instead of an opaque temp path. + if effective_url != url: + _source_desc = f"{url} (resolved to {effective_url})" + else: + _source_desc = url + try: with open_url( effective_url, @@ -845,11 +853,9 @@ def _validate_redirect(old_url: str, new_url: str) -> None: except Exception as exc: # noqa: BLE001 # Report the original catalog URL so users know which entry to fix, # and include the resolved URL when it differs for easier debugging. - if effective_url != url: - msg = f"Failed to download bundle '{entry_id}' from {url} (resolved to {effective_url}): {exc}" - else: - msg = f"Failed to download bundle '{entry_id}' from {url}: {exc}" - raise BundlerError(msg) from exc + raise BundlerError( + f"Failed to download bundle '{entry_id}' from {_source_desc}: {exc}" + ) from exc # A .zip artifact is written to a temp file and parsed via the local-source # path (which extracts bundle.yml); any other payload is treated as YAML. @@ -864,14 +870,25 @@ def _validate_redirect(old_url: str, new_url: str) -> None: with tempfile.TemporaryDirectory() as tmp: artifact = Path(tmp) / "bundle.zip" artifact.write_bytes(raw) - manifest = _local_manifest_source(str(artifact)) + # Wrap ZIP parsing so any failure (BadZipFile, missing + # bundle.yml, etc.) references the source URL rather than the + # opaque temporary path, consistent with the download-error + # handling above. + try: + manifest = _local_manifest_source(str(artifact)) + except Exception as exc: # noqa: BLE001 + raise BundlerError( + f"Downloaded artifact for bundle '{entry_id}' from " + f"{_source_desc} is not a valid bundle: {exc}" + ) from exc # _local_manifest_source returns None only when the file does # not exist; since we just wrote *artifact* that cannot happen # here. The explicit guard ensures callers never receive None # and silently degrade instead of raising a clear error. if manifest is None: raise BundlerError( - f"Downloaded artifact for bundle '{entry_id}' is not a valid bundle." + f"Downloaded artifact for bundle '{entry_id}' from " + f"{_source_desc} is not a valid bundle." ) return manifest @@ -881,11 +898,13 @@ def _validate_redirect(old_url: str, new_url: str) -> None: raise except _yaml.YAMLError as exc: raise BundlerError( - f"Downloaded content for bundle '{entry_id}' is not valid YAML: {exc}" + f"Downloaded content for bundle '{entry_id}' from {_source_desc} " + f"is not valid YAML: {exc}" ) from exc except Exception as exc: # noqa: BLE001 raise BundlerError( - f"Failed to parse downloaded bundle '{entry_id}': {exc}" + f"Failed to parse downloaded bundle '{entry_id}' from " + f"{_source_desc}: {exc}" ) from exc