getsentry · Abdkhan14 · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026
@@ -3,6 +3,7 @@
 import re
 import time
 from collections import defaultdict
+from concurrent.futures import as_completed
 from typing import TYPE_CHECKING, Any, TypedDict
 
 import sentry_sdk
@@ -43,6 +44,7 @@
 from sentry.integrations.github.platform_registry import (
     _PackageManifest as _PackageManifest,
 )
+from sentry.utils.concurrent import ContextPropagatingThreadPoolExecutor
 
 if TYPE_CHECKING:
     from sentry.integrations.github.client import GitHubBaseClient
@@ -191,9 +193,9 @@ def _select_active_platforms(
 )
 
 
-def _path_is_ignored(path: str) -> bool:
-    """Return True if any segment of the path is in the ignore-list."""
-    return any(segment in _IGNORED_TREE_SEGMENTS for segment in path.split("/"))
+def _segments_are_ignored(segments: list[str]) -> bool:
+    """Return True if any path segment is in the ignore-list."""
+    return any(segment in _IGNORED_TREE_SEGMENTS for segment in segments)
 
 
 def _get_tree(
@@ -243,23 +245,31 @@ def _build_tree_index(entries: list[dict[str, Any]]) -> _TreeIndex:
     Any entry whose path passes through an ignored segment is skipped, so
     ``node_modules/some-lib/package.json`` never contributes a false signal.
     ``full_repo_size_bytes`` is the sum of ``size`` across all blobs.
+
+    Each path is split once; the resulting segments are reused for both the
+    ignore check and the basename extraction (single-pass, no redundant splits).
     """
     files_full_paths_by_basename: dict[str, set[str]] = defaultdict(set)
     dirs_full_paths_by_basename: dict[str, set[str]] = defaultdict(set)
     full_repo_size_bytes = 0
 
     for entry in entries:
         path = entry.get("path", "")
+        entry_type = entry.get("type")
         size = entry.get("size") or 0
 
-        if entry.get("type") == "blob":
+        if entry_type == "blob":
             full_repo_size_bytes += size
 
-        if not path or _path_is_ignored(path):
+        if not path:
             continue
 
-        entry_type = entry.get("type")
-        basename = path.rsplit("/", 1)[-1]
+        # Split once; reuse segments for ignore check and basename.
+        segments = path.split("/")
+        if _segments_are_ignored(segments):
+            continue
+
+        basename = segments[-1]
 
         if entry_type == "blob":
             files_full_paths_by_basename[basename].add(path)
@@ -480,11 +490,15 @@ def detect_platforms_multi(
     """
     start_time = time.monotonic()
 
-    languages: dict[str, int] = client.get_languages(repo)
-    active_platforms = _select_active_platforms(languages)
-
+    # Run get_languages and _get_tree concurrently — they are independent
+    # requests.  active_platforms only needs languages, so it is computed on
+    # the main thread while the tree fetch is in flight.
     tree_start = time.monotonic()
-    entries, is_truncated = _get_tree(client, repo, ref)
+    with ContextPropagatingThreadPoolExecutor(max_workers=1) as ex:
+        tree_future = ex.submit(_get_tree, client, repo, ref)
+        languages: dict[str, int] = client.get_languages(repo)
+        active_platforms = _select_active_platforms(languages)
+        entries, is_truncated = tree_future.result()
     tree_duration_ms = (time.monotonic() - tree_start) * 1000
     index = _build_tree_index(entries)
 
@@ -526,12 +540,20 @@ def detect_platforms_multi(
     # are always within the cap before subdirectory files from monorepo workspaces.
     capped_paths = sorted(needed_paths, key=lambda p: (p.count("/"), p))[:MAX_CONTENT_READS]
 
+    # Fan out all content reads in parallel — each _get_repo_file_content call
+    # is an independent REST round-trip and already returns None on failure, so
+    # a single slow or missing file cannot block the others.
     content_reads_start = time.monotonic()
     content_by_path: dict[str, str] = {}
-    for path in capped_paths:
-        content = _get_repo_file_content(client, repo, path, ref)
-        if content is not None:
-            content_by_path[path] = content
+    if capped_paths:
+        with ContextPropagatingThreadPoolExecutor(max_workers=len(capped_paths)) as ex:
+            future_to_path = {
+                ex.submit(_get_repo_file_content, client, repo, p, ref): p for p in capped_paths
+            }
+            for future in as_completed(future_to_path):
+                content = future.result()
+                if content is not None:
+                    content_by_path[future_to_path[future]] = content
     content_reads_duration_ms = (time.monotonic() - content_reads_start) * 1000
 
     manifests_by_path: dict[str, _PackageManifest] = {}
@@ -628,11 +650,15 @@ def detect_platforms_multi(
         f"{_MULTI_METRICS_PREFIX}.k_reads_realized",
         k_reads_realized,
     )
+    # tree.duration: wall time of the concurrent (languages + tree) block —
+    # effectively the tree's wall time since it is the long pole.
     sentry_sdk.metrics.distribution(
         f"{_MULTI_METRICS_PREFIX}.tree.duration",
         tree_duration_ms,
         unit="millisecond",
     )
+    # content_reads.duration: parallel wall time (max of individual reads),
+    # not the former serial sum — expect this metric to be significantly lower.
     sentry_sdk.metrics.distribution(
         f"{_MULTI_METRICS_PREFIX}.content_reads.duration",
         content_reads_duration_ms,

@@ -12,8 +12,8 @@
     _collect_needed_paths,
     _framework_matches_scoped,
     _get_tree,
-    _path_is_ignored,
     _rule_parent_dirs,
+    _segments_are_ignored,
     _select_active_platforms,
     detect_platforms_multi,
 )
@@ -23,7 +23,7 @@
     FrameworkDef,
     _PackageManifest,
 )
-from sentry.shared_integrations.exceptions import ApiError
+from sentry.shared_integrations.exceptions import ApiConflictError, ApiError
 from sentry.utils import json
 
 
@@ -623,26 +623,84 @@ def test_byte_count_descending_ordering(self) -> None:
         assert first_platform == "python"
 
 
-class TestPathIsIgnored:
+class TestSegmentsAreIgnored:
     def test_node_modules_segment_ignored(self) -> None:
-        assert _path_is_ignored("node_modules/react/index.js") is True
+        assert _segments_are_ignored(["node_modules", "react", "index.js"]) is True
 
     def test_nested_ignored_segment(self) -> None:
-        assert _path_is_ignored("a/b/vendor/c/util.py") is True
+        assert _segments_are_ignored(["a", "b", "vendor", "c", "util.py"]) is True
 
     def test_build_gradle_file_not_ignored(self) -> None:
-        # "build" is an ignored *directory* segment, but "build.gradle" is a filename,
-        # not the bare segment "build", so it must NOT be ignored.
-        assert _path_is_ignored("build.gradle") is False
+        # "build" is an ignored *directory* segment, but "build.gradle" as a
+        # single segment is not the bare string "build", so must NOT be ignored.
+        assert _segments_are_ignored(["build.gradle"]) is False
 
     def test_clean_path_not_ignored(self) -> None:
-        assert _path_is_ignored("src/app/main.py") is False
+        assert _segments_are_ignored(["src", "app", "main.py"]) is False
 
     def test_root_level_file_not_ignored(self) -> None:
-        assert _path_is_ignored("manage.py") is False
+        assert _segments_are_ignored(["manage.py"]) is False
 
     def test_dist_dir_ignored(self) -> None:
-        assert _path_is_ignored("dist/bundle.js") is True
+        assert _segments_are_ignored(["dist", "bundle.js"]) is True
+
+
+class TestDetectPlatformsMultiConcurrency:
+    def test_isolated_content_read_failure_does_not_abort_detection(self) -> None:
+        """If one content read raises ApiError the other reads must still
+        complete and the detection must return a valid result."""
+        from base64 import b64encode
+
+        # tree: requirements.txt (django content) + a second file that will 404
+        tree = [
+            _make_tree_entry("requirements.txt"),
+            _make_tree_entry("broken_file.txt"),  # will raise ApiError
+        ]
+        client = mock.MagicMock()
+        client.get_languages.return_value = {"Python": 80_000}
+
+        def get_side_effect(path: str, params: dict | None = None) -> Any:
+            if "/git/trees/" in path:
+                return {"tree": tree, "truncated": False}
+            rel = path.split("/contents/", 1)[1]
+            if rel == "requirements.txt":
+                return {"content": b64encode(b"Django==4.2\n").decode()}
+            raise ApiError("Not Found", code=404)
+
+        client.get.side_effect = get_side_effect
+        # Must not raise; ApiError on a single file is swallowed by
+        # _get_repo_file_content and should not abort the pool.
+        result = detect_platforms_multi(client, "owner/repo")
+        platform_ids = {p["platform"] for p in result["platforms"]}
+        assert "python-django" in platform_ids
+
+    def test_api_conflict_error_propagates_from_tree_future(self) -> None:
+        client = mock.MagicMock()
+        client.get_languages.return_value = {}
+        client.get.side_effect = ApiConflictError("empty repo")
+        import pytest
+
+        with pytest.raises(ApiConflictError):
+            detect_platforms_multi(client, "owner/repo")
+
+    def test_languages_computed_before_tree_result_is_joined(self) -> None:
+        """active_platforms is derived from languages independently of the
+        tree.  Verify detection still produces correct output even when the
+        tree responds very slowly (simulated via call ordering introspection)."""
+        # Simple fixture: Go repo, go.mod at root (existence rule → high match).
+        tree = [_make_tree_entry("go.mod")]
+        client = _make_client(
+            languages={"Go": 60_000},
+            tree=tree,
+            contents={},
+        )
+        result = detect_platforms_multi(client, "owner/repo")
+        platform_ids = {p["platform"] for p in result["platforms"]}
+        assert "go" in platform_ids
+        # get_languages must have been called exactly once, tree call once.
+        assert client.get_languages.call_count == 1
+        tree_calls = [c for c in client.get.call_args_list if "/git/trees/" in c.args[0]]
+        assert len(tree_calls) == 1
 
 
 class TestGetTree: