52 changes: 0 additions & 52 deletions bugbug/code_search/mozilla.py
@@ -6,7 +6,6 @@
from bugbug.code_search.parser import FunctionSearchParser
from bugbug.code_search.searchfox_api import FunctionSearchSearchfoxAPI
from bugbug.code_search.searchfox_data import FunctionSearchSearchfoxData
from bugbug.tools.core.platforms.phabricator import PhabricatorPatch


class FunctionSearchMozilla(FunctionSearch):
@@ -115,54 +114,3 @@ def get_file(commit_hash, path):
repo_dir = sys.argv[1]

function_search_mozilla = FunctionSearchMozilla(repo_dir, get_file, False)

# https://phabricator.services.mozilla.com/D199272?id=811858
patch1 = PhabricatorPatch("811858")

# In this case, the function was not used before the patch.
print(
function_search_mozilla.get_function_by_name(
patch1.base_commit_hash,
"dom/base/nsObjectLoadingContent.cpp",
"LowerCaseEqualsASCII",
)
)

# In this case, the function was used before the patch.
print(
function_search_mozilla.get_function_by_name(
patch1.base_commit_hash,
"dom/base/nsObjectLoadingContent.cpp",
"HtmlObjectContentTypeForMIMEType",
)
)

# https://phabricator.services.mozilla.com/D199248?id=811740
patch2 = PhabricatorPatch("811740")

# In this case, it is a JS file.
print(
function_search_mozilla.get_function_by_name(
patch2.base_commit_hash,
"testing/modules/XPCShellContentUtils.sys.mjs",
"registerPathHandler",
)
)

patch3 = PhabricatorPatch("721783")

print(
function_search_mozilla.get_function_by_name(
patch3.base_commit_hash,
"dom/performance/Performance.cpp",
"Performance::MemoryPressure",
)
)

patch4 = PhabricatorPatch("736446")

function_search_mozilla.get_function_by_line(
patch4.base_commit_hash,
"browser/base/content/test/webrtc/browser_devices_select_audio_output.js",
180,
)
9 changes: 0 additions & 9 deletions bugbug/tools/code_review/__init__.py
@@ -43,13 +43,6 @@
from bugbug.tools.core.platforms.phabricator import (
PhabricatorReviewData,
)
from bugbug.tools.core.platforms.swarm import SwarmReviewData

# Legacy compatibility
review_data_classes = {
"phabricator": PhabricatorReviewData,
"swarm": SwarmReviewData,
}

__all__ = [
# Agent
@@ -71,6 +64,4 @@
"ReviewData",
# Phabricator
"PhabricatorReviewData",
# Legacy
"review_data_classes",
]
24 changes: 7 additions & 17 deletions bugbug/tools/code_review/agent.py
@@ -41,7 +41,7 @@
from bugbug.tools.core.data_types import InlineComment
from bugbug.tools.core.exceptions import LargeDiffError, ModelResultError
from bugbug.tools.core.llms import get_tokenizer
from bugbug.tools.core.platforms.base import Patch, ReviewData
from bugbug.tools.core.platforms.base import Patch

logger = getLogger(__name__)

@@ -84,7 +84,6 @@ def __init__(
llm: BaseChatModel,
patch_summarizer: PatchSummarizer,
suggestion_filterer: SuggestionFilterer,
review_data: ReviewData,
function_search: Optional[FunctionSearch] = None,
review_comments_db: Optional["ReviewCommentsDB"] = None,
show_patch_example: bool = False,
Expand All @@ -95,8 +94,6 @@ def __init__(

self.target_software = target_software

self.review_data = review_data

self._tokenizer = get_tokenizer(
llm.model_name if hasattr(llm, "model_name") else ""
)
@@ -156,11 +153,6 @@ def create(cls, **kwargs):
QdrantVectorDB("diff_comments")
)

if "review_data" not in kwargs:
from bugbug.tools.core.platforms.phabricator import PhabricatorReviewData

kwargs["review_data"] = PhabricatorReviewData()

if "llm" not in kwargs:
from bugbug.tools.core.llms import create_anthropic_llm

@@ -196,11 +188,11 @@ def generate_initial_prompt(self, patch: Patch, patch_summary: str) -> str:
approved_examples=self._get_generated_examples(patch, created_before),
)

def generate_review_comments(
async def generate_review_comments(
self, patch: Patch, patch_summary: str
) -> list[GeneratedReviewComment]:
try:
for chunk in self.agent.stream(
async for chunk in self.agent.astream(
{
"messages": [
HumanMessage(
@@ -218,17 +210,15 @@ def generate_review_comments(

return result["structured_response"].comments

def run_by_diff_id(self, diff_id: str | int) -> list[InlineComment] | None:
patch = self.review_data.get_patch_by_id(diff_id)
return self.run(patch)

def run(self, patch: Patch) -> list[InlineComment] | None:
async def run(self, patch: Patch) -> list[InlineComment] | None:
if self.count_tokens(patch.raw_diff) > 21000:
raise LargeDiffError("The diff is too large")

patch_summary = self.patch_summarizer.run(patch)

unfiltered_suggestions = self.generate_review_comments(patch, patch_summary)
unfiltered_suggestions = await self.generate_review_comments(
patch, patch_summary
)
if not unfiltered_suggestions:
logger.info("No suggestions were generated")
return []
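Because `run_by_diff_id()` is removed and `run()` is now a coroutine, callers fetch the `Patch` themselves (e.g. via a platform's `get_patch_by_id`) and await the agent inside an event loop. A minimal caller sketch; the `agent` and `patch` objects are assumed to exist already and the function name is illustrative:

import asyncio

# Illustrative only: "agent" is a review agent built via its create() factory,
# and "patch" is a Patch obtained from a platform's get_patch_by_id().
async def review_patch(agent, patch):
    # run() is now a coroutine and must be awaited.
    comments = await agent.run(patch)
    return comments or []

# comments = asyncio.run(review_patch(agent, patch))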
4 changes: 2 additions & 2 deletions bugbug/tools/code_review/langchain_tools.py
@@ -20,7 +20,7 @@ class CodeReviewContext:


@tool
def expand_context(file_path: str, start_line: int, end_line: int) -> str:
async def expand_context(file_path: str, start_line: int, end_line: int) -> str:
"""Show the content of a file between specified line numbers as it is before the patch.

Be careful to not fill your context window with too much data. Request the
@@ -38,7 +38,7 @@ def expand_context(file_path: str, start_line: int, end_line: int) -> str:
runtime = get_runtime(CodeReviewContext)

try:
file_content = runtime.context.patch.get_old_file(file_path)
file_content = await runtime.context.patch.get_old_file(file_path)
except FileNotFoundError:
return "File not found in the repository before the patch."

47 changes: 27 additions & 20 deletions bugbug/tools/code_review/scorer.py
@@ -30,27 +30,30 @@ def score(
"ground_truth_valid_count": valid_comment_count,
"ground_truth_invalid_count": invalid_comment_count,
"ground_truth_total_count": len(ground_truth_comments),
"successful": not output["error"],
}

def summarize(self, score_rows: list[dict]) -> dict:
"""Aggregate scores across all examples."""
total_generated = sum(r["generated_comment_count"] for r in score_rows)
total_gt_valid = sum(r["ground_truth_valid_count"] for r in score_rows)
total_gt_invalid = sum(r["ground_truth_invalid_count"] for r in score_rows)
total_gt = sum(r["ground_truth_total_count"] for r in score_rows)
error_count = sum(not r["successful"] for r in score_rows)
total_examples = len(score_rows)
total_generated = sum(r.get("generated_comment_count", 0) for r in score_rows)
total_gt_valid = sum(r.get("ground_truth_valid_count", 0) for r in score_rows)
total_gt_invalid = sum(
r.get("ground_truth_invalid_count", 0) for r in score_rows
)
total_gt = sum(r.get("ground_truth_total_count", 0) for r in score_rows)
successful_runs = sum("generated_comment_count" in r for r in score_rows)
error_count = total_examples - successful_runs

return {
"total_generated_comments": total_generated,
"total_ground_truth_valid": total_gt_valid,
"total_ground_truth_invalid": total_gt_invalid,
"total_ground_truth": total_gt,
"avg_generated_per_diff": (
total_generated / len(score_rows) if score_rows else 0
total_generated / successful_runs if successful_runs else 0
),
"error_rate": error_count / len(score_rows) if score_rows else 0,
"num_examples": len(score_rows),
"error_rate": error_count / total_examples if total_examples else 0,
"num_examples": total_examples,
}


@@ -210,36 +213,40 @@ def score(
}

def summarize(self, score_rows: list[dict]) -> dict:
total_matched_valid = sum(r["matched_valid_count"] for r in score_rows)
total_matched_invalid = sum(r["matched_invalid_count"] for r in score_rows)
total_unmatched_gen = sum(r["unmatched_generated_count"] for r in score_rows)
total_matched_valid = sum(r.get("matched_valid_count", 0) for r in score_rows)
total_matched_invalid = sum(
r.get("matched_invalid_count", 0) for r in score_rows
)
total_unmatched_gen = sum(
r.get("unmatched_generated_count", 0) for r in score_rows
)
total_unmatched_gt_valid = sum(
r["unmatched_ground_truth_valid_count"] for r in score_rows
r.get("unmatched_ground_truth_valid_count", 0) for r in score_rows
)
total_unmatched_gt_invalid = sum(
r["unmatched_ground_truth_invalid_count"] for r in score_rows
r.get("unmatched_ground_truth_invalid_count", 0) for r in score_rows
)

total_gt_valid = total_matched_valid + total_unmatched_gt_valid
total_gt_invalid = total_matched_invalid + total_unmatched_gt_invalid

# Filtering aggregates
total_retained = sum(r["filtering_retained_count"] for r in score_rows)
total_excluded = sum(r["filtering_excluded_count"] for r in score_rows)
total_retained = sum(r.get("filtering_retained_count", 0) for r in score_rows)
total_excluded = sum(r.get("filtering_excluded_count", 0) for r in score_rows)
total_generated = total_retained + total_excluded

# Filtering x Matching aggregates (use len() since values are lists)
total_matched_valid_retained = sum(
len(r["matched_valid_retained"]) for r in score_rows
len(r.get("matched_valid_retained", [])) for r in score_rows
)
total_matched_valid_excluded = sum(
len(r["matched_valid_excluded"]) for r in score_rows
len(r.get("matched_valid_excluded", [])) for r in score_rows
)
total_matched_invalid_retained = sum(
len(r["matched_invalid_retained"]) for r in score_rows
len(r.get("matched_invalid_retained", [])) for r in score_rows
)
total_matched_invalid_excluded = sum(
len(r["matched_invalid_excluded"]) for r in score_rows
len(r.get("matched_invalid_excluded", [])) for r in score_rows
)

return {
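A small worked example of why the switch to `.get()` defaults matters: a row from a failed run can omit the count keys entirely, and `summarize()` should still aggregate over the successful rows instead of raising `KeyError`. The row shapes below are illustrative only:

# Illustrative rows only; real rows carry more keys than shown here.
rows = [
    {
        "generated_comment_count": 3,
        "ground_truth_valid_count": 2,
        "ground_truth_invalid_count": 1,
        "ground_truth_total_count": 3,
    },
    {},  # failed run: no counts were recorded for this example
]
# With the .get() defaults, successful_runs == 1, error_count == 1,
# and avg_generated_per_diff == 3 / 1 instead of a KeyError.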
40 changes: 40 additions & 0 deletions bugbug/tools/core/connection.py
@@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

"""Core connection utilities for bugbug tools."""

import os
from functools import cache

import httpx


def get_user_agent() -> str:
"""Get the User-Agent string from environment or default."""
return os.getenv("USER_AGENT", "bugbug")


@cache
def get_http_client() -> httpx.AsyncClient:
"""Get the shared HTTP client instance."""
http_client = httpx.AsyncClient(
follow_redirects=True,
headers={
"User-Agent": get_user_agent(),
},
)

return http_client


async def close_http_client() -> None:
"""Close the shared HTTP client instance and clear the cache."""
if get_http_client.cache_info().currsize == 0:
# No cached client to close
return

client = get_http_client()
get_http_client.cache_clear()
await client.aclose()
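A hedged usage sketch for the new shared client (the URL below is a placeholder): every caller reuses the same cached `AsyncClient`, and the application closes it once on shutdown.

import asyncio

from bugbug.tools.core.connection import close_http_client, get_http_client


async def fetch_text(url: str) -> str:
    # All callers share the same cached AsyncClient instance.
    client = get_http_client()
    response = await client.get(url)
    response.raise_for_status()
    return response.text


async def main() -> None:
    try:
        print(await fetch_text("https://example.org/"))  # placeholder URL
    finally:
        # Close the shared client exactly once, e.g. on application shutdown.
        await close_http_client()


# asyncio.run(main())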
10 changes: 1 addition & 9 deletions bugbug/tools/core/platforms/base.py
@@ -23,10 +23,6 @@ class Patch(ABC):
@abstractmethod
def patch_id(self) -> str: ...

@property
@abstractmethod
def base_commit_hash(self) -> str: ...

@property
@abstractmethod
def raw_diff(self) -> str: ...
@@ -64,7 +60,7 @@ def patch_url(self) -> str:
...

@abstractmethod
def get_old_file(self, file_path: str) -> str:
async def get_old_file(self, file_path: str) -> str:
"""Return the contents of a file before the patch was applied."""
...

@@ -74,10 +70,6 @@ class ReviewData(ABC):

NIT_PATTERN = re.compile(r"[^a-zA-Z0-9]nit[\s:,]", re.IGNORECASE)

@abstractmethod
def get_review_request_by_id(self, review_id: int):
raise NotImplementedError

@abstractmethod
def get_patch_by_id(self, patch_id: str | int) -> Patch:
raise NotImplementedError
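Since `get_old_file` is now an async method on the abstract `Patch`, each platform implements it as a coroutine. A minimal subclass sketch, assuming the pre-patch file is fetched over HTTP through the shared client; the class name and endpoint are hypothetical:

from bugbug.tools.core.connection import get_http_client
from bugbug.tools.core.platforms.base import Patch


class ExamplePatch(Patch):
    # Sketch only: the remaining abstract members (patch_id, raw_diff, ...)
    # would still need real implementations before this class is usable.
    async def get_old_file(self, file_path: str) -> str:
        client = get_http_client()
        # Hypothetical raw-file endpoint; real platforms build their own URL.
        response = await client.get(f"https://example.org/raw/{file_path}")
        if response.status_code == 404:
            raise FileNotFoundError(file_path)
        response.raise_for_status()
        return response.text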