mozilla · sylvestre · Jun 23, 2026 · Jun 26, 2026
diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py
@@ -24,6 +24,7 @@
     AgentResponse,
     CodeReviewToolResponse,
     GeneratedReviewComment,
+    PatchScopeResponse,
     Skill,
 )
 from bugbug.tools.code_review.database import ReviewCommentsDB
@@ -36,6 +37,7 @@
     CODE_REVIEW_TODO_PROMPT,
     CODE_REVIEW_TODO_TOOL_DESCRIPTION,
     FIRST_MESSAGE_TEMPLATE,
+    PATCH_SCOPE_PROMPT,
     STATIC_COMMENT_EXAMPLES,
     SYSTEM_PROMPT_TEMPLATE,
     TEMPLATE_COMMENT_EXAMPLE,
@@ -124,6 +126,13 @@ def __init__(
             middleware=middleware,
         )
 
+        # A separate, tool-less pass dedicated to assessing the patch's overall
+        # scope and suggesting a split when the patch is too large or unfocused.
+        self.scope_agent = create_agent(
+            llm,
+            response_format=ProviderStrategy(PatchScopeResponse),
+        )
+
         self.review_comments_db = review_comments_db
 
         self.show_patch_example = show_patch_example
@@ -243,6 +252,21 @@ async def generate_review_comments(
 
         return result["structured_response"].comments, manifest
 
+    async def assess_patch_scope(
+        self, patch: Patch, patch_summary: str
+    ) -> list[GeneratedReviewComment]:
+        """Run a separate pass that suggests splitting an overly large/unfocused patch.
+
+        Returns at most one comment. Empty when no split is warranted.
+        """
+        prompt = PATCH_SCOPE_PROMPT.format(
+            target_software=self.target_software,
+            patch=format_patch_set(patch.patch_set),
+            patch_summary=patch_summary,
+        )
+        result = await self.scope_agent.ainvoke({"messages": [HumanMessage(prompt)]})
+        return result["structured_response"].comments[:1]
+
     async def run(self, patch: Patch) -> CodeReviewToolResponse:
         if self.count_tokens(patch.raw_diff) > 21000:
             raise LargeDiffError("The diff is too large")
@@ -258,8 +282,16 @@ async def run(self, patch: Patch) -> CodeReviewToolResponse:
 
         filtered_suggestions = self.suggestion_filterer.run(unfiltered_suggestions)
 
+        # The patch-scope suggestion is a deliberate, meta-level comment, so it
+        # bypasses the suggestion filterer and is always sorted last.
+        scope_suggestions = await self.assess_patch_scope(patch, patch_summary)
+        for offset, suggestion in enumerate(scope_suggestions):
+            suggestion.order = len(filtered_suggestions) + offset + 1
+
         inline_comments = list(
-            convert_generated_comments_to_inline(filtered_suggestions, patch.patch_set)
+            convert_generated_comments_to_inline(
+                filtered_suggestions + scope_suggestions, patch.patch_set
+            )
         )
 
         return CodeReviewToolResponse(
@@ -268,6 +300,7 @@ async def run(self, patch: Patch) -> CodeReviewToolResponse:
             details={
                 "model": self._agent_model_name,
                 "num_unfiltered_suggestions": len(unfiltered_suggestions),
+                "num_scope_suggestions": len(scope_suggestions),
                 "external_content": external_content_manifest,
             },
         )

diff --git a/bugbug/tools/code_review/data_types.py b/bugbug/tools/code_review/data_types.py
@@ -30,6 +30,17 @@ class AgentResponse(BaseModel):
     )
 
 
+class PatchScopeResponse(BaseModel):
+    """The response from the patch-scope assessment pass."""
+
+    comments: list[GeneratedReviewComment] = Field(
+        description=(
+            "At most one comment suggesting the patch be split into smaller, "
+            "independently reviewable pieces. Empty when no split is warranted."
+        )
+    )
+
+
 class CodeReviewToolResponse(BaseModel):
     """The response from the CodeReviewTool."""
 

diff --git a/bugbug/tools/code_review/prompts.py b/bugbug/tools/code_review/prompts.py
@@ -58,6 +58,37 @@
 """
 
 
+PATCH_SCOPE_PROMPT = """You are an expert {target_software} engineer assessing whether a pull request is too large or unfocused to be reviewed well.
+
+Large changes are harder to review and empirically carry higher defect and regression risk: reviewer defect-detection drops sharply once a change grows past a few hundred changed lines, and at Mozilla the patches that introduce regressions tend to be the larger ones.
+
+Decide whether to suggest the author split this patch into smaller, independently reviewable pieces. Either of these is a reason to suggest a split:
+  1. The patch bundles multiple INDEPENDENT, unrelated changes (e.g. two unrelated features, an unrelated refactor mixed with a behavior change, or several bug fixes that share no rationale)
+  2. The patch is very large even if cohesive — past roughly a few hundred changed lines a single change becomes hard to review thoroughly, regardless of how unified it is
+
+Rules:
+- Emit AT MOST ONE comment. If neither reason holds, return an empty list of comments.
+- Use judgement: do NOT flag a moderately sized, cohesive change. Reserve this for patches whose size or mixed scope genuinely impedes careful review.
+- A large change that has no natural seam and must land atomically is acceptable — return an empty list rather than suggesting an impractical split.
+- If you do comment, name concrete seams: the distinct concerns, or the stages of a large cohesive change (e.g. land the data-model change separately from the call-site updates).
+- Briefly tell the author *why*: larger patches get less thorough review and empirically introduce more bugs and regressions, so smaller patches are easier to review and safer to land.
+- Anchor the comment to a representative changed line (a line that begins with `+`). Set `file` to that file's path and `code_line` to that line's number.
+- Use direct, declarative language. NEVER use these banned phrases: "maybe", "might want to", "consider", "possibly", "could be", "you may want to".
+
+Here is a summary of the patch:
+
+<patch_summary>
+{patch_summary}
+</patch_summary>
+
+Here is the patch you need to assess:
+
+<patch>
+{patch}
+</patch>
+"""
+
+
 FIRST_MESSAGE_TEMPLATE = """Here is a summary of the patch:
 
 <patch_summary>

diff --git a/tests/test_code_review.py b/tests/test_code_review.py
@@ -13,6 +13,8 @@
 from bugbug.tools.code_review import data_types, langchain_tools, review_context
 from bugbug.tools.code_review.data_types import (
     ExternalContent,
+    GeneratedReviewComment,
+    PatchScopeResponse,
     Skill,
     _strip_frontmatter,
 )
@@ -1258,3 +1260,95 @@ async def test_extra_context_toml_replaces_by_name():
     assert len(results) == 1
     assert results[0].name == ".claude/skills/dom-media-v2.md"
     assert results[0].body == "Updated guidelines.\n"
+
+
+# ---------------------------------------------------------------------------
+# patch-scope assessment pass (split suggestion)
+# ---------------------------------------------------------------------------
+
+
+def _make_scope_tool(scope_response):
+    """Build a CodeReviewTool without running __init__ (avoids create_agent).
+
+    The scope_agent is mocked to return the given PatchScopeResponse.
+    """
+    pytest.importorskip("langchain")
+    from bugbug.tools.code_review.agent import CodeReviewTool
+
+    tool = CodeReviewTool.__new__(CodeReviewTool)
+    tool.target_software = "Mozilla Firefox"
+    tool.scope_agent = MagicMock()
+    tool.scope_agent.ainvoke = AsyncMock(
+        return_value={"structured_response": scope_response}
+    )
+    return tool
+
+
+def test_assess_patch_scope_returns_at_most_one_comment():
+    # The model returns two comments; the pass must keep only the first.
+    comments = [
+        GeneratedReviewComment(
+            file="b/f.txt",
+            code_line=1,
+            comment=f"Split this patch {i}",
+            explanation="bundles unrelated changes",
+            order=1,
+        )
+        for i in range(2)
+    ]
+    tool = _make_scope_tool(PatchScopeResponse(comments=comments))
+
+    patch = make_patch("--- a/f.txt\n+++ b/f.txt\n@@ -0,0 +1,2 @@\n+a\n+b\n")
+    result = asyncio.run(tool.assess_patch_scope(patch, "summary"))
+
+    assert len(result) == 1
+    assert result[0].comment == "Split this patch 0"
+
+
+def test_assess_patch_scope_returns_empty_when_no_split_warranted():
+    tool = _make_scope_tool(PatchScopeResponse(comments=[]))
+
+    patch = make_patch("--- a/f.txt\n+++ b/f.txt\n@@ -0,0 +1,2 @@\n+a\n+b\n")
+    result = asyncio.run(tool.assess_patch_scope(patch, "summary"))
+
+    assert result == []
+
+
+def test_run_appends_scope_suggestion_last():
+    pytest.importorskip("langchain")
+
+    regular = GeneratedReviewComment(
+        file="b/f.txt",
+        code_line=1,
+        comment="Fix the bug",
+        explanation="off-by-one",
+        order=1,
+    )
+    scope = GeneratedReviewComment(
+        file="b/f.txt",
+        code_line=2,
+        comment="Split this patch into smaller pieces",
+        explanation="bundles unrelated changes",
+        order=1,
+    )
+    tool = _make_scope_tool(PatchScopeResponse(comments=[scope]))
+
+    patch = make_patch("--- a/f.txt\n+++ b/f.txt\n@@ -0,0 +1,2 @@\n+a\n+b\n")
+    patch.raw_diff = "diff"
+
+    tool.count_tokens = MagicMock(return_value=10)
+    tool._agent_model = "model-x"
+    tool.patch_summarizer = MagicMock()
+    tool.patch_summarizer.run = MagicMock(return_value="summary")
+    tool.generate_review_comments = AsyncMock(return_value=([regular], []))
+    tool.suggestion_filterer = MagicMock()
+    tool.suggestion_filterer.run = MagicMock(return_value=[regular])
+
+    result = asyncio.run(tool.run(patch))
+
+    assert result.details["num_scope_suggestions"] == 1
+    assert len(result.review_comments) == 2
+    # The split suggestion is sorted last (order = len(filtered) + 1).
+    last = result.review_comments[-1]
+    assert last.content == "Split this patch into smaller pieces"
+    assert last.order == 2