diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py
index f04de3d860..c486f2b48f 100644
--- a/bugbug/tools/code_review/agent.py
+++ b/bugbug/tools/code_review/agent.py
@@ -24,6 +24,7 @@
AgentResponse,
CodeReviewToolResponse,
GeneratedReviewComment,
+ PatchScopeResponse,
Skill,
)
from bugbug.tools.code_review.database import ReviewCommentsDB
@@ -36,6 +37,7 @@
CODE_REVIEW_TODO_PROMPT,
CODE_REVIEW_TODO_TOOL_DESCRIPTION,
FIRST_MESSAGE_TEMPLATE,
+ PATCH_SCOPE_PROMPT,
STATIC_COMMENT_EXAMPLES,
SYSTEM_PROMPT_TEMPLATE,
TEMPLATE_COMMENT_EXAMPLE,
@@ -124,6 +126,13 @@ def __init__(
middleware=middleware,
)
+ # A separate, tool-less pass dedicated to assessing the patch's overall
+ # scope and suggesting a split when the patch is too large or unfocused.
+ self.scope_agent = create_agent(
+ llm,
+ response_format=ProviderStrategy(PatchScopeResponse),
+ )
+
self.review_comments_db = review_comments_db
self.show_patch_example = show_patch_example
@@ -243,6 +252,21 @@ async def generate_review_comments(
return result["structured_response"].comments, manifest
+ async def assess_patch_scope(
+ self, patch: Patch, patch_summary: str
+ ) -> list[GeneratedReviewComment]:
+ """Run a separate pass that suggests splitting an overly large/unfocused patch.
+
+ Returns at most one comment. Empty when no split is warranted.
+ """
+ prompt = PATCH_SCOPE_PROMPT.format(
+ target_software=self.target_software,
+ patch=format_patch_set(patch.patch_set),
+ patch_summary=patch_summary,
+ )
+ result = await self.scope_agent.ainvoke({"messages": [HumanMessage(prompt)]})
+ return result["structured_response"].comments[:1]
+
async def run(self, patch: Patch) -> CodeReviewToolResponse:
if self.count_tokens(patch.raw_diff) > 21000:
raise LargeDiffError("The diff is too large")
@@ -258,8 +282,16 @@ async def run(self, patch: Patch) -> CodeReviewToolResponse:
filtered_suggestions = self.suggestion_filterer.run(unfiltered_suggestions)
+ # The patch-scope suggestion is a deliberate, meta-level comment, so it
+ # bypasses the suggestion filterer and is always sorted last.
+ scope_suggestions = await self.assess_patch_scope(patch, patch_summary)
+ for offset, suggestion in enumerate(scope_suggestions):
+ suggestion.order = len(filtered_suggestions) + offset + 1
+
inline_comments = list(
- convert_generated_comments_to_inline(filtered_suggestions, patch.patch_set)
+ convert_generated_comments_to_inline(
+ filtered_suggestions + scope_suggestions, patch.patch_set
+ )
)
return CodeReviewToolResponse(
@@ -268,6 +300,7 @@ async def run(self, patch: Patch) -> CodeReviewToolResponse:
details={
"model": self._agent_model_name,
"num_unfiltered_suggestions": len(unfiltered_suggestions),
+ "num_scope_suggestions": len(scope_suggestions),
"external_content": external_content_manifest,
},
)
diff --git a/bugbug/tools/code_review/data_types.py b/bugbug/tools/code_review/data_types.py
index b6c56c1114..07d2c002fb 100644
--- a/bugbug/tools/code_review/data_types.py
+++ b/bugbug/tools/code_review/data_types.py
@@ -30,6 +30,17 @@ class AgentResponse(BaseModel):
)
+class PatchScopeResponse(BaseModel):
+ """The response from the patch-scope assessment pass."""
+
+ comments: list[GeneratedReviewComment] = Field(
+ description=(
+ "At most one comment suggesting the patch be split into smaller, "
+ "independently reviewable pieces. Empty when no split is warranted."
+ )
+ )
+
+
class CodeReviewToolResponse(BaseModel):
"""The response from the CodeReviewTool."""
diff --git a/bugbug/tools/code_review/prompts.py b/bugbug/tools/code_review/prompts.py
index 8d6d94b61f..3b91556f7e 100644
--- a/bugbug/tools/code_review/prompts.py
+++ b/bugbug/tools/code_review/prompts.py
@@ -58,6 +58,37 @@
"""
+PATCH_SCOPE_PROMPT = """You are an expert {target_software} engineer assessing whether a pull request is too large or unfocused to be reviewed well.
+
+Large changes are harder to review and empirically carry higher defect and regression risk: reviewer defect-detection drops sharply once a change grows past a few hundred changed lines, and at Mozilla the patches that introduce regressions tend to be the larger ones.
+
+Decide whether to suggest the author split this patch into smaller, independently reviewable pieces. Either of these is a reason to suggest a split:
+ 1. The patch bundles multiple INDEPENDENT, unrelated changes (e.g. two unrelated features, an unrelated refactor mixed with a behavior change, or several bug fixes that share no rationale)
+ 2. The patch is very large even if cohesive — past roughly a few hundred changed lines a single change becomes hard to review thoroughly, regardless of how unified it is
+
+Rules:
+- Emit AT MOST ONE comment. If neither reason holds, return an empty list of comments.
+- Use judgement: do NOT flag a moderately sized, cohesive change. Reserve this for patches whose size or mixed scope genuinely impedes careful review.
+- A large change that has no natural seam and must land atomically is acceptable — return an empty list rather than suggesting an impractical split.
+- If you do comment, name concrete seams: the distinct concerns, or the stages of a large cohesive change (e.g. land the data-model change separately from the call-site updates).
+- Briefly tell the author *why*: larger patches get less thorough review and empirically introduce more bugs and regressions, so smaller patches are easier to review and safer to land.
+- Anchor the comment to a representative changed line (a line that begins with `+`). Set `file` to that file's path and `code_line` to that line's number.
+- Use direct, declarative language. NEVER use these banned phrases: "maybe", "might want to", "consider", "possibly", "could be", "you may want to".
+
+Here is a summary of the patch:
+
+
+{patch_summary}
+
+
+Here is the patch you need to assess:
+
+
+{patch}
+
+"""
+
+
FIRST_MESSAGE_TEMPLATE = """Here is a summary of the patch:
diff --git a/tests/test_code_review.py b/tests/test_code_review.py
index c571583b04..8a533766da 100644
--- a/tests/test_code_review.py
+++ b/tests/test_code_review.py
@@ -13,6 +13,8 @@
from bugbug.tools.code_review import data_types, langchain_tools, review_context
from bugbug.tools.code_review.data_types import (
ExternalContent,
+ GeneratedReviewComment,
+ PatchScopeResponse,
Skill,
_strip_frontmatter,
)
@@ -1258,3 +1260,95 @@ async def test_extra_context_toml_replaces_by_name():
assert len(results) == 1
assert results[0].name == ".claude/skills/dom-media-v2.md"
assert results[0].body == "Updated guidelines.\n"
+
+
+# ---------------------------------------------------------------------------
+# patch-scope assessment pass (split suggestion)
+# ---------------------------------------------------------------------------
+
+
+def _make_scope_tool(scope_response):
+ """Build a CodeReviewTool without running __init__ (avoids create_agent).
+
+ The scope_agent is mocked to return the given PatchScopeResponse.
+ """
+ pytest.importorskip("langchain")
+ from bugbug.tools.code_review.agent import CodeReviewTool
+
+ tool = CodeReviewTool.__new__(CodeReviewTool)
+ tool.target_software = "Mozilla Firefox"
+ tool.scope_agent = MagicMock()
+ tool.scope_agent.ainvoke = AsyncMock(
+ return_value={"structured_response": scope_response}
+ )
+ return tool
+
+
+def test_assess_patch_scope_returns_at_most_one_comment():
+ # The model returns two comments; the pass must keep only the first.
+ comments = [
+ GeneratedReviewComment(
+ file="b/f.txt",
+ code_line=1,
+ comment=f"Split this patch {i}",
+ explanation="bundles unrelated changes",
+ order=1,
+ )
+ for i in range(2)
+ ]
+ tool = _make_scope_tool(PatchScopeResponse(comments=comments))
+
+ patch = make_patch("--- a/f.txt\n+++ b/f.txt\n@@ -0,0 +1,2 @@\n+a\n+b\n")
+ result = asyncio.run(tool.assess_patch_scope(patch, "summary"))
+
+ assert len(result) == 1
+ assert result[0].comment == "Split this patch 0"
+
+
+def test_assess_patch_scope_returns_empty_when_no_split_warranted():
+ tool = _make_scope_tool(PatchScopeResponse(comments=[]))
+
+ patch = make_patch("--- a/f.txt\n+++ b/f.txt\n@@ -0,0 +1,2 @@\n+a\n+b\n")
+ result = asyncio.run(tool.assess_patch_scope(patch, "summary"))
+
+ assert result == []
+
+
+def test_run_appends_scope_suggestion_last():
+ pytest.importorskip("langchain")
+
+ regular = GeneratedReviewComment(
+ file="b/f.txt",
+ code_line=1,
+ comment="Fix the bug",
+ explanation="off-by-one",
+ order=1,
+ )
+ scope = GeneratedReviewComment(
+ file="b/f.txt",
+ code_line=2,
+ comment="Split this patch into smaller pieces",
+ explanation="bundles unrelated changes",
+ order=1,
+ )
+ tool = _make_scope_tool(PatchScopeResponse(comments=[scope]))
+
+ patch = make_patch("--- a/f.txt\n+++ b/f.txt\n@@ -0,0 +1,2 @@\n+a\n+b\n")
+ patch.raw_diff = "diff"
+
+ tool.count_tokens = MagicMock(return_value=10)
+ tool._agent_model = "model-x"
+ tool.patch_summarizer = MagicMock()
+ tool.patch_summarizer.run = MagicMock(return_value="summary")
+ tool.generate_review_comments = AsyncMock(return_value=([regular], []))
+ tool.suggestion_filterer = MagicMock()
+ tool.suggestion_filterer.run = MagicMock(return_value=[regular])
+
+ result = asyncio.run(tool.run(patch))
+
+ assert result.details["num_scope_suggestions"] == 1
+ assert len(result.review_comments) == 2
+ # The split suggestion is sorted last (order = len(filtered) + 1).
+ last = result.review_comments[-1]
+ assert last.content == "Split this patch into smaller pieces"
+ assert last.order == 2