Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion bugbug/tools/code_review/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
AgentResponse,
CodeReviewToolResponse,
GeneratedReviewComment,
PatchScopeResponse,
Skill,
)
from bugbug.tools.code_review.database import ReviewCommentsDB
Expand All @@ -36,6 +37,7 @@
CODE_REVIEW_TODO_PROMPT,
CODE_REVIEW_TODO_TOOL_DESCRIPTION,
FIRST_MESSAGE_TEMPLATE,
PATCH_SCOPE_PROMPT,
STATIC_COMMENT_EXAMPLES,
SYSTEM_PROMPT_TEMPLATE,
TEMPLATE_COMMENT_EXAMPLE,
Expand Down Expand Up @@ -124,6 +126,13 @@ def __init__(
middleware=middleware,
)

# A separate, tool-less pass dedicated to assessing the patch's overall
# scope and suggesting a split when the patch is too large or unfocused.
self.scope_agent = create_agent(
llm,
response_format=ProviderStrategy(PatchScopeResponse),
)

self.review_comments_db = review_comments_db

self.show_patch_example = show_patch_example
Expand Down Expand Up @@ -243,6 +252,21 @@ async def generate_review_comments(

return result["structured_response"].comments, manifest

async def assess_patch_scope(
self, patch: Patch, patch_summary: str
) -> list[GeneratedReviewComment]:
"""Run a separate pass that suggests splitting an overly large/unfocused patch.

Returns at most one comment. Empty when no split is warranted.
"""
prompt = PATCH_SCOPE_PROMPT.format(
target_software=self.target_software,
patch=format_patch_set(patch.patch_set),
patch_summary=patch_summary,
)
result = await self.scope_agent.ainvoke({"messages": [HumanMessage(prompt)]})
return result["structured_response"].comments[:1]

async def run(self, patch: Patch) -> CodeReviewToolResponse:
if self.count_tokens(patch.raw_diff) > 21000:
raise LargeDiffError("The diff is too large")
Expand All @@ -258,8 +282,16 @@ async def run(self, patch: Patch) -> CodeReviewToolResponse:

filtered_suggestions = self.suggestion_filterer.run(unfiltered_suggestions)

# The patch-scope suggestion is a deliberate, meta-level comment, so it
# bypasses the suggestion filterer and is always sorted last.
scope_suggestions = await self.assess_patch_scope(patch, patch_summary)
for offset, suggestion in enumerate(scope_suggestions):
suggestion.order = len(filtered_suggestions) + offset + 1

inline_comments = list(
convert_generated_comments_to_inline(filtered_suggestions, patch.patch_set)
convert_generated_comments_to_inline(
filtered_suggestions + scope_suggestions, patch.patch_set
)
)

return CodeReviewToolResponse(
Expand All @@ -268,6 +300,7 @@ async def run(self, patch: Patch) -> CodeReviewToolResponse:
details={
"model": self._agent_model_name,
"num_unfiltered_suggestions": len(unfiltered_suggestions),
"num_scope_suggestions": len(scope_suggestions),
"external_content": external_content_manifest,
},
)
Expand Down
11 changes: 11 additions & 0 deletions bugbug/tools/code_review/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,17 @@ class AgentResponse(BaseModel):
)


class PatchScopeResponse(BaseModel):
"""The response from the patch-scope assessment pass."""

comments: list[GeneratedReviewComment] = Field(
description=(
"At most one comment suggesting the patch be split into smaller, "
"independently reviewable pieces. Empty when no split is warranted."
)
)


class CodeReviewToolResponse(BaseModel):
"""The response from the CodeReviewTool."""

Expand Down
31 changes: 31 additions & 0 deletions bugbug/tools/code_review/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,37 @@
"""


PATCH_SCOPE_PROMPT = """You are an expert {target_software} engineer assessing whether a pull request is too large or unfocused to be reviewed well.

Large changes are harder to review and empirically carry higher defect and regression risk: reviewer defect-detection drops sharply once a change grows past a few hundred changed lines, and at Mozilla the patches that introduce regressions tend to be the larger ones.

Decide whether to suggest the author split this patch into smaller, independently reviewable pieces. Either of these is a reason to suggest a split:
1. The patch bundles multiple INDEPENDENT, unrelated changes (e.g. two unrelated features, an unrelated refactor mixed with a behavior change, or several bug fixes that share no rationale)
2. The patch is very large even if cohesive — past roughly a few hundred changed lines a single change becomes hard to review thoroughly, regardless of how unified it is

Rules:
- Emit AT MOST ONE comment. If neither reason holds, return an empty list of comments.
- Use judgement: do NOT flag a moderately sized, cohesive change. Reserve this for patches whose size or mixed scope genuinely impedes careful review.
- A large change that has no natural seam and must land atomically is acceptable — return an empty list rather than suggesting an impractical split.
- If you do comment, name concrete seams: the distinct concerns, or the stages of a large cohesive change (e.g. land the data-model change separately from the call-site updates).
- Briefly tell the author *why*: larger patches get less thorough review and empirically introduce more bugs and regressions, so smaller patches are easier to review and safer to land.
- Anchor the comment to a representative changed line (a line that begins with `+`). Set `file` to that file's path and `code_line` to that line's number.
- Use direct, declarative language. NEVER use these banned phrases: "maybe", "might want to", "consider", "possibly", "could be", "you may want to".

Here is a summary of the patch:

<patch_summary>
{patch_summary}
</patch_summary>

Here is the patch you need to assess:

<patch>
{patch}
</patch>
"""


FIRST_MESSAGE_TEMPLATE = """Here is a summary of the patch:

<patch_summary>
Expand Down
94 changes: 94 additions & 0 deletions tests/test_code_review.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
from bugbug.tools.code_review import data_types, langchain_tools, review_context
from bugbug.tools.code_review.data_types import (
ExternalContent,
GeneratedReviewComment,
PatchScopeResponse,
Skill,
_strip_frontmatter,
)
Expand Down Expand Up @@ -1258,3 +1260,95 @@ async def test_extra_context_toml_replaces_by_name():
assert len(results) == 1
assert results[0].name == ".claude/skills/dom-media-v2.md"
assert results[0].body == "Updated guidelines.\n"


# ---------------------------------------------------------------------------
# patch-scope assessment pass (split suggestion)
# ---------------------------------------------------------------------------


def _make_scope_tool(scope_response):
"""Build a CodeReviewTool without running __init__ (avoids create_agent).

The scope_agent is mocked to return the given PatchScopeResponse.
"""
pytest.importorskip("langchain")
from bugbug.tools.code_review.agent import CodeReviewTool

tool = CodeReviewTool.__new__(CodeReviewTool)
tool.target_software = "Mozilla Firefox"
tool.scope_agent = MagicMock()
tool.scope_agent.ainvoke = AsyncMock(
return_value={"structured_response": scope_response}
)
return tool


def test_assess_patch_scope_returns_at_most_one_comment():
# The model returns two comments; the pass must keep only the first.
comments = [
GeneratedReviewComment(
file="b/f.txt",
code_line=1,
comment=f"Split this patch {i}",
explanation="bundles unrelated changes",
order=1,
)
for i in range(2)
]
tool = _make_scope_tool(PatchScopeResponse(comments=comments))

patch = make_patch("--- a/f.txt\n+++ b/f.txt\n@@ -0,0 +1,2 @@\n+a\n+b\n")
result = asyncio.run(tool.assess_patch_scope(patch, "summary"))

assert len(result) == 1
assert result[0].comment == "Split this patch 0"


def test_assess_patch_scope_returns_empty_when_no_split_warranted():
tool = _make_scope_tool(PatchScopeResponse(comments=[]))

patch = make_patch("--- a/f.txt\n+++ b/f.txt\n@@ -0,0 +1,2 @@\n+a\n+b\n")
result = asyncio.run(tool.assess_patch_scope(patch, "summary"))

assert result == []


def test_run_appends_scope_suggestion_last():
pytest.importorskip("langchain")

regular = GeneratedReviewComment(
file="b/f.txt",
code_line=1,
comment="Fix the bug",
explanation="off-by-one",
order=1,
)
scope = GeneratedReviewComment(
file="b/f.txt",
code_line=2,
comment="Split this patch into smaller pieces",
explanation="bundles unrelated changes",
order=1,
)
tool = _make_scope_tool(PatchScopeResponse(comments=[scope]))

patch = make_patch("--- a/f.txt\n+++ b/f.txt\n@@ -0,0 +1,2 @@\n+a\n+b\n")
patch.raw_diff = "diff"

tool.count_tokens = MagicMock(return_value=10)
tool._agent_model = "model-x"
tool.patch_summarizer = MagicMock()
tool.patch_summarizer.run = MagicMock(return_value="summary")
tool.generate_review_comments = AsyncMock(return_value=([regular], []))
tool.suggestion_filterer = MagicMock()
tool.suggestion_filterer.run = MagicMock(return_value=[regular])

result = asyncio.run(tool.run(patch))

assert result.details["num_scope_suggestions"] == 1
assert len(result.review_comments) == 2
# The split suggestion is sorted last (order = len(filtered) + 1).
last = result.review_comments[-1]
assert last.content == "Split this patch into smaller pieces"
assert last.order == 2