mozilla · suhaibmujahid · Jun 26, 2026 · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -34,25 +34,6 @@ updates:
     open-pull-requests-limit: 99
     allow:
       - dependency-type: direct
-  - package-ecosystem: uv
-    directory: "/services/buildrepair"
-    schedule:
-      interval: weekly
-      day: thursday
-    groups:
-      patch:
-        applies-to: version-updates
-        patterns:
-          - "*"
-        update-types:
-          - patch
-    cooldown:
-      semver-major-days: 14
-      semver-minor-days: 7
-      semver-patch-days: 3
-    open-pull-requests-limit: 99
-    allow:
-      - dependency-type: direct
   - package-ecosystem: npm
     directory: "/ui/changes"
     schedule:

diff --git a/agents/build-repair/Dockerfile b/agents/build-repair/Dockerfile
@@ -0,0 +1,81 @@
+FROM python:3.12 AS builder
+# NOTE(review): `uv:latest` is a mutable tag; consider pinning a uv version for reproducible builds.
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
+
+ENV UV_PROJECT_ENVIRONMENT=/opt/venv
+
+WORKDIR /app
+
+# First pass: install only third-party deps (no workspace members) from the mounted lockfile.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=VERSION,target=VERSION \
+    uv sync --frozen --no-dev --no-install-workspace --package hackbot-agent-build-repair
+# Second pass: bind-mount the whole build context and install the package itself, non-editable.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,target=/app,rw \
+    uv sync --locked --no-dev --no-editable --package hackbot-agent-build-repair
+
+FROM python:3.12 AS base
+# Runtime base for the agent and broker images: the prebuilt venv from `builder`, first on PATH.
+WORKDIR /app
+COPY --from=builder /opt/venv /opt/venv
+
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PATH="/opt/venv/bin:$PATH"
+
+FROM base AS agent
+
+# hackbot.toml lives at the agent root (not inside the package), so copy it into
+# the working dir; the runtime discovers it there (cwd) at startup.
+COPY agents/build-repair/hackbot.toml /app/hackbot.toml
+# Unprivileged runtime user; /workspace is created up front and handed over to it.
+RUN useradd --create-home --shell /bin/bash agent \
+    && mkdir -p /workspace \
+    && chown agent:agent /workspace
+
+# `mach bootstrap` installs the toolchain under the agent's home (~/.cargo, ~/.mozbuild/clang)
+# at runtime; prepend those bin dirs so `./mach build` and the build_firefox tool find rustc/clang.
+ENV PATH="/home/agent/.cargo/bin:/home/agent/.mozbuild/clang/bin:${PATH}"
+
+USER agent
+
+CMD ["python", "-m", "hackbot_agents.build_repair"]
+
+FROM base AS broker
+# Broker image: reuses the shared venv from `base` and runs as its own unprivileged user.
+RUN useradd --create-home --shell /bin/bash broker
+
+USER broker
+
+EXPOSE 8765
+
+CMD ["python", "-m", "hackbot_agents.build_repair.broker"]
+
+# Evaluation image: the production agent image plus the eval-only Python deps
+# (weave, wandb, tenacity). Deriving FROM agent means the harness runs the agent
+# in the identical production runtime (same user, HOME, PATH, mach toolchain). The
+# prod `agent` target never pulls these deps, since this stage builds only when
+# targeted.
+FROM agent AS eval
+
+USER root
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
+
+# Add the `eval` extra to the existing venv. `--no-install-workspace` skips the
+# already-installed agent package and `--inexact` keeps existing packages from being pruned;
+# UV_PROJECT_ENVIRONMENT is set inline because the builder stage's ENV is not inherited here.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=VERSION,target=VERSION \
+    UV_PROJECT_ENVIRONMENT=/opt/venv \
+    uv sync --frozen --no-dev --no-install-workspace --inexact --extra eval --package hackbot-agent-build-repair
+
+USER agent
+ENV FIREFOX_GIT_REPO=/firefox
+# Harness flags passed on the command line replace the default CMD and are appended to ENTRYPOINT.
+ENTRYPOINT ["python", "-m", "evals.eval"]
+CMD ["--no-try-push", "--limit", "1"]
diff --git a/agents/build-repair/README.md b/agents/build-repair/README.md
@@ -0,0 +1,92 @@
+# Build Repair Agent
+
+Two-stage Claude agent that diagnoses a Firefox build failure and edits the source
+tree to fix it. Agent logic in `hackbot_agents/build_repair/`; the Weave eval
+harness in `evals/`.
+
+Run the Docker commands below from the repo root, with secrets in a local `.env`
+(`ANTHROPIC_API_KEY`, `BUGZILLA_API_KEY`, plus `WANDB_API_KEY` for evals).
+
+The second stage builds Firefox to verify the fix and iterates on it if the build fails.
+It can also bootstrap the Firefox build environment if needed.
+
+## Input
+
+- `BUG_ID` - Optional Bugzilla bug ID
+- `GIT_COMMIT` - Firefox Git commit that failed the build
+- `FAILURE_TASKS` - a JSON object mapping failed Taskcluster task names to task IDs: `{"task_name": "taskcluster_task_id"}`
+
+## Output
+
+First stage - analysis:
+
+- `summary.md` - a quick summary for a developer
+- `analysis.md` - detailed analysis
+- `planning.md` - intermediate file that outlines fixing steps for the second stage
+
+Second stage - fixing:
+
+- A patch in Hackbot format
+
+## Test the agent
+
+```sh
+BUG_ID=1987675 GIT_COMMIT=5477e3882d4e18f93de9f56b31e90533fd23b0d1 \
+FAILURE_TASKS='{"build-linux":"XyU4b_BIRdO_IeK6z_kcQg"}' \
+  docker compose up build-repair-agent --build
+```
+
+Artifacts are written to `~/hackbot/artifacts/<run_id>/` on the host.
+
+## Evaluation
+
+The evaluation dataset is prepared with [build_repair_create_dataset.ipynb](../../notebooks/build_repair_create_dataset.ipynb) and saved to Weights and Biases Weave.
+
+### Run evals
+
+Each dataset row is a Firefox build failure. The harness runs the agent
+on a git worktree at the failure commit, builds the fix, and LLM-judges it against
+the landed commits. Needs a bootstrapped Firefox checkout.
+
+Local (use only for debugging, as the agent is not sandboxed):
+
+```sh
+FIREFOX_GIT_REPO=/path/to/firefox \
+  uv run --package hackbot-agent-build-repair --extra eval \
+  python -m evals.eval --no-try-push --limit 1
+```
+
+Docker (reuses the broker container, so no Bugzilla credentials are passed to the eval container):
+
+```sh
+FIREFOX_GIT_REPO=/path/to/firefox \
+  docker compose --env-file .env -f agents/build-repair/compose.yml run --rm --build build-repair-eval --no-try-push --limit 1
+```
+
+Flags:
+
+`--trials N` - the number of times to run each example
+
+`--parallelism N` - the number of runs to parallelize with Weave
+
+`--judge-model <id>` - Claude model ID for LLM-as-a-judge
+
+`--dataset <ref>` - Weave dataset name
+
+`--no-try-push` - do not run a TRY push to verify the results; use the local build only
+
+`--verbose` - debugging log level
+
+The harness skips examples whose fix
+landed before the production model's training cutoff (`MODEL_CUTOFF_DATES` in
+`evals/verify.py`) to avoid contamination.
+
+Change the models in [config.py](hackbot_agents/build_repair/config.py) to older ones (e.g. `claude-opus-4-6`) to test on older datasets.
+
+### W&B metrics
+
+`weave.init` + `weave.Evaluation` log success and diff rates, local and TRY build
+pass rates, LLM fix-matching (analysis/fix quality, ground-truth match,
+acceptance), and `total_cost_usd`.
+
+See https://wandb.ai/moz-bugbug/bugbug-build-repair-eval/weave/evaluations
diff --git a/agents/build-repair/compose.yml b/agents/build-repair/compose.yml
@@ -0,0 +1,66 @@
+services:
+  build-repair-broker:
+    build:
+      context: ../..
+      dockerfile: agents/build-repair/Dockerfile
+      target: broker
+    environment:
+      - BUGZILLA_API_URL=${BUGZILLA_API_URL}
+      - BUGZILLA_API_KEY=${BUGZILLA_API_KEY}
+    expose:
+      - "8765"
+
+  build-repair-agent:
+    build:
+      context: ../..
+      dockerfile: agents/build-repair/Dockerfile
+      target: agent
+    # Per-run inputs are not `:?`-required: Compose interpolates every service in this
+    # file regardless of which one is started, so a required var here would break
+    # `run build-repair-eval`; pydantic AgentInputs still validates them at runtime.
+    # ANTHROPIC_API_KEY is the exception (`:?`) because the eval service needs it too.
+    environment:
+      - RUN_ID
+      - BUG_ID=${BUG_ID:-}
+      - GIT_COMMIT=${GIT_COMMIT:-}
+      - FAILURE_TASKS=${FAILURE_TASKS:-}
+      - RUN_TRY_PUSH=${RUN_TRY_PUSH:-false}
+      - BUGZILLA_MCP_URL=http://build-repair-broker:8765/mcp
+      - SOURCE_REPO=/workspace/firefox
+      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?must be set in the environment or .env}
+      # No uploader locally: summary/logs/artifacts are written under
+      # /artifacts/<run_id>, bind-mounted to the host's ~/hackbot/artifacts.
+      - ARTIFACTS_DIR=/artifacts
+    volumes:
+      - workspace:/workspace
+      - ${HOME}/hackbot/artifacts:/artifacts
+    depends_on:
+      build-repair-broker:
+        condition: service_started
+
+  # Evaluation harness (profile-gated so it stays out of the default lifecycle).
+  # Reuses the broker for Bugzilla, so this container holds no Bugzilla creds --
+  # same isolation as the agent. The Firefox checkout is bind-mounted (faster than
+  # cloning); the harness creates worktrees from it and builds each fix. Run with:
+  #   FIREFOX_GIT_REPO=/path docker compose --env-file .env \
+  #     -f agents/build-repair/compose.yml run --rm build-repair-eval \
+  #     --no-try-push --limit 1
+  build-repair-eval:
+    profiles: ["eval"]
+    build:
+      context: ../..
+      dockerfile: agents/build-repair/Dockerfile
+      target: eval
+    environment:
+      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?must be set in the environment or .env}
+      - WANDB_API_KEY=${WANDB_API_KEY:-}
+      - BUGZILLA_MCP_URL=http://build-repair-broker:8765/mcp
+      - FIREFOX_GIT_REPO=/firefox
+    volumes:
+      - ${FIREFOX_GIT_REPO:-/firefox}:/firefox
+    depends_on:
+      build-repair-broker:
+        condition: service_started
+
+volumes:
+  workspace:
diff --git a/bugbug/tools/build_repair/__init__.py → agents/build-repair/evals/__init__.py b/bugbug/tools/build_repair/__init__.py → agents/build-repair/evals/__init__.py