Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions agents/test-plan-generator/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Stage 1 (builder): install the agent's Python dependencies with uv.
FROM python:3.12 AS builder

# Take the uv/uvx binaries from the published uv image.
# NOTE(review): the `latest` tag is unpinned, so the uv version can change
# between builds — consider pinning a specific tag.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Point uv's project environment at /opt/venv; the agent stage below copies
# that directory out of this stage.
ENV UV_PROJECT_ENVIRONMENT=/opt/venv

WORKDIR /app

# Install external deps without building workspace members.
# Only pyproject.toml, uv.lock and VERSION are bind-mounted for this step
# (presumably so the layer stays cached when only source files change).
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=VERSION,target=VERSION \
uv sync --frozen --no-dev --no-install-workspace --package hackbot-agent-test-plan-generator

# Bind-mount the whole build context at /app and install the agent package
# itself as a non-editable install (--no-editable).
# NOTE(review): this step uses --locked while the step above uses --frozen —
# confirm the difference is intentional.
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,target=/app,rw \
uv sync --locked --no-dev --no-editable --package hackbot-agent-test-plan-generator

# Stage 2 (agent): runtime image; receives only the virtualenv from the builder.
FROM python:3.12 AS agent

COPY --from=builder /opt/venv /opt/venv
WORKDIR /app

# Unbuffered output, no .pyc files, and the venv's executables first on PATH so
# `python` in CMD resolves to /opt/venv/bin/python.
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
ENV PATH="/opt/venv/bin:$PATH"

# The Firefox DevTools MCP server is launched through npx. Firefox itself is
# downloaded at agent startup, so the image only needs Node/npm and the shared
# libraries required by headless Firefox.
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
nodejs npm \
ca-certificates \
libgtk-3-0 libdbus-glib-1-2 libx11-xcb1 libxtst6 libxt6 \
libasound2 libpci3 \
&& rm -rf /var/lib/apt/lists/*

# hackbot.toml lives at the agent root (not inside the package), so copy it into
# the working dir; the runtime discovers it there (cwd) at startup.
COPY agents/test-plan-generator/hackbot.toml /app/hackbot.toml

# Create the unprivileged `agent` user (with a home directory) and a /workspace
# directory owned by it; everything after USER runs as that user.
RUN useradd --create-home --shell /bin/bash agent \
&& mkdir -p /workspace \
&& chown agent:agent /workspace

USER agent

# Run the agent package's __main__ module.
CMD ["python", "-m", "hackbot_agents.test_plan_generator"]
20 changes: 20 additions & 0 deletions agents/test-plan-generator/compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Local Compose definition for running the test-plan-generator agent.
services:
test-plan-generator-agent:
build:
# The build context is two directories above this file; the Dockerfile path
# below is given relative to that context.
context: ../..
dockerfile: agents/test-plan-generator/Dockerfile
target: agent
environment:
# Entries without a value are passed through from the invoking environment.
- RUN_ID
- FEATURE_NAME
- FEATURE_DESCRIPTION
- TEST_SCOPE
- MODEL
- MAX_TURNS
- EFFORT
# Required: interpolation aborts with the message "error" when the variable
# is unset or empty.
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?error}
# No uploader locally: summary/logs/attachments are written under
# /artifacts/<run_id>, bind-mounted to the host's ~/hackbot/artifacts.
- ARTIFACTS_DIR=/artifacts
volumes:
- ${HOME}/hackbot/artifacts:/artifacts
3 changes: 3 additions & 0 deletions agents/test-plan-generator/hackbot.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# test-plan-generator needs no platform prep: no [source] checkout and no
# [firefox] build. It downloads a fresh Firefox Nightly at startup and drives it
# through the Firefox DevTools MCP server.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from hackbot_runtime import HackbotContext, run_async
from pydantic_settings import BaseSettings, SettingsConfigDict

from .agent import TestPlanGeneratorResult, run_test_plan_generator
from .firefox_install import install_firefox_nightly


class AgentInputs(BaseSettings):
    """Inputs for one test-plan-generator run, loaded via pydantic-settings.

    ``feature_name``, ``feature_description`` and ``test_scope`` have no
    default and are therefore required; ``model``, ``max_turns`` and
    ``effort`` are optional overrides that default to ``None``.
    """

    # Inputs that do not correspond to a declared field are ignored rather
    # than rejected.
    model_config = SettingsConfigDict(extra="ignore")

    # Required description of what to test.
    feature_name: str
    feature_description: str
    test_scope: str

    # Optional tuning knobs, forwarded unchanged to the agent run.
    model: str | None = None
    max_turns: int | None = None
    effort: str | None = None


async def main(ctx: HackbotContext) -> TestPlanGeneratorResult:
    """Entry point: load inputs, install Firefox Nightly, run the agent.

    Args:
        ctx: Runtime context; only its ``log_path`` is used here, as the
            agent run's log destination.

    Returns:
        The result produced by ``run_test_plan_generator``.
    """
    settings = AgentInputs()

    # Firefox is installed on every run; the agent receives the binary path
    # as a plain string.
    nightly_binary = str(install_firefox_nightly())

    outcome = await run_test_plan_generator(
        feature_name=settings.feature_name,
        feature_description=settings.feature_description,
        test_scope=settings.test_scope,
        model=settings.model,
        max_turns=settings.max_turns,
        effort=settings.effort,
        firefox_path=nightly_binary,
        log=ctx.log_path,
        verbose=True,
    )
    return outcome


if __name__ == "__main__":
    # Hand the coroutine function to the hackbot runtime's async runner.
    run_async(main)
129 changes: 129 additions & 0 deletions agents/test-plan-generator/hackbot_agents/test_plan_generator/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
"""Firefox QA test-plan generator and executor."""

from __future__ import annotations

import logging
from pathlib import Path

from claude_agent_sdk import (
ClaudeAgentOptions,
ClaudeSDKClient,
McpServerConfig,
ResultMessage,
)
from hackbot_runtime import AgentError, HackbotAgentResult
from hackbot_runtime.claude import Reporter

from .config import DEVTOOLS_TOOLS
from .devtools_mcp import build_devtools_server
from .result import (
RESULT_SERVER_NAME,
SUBMIT_RESULT_TOOL,
ResultCollector,
TestPlanResult,
build_result_server,
)

# Directory containing this module; used to locate files under prompts/.
HERE = Path(__file__).resolve().parent

# Fixed logger name (rather than __name__) for this agent's log output.
logger = logging.getLogger("test-plan-generator")


class TestPlanGeneratorResult(HackbotAgentResult):
    """Agent result that additionally carries the structured test-plan result."""

    # Structured result collected from the agent's submission; defaults to None.
    result: TestPlanResult | None = None


def load_system_prompt() -> str:
    """Return the agent's system prompt, read from ``prompts/system.md``.

    The file is looked up relative to this module's directory (``HERE``).

    Raises:
        OSError: If the prompt file is missing or unreadable.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8: without an encoding argument, read_text()
    # falls back to the locale's preferred encoding, which varies by host.
    return (HERE / "prompts" / "system.md").read_text(encoding="utf-8")


def build_user_prompt(
    feature_name: str, feature_description: str, test_scope: str
) -> str:
    """Assemble the user prompt sent to the agent for one feature.

    Args:
        feature_name: Name of the feature; inserted verbatim.
        feature_description: Description of the feature; inserted verbatim.
        test_scope: Scope the generated test cases must stay within;
            inserted verbatim.

    Returns:
        The prompt text: an opening line, the three labelled inputs, then
        the scope and workflow instructions, separated by blank lines.
    """
    pieces = (
        # Opening line and the three caller-supplied inputs.
        "Generate and run a Firefox QA test plan from these inputs.\n\n",
        f"Feature name:\n{feature_name}\n\n",
        f"Feature description:\n{feature_description}\n\n",
        f"Test scope:\n{test_scope}\n\n",
        # Constraints on the generated plan.
        "Use the provided feature name as the structured result feature. The ",
        "generated test cases must stay within the test scope.\n\n",
        # Required workflow.
        "Follow the required workflow exactly: generate the appropriate number ",
        "of cases first, with no more than 20 cases, run them in order, stop ",
        "each case on first failed step, and submit the structured result.",
    )
    return "".join(pieces)


async def run_test_plan_generator(
    *,
    feature_name: str,
    feature_description: str,
    test_scope: str,
    model: str | None = None,
    max_turns: int | None = None,
    effort: str | None = None,
    firefox_path: str | None = None,
    verbose: bool = False,
    log: Path | None = None,
) -> TestPlanGeneratorResult:
    """Generate and run a Firefox QA test plan for one feature.

    Starts a Claude agent session with two MCP servers attached: the Firefox
    DevTools server (headless, scripting enabled) and the result server that
    backs the submit-result tool. Sends the user prompt and streams the
    response through a ``Reporter``.

    Args:
        feature_name: Feature under test; also used as the subject in log
            and error messages.
        feature_description: Description included in the user prompt.
        test_scope: Scope included in the user prompt.
        model: Model passed to the agent options (``None`` is passed through).
        max_turns: Turn limit passed to the agent options (``None`` is
            passed through).
        effort: Passed as the ``effort`` option only when truthy.
        firefox_path: Firefox binary for the DevTools server; a falsy value
            means no explicit path is given.
        verbose: Forwarded to the ``Reporter``.
        log: Log path forwarded to the ``Reporter``.

    Returns:
        The submitted structured result together with the turn count and
        total cost reported by the final result message.

    Raises:
        AgentError: If no result message was received, the result message is
            flagged as an error, or the agent never submitted a result.
    """
    subject = feature_name
    logger.info("generating Firefox QA test plan for %s", subject)

    browser_binary = None
    if firefox_path:
        browser_binary = Path(firefox_path)
    devtools = build_devtools_server(
        firefox_path=browser_binary,
        headless=True,
        enable_script=True,
    )

    # The collector holds whatever the agent hands to the submit-result tool.
    collector = ResultCollector()
    servers: dict[str, McpServerConfig] = {
        "firefox-devtools": devtools,
        RESULT_SERVER_NAME: build_result_server(collector),
    }

    # `effort` is only passed when set, leaving the option's default otherwise.
    effort_kwargs = {"effort": effort} if effort else {}
    options = ClaudeAgentOptions(
        system_prompt=load_system_prompt(),
        mcp_servers=servers,
        permission_mode="bypassPermissions",
        allowed_tools=[*DEVTOOLS_TOOLS, SUBMIT_RESULT_TOOL],
        model=model,
        max_turns=max_turns,
        setting_sources=[],
        max_buffer_size=10 * 1024 * 1024,
        **effort_kwargs,
    )

    final_message: ResultMessage | None = None
    with Reporter(verbose=verbose, log_path=log) as reporter:
        reporter.header(subject)
        async with ClaudeSDKClient(options=options) as client:
            prompt = build_user_prompt(feature_name, feature_description, test_scope)
            await client.query(prompt)
            async for message in client.receive_response():
                reporter.message(message)
                # Keep the most recent ResultMessage seen in the stream.
                if isinstance(message, ResultMessage):
                    final_message = message

    if final_message is None:
        raise AgentError(f"{subject}: agent produced no result message")
    if final_message.is_error:
        reason = final_message.result or final_message.subtype
        raise AgentError(f"{subject} test-plan generation failed: {reason}")

    submitted = collector.result
    if submitted is None:
        raise AgentError(
            f"{subject}: agent finished without submitting a result via submit_result"
        )

    return TestPlanGeneratorResult(
        result=submitted,
        num_turns=final_message.num_turns,
        total_cost_usd=final_message.total_cost_usd,
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Tools of the Firefox DevTools MCP server (@mozilla/firefox-devtools-mcp-moz).
# The server is registered under the name "firefox-devtools", so every tool is
# addressed as "mcp__firefox-devtools__<tool>". The QA agent may choose
# content/page tools for web-page behavior or chrome-context tools for Firefox
# browser UI.
DEVTOOLS_TOOLS = [
    f"mcp__firefox-devtools__{tool}"
    for tool in (
        "list_pages",
        "new_page",
        "navigate_page",
        "select_page",
        "close_page",
        "take_snapshot",
        "resolve_uid_to_selector",
        "clear_snapshot",
        "click_by_uid",
        "hover_by_uid",
        "fill_by_uid",
        "fill_form_by_uid",
        "drag_by_uid_to_uid",
        "upload_file_by_uid",
        "list_console_messages",
        "clear_console_messages",
        "list_network_requests",
        "get_network_request",
        "screenshot_page",
        "screenshot_by_uid",
        "evaluate_script",
        "accept_dialog",
        "dismiss_dialog",
        "navigate_history",
        "set_viewport_size",
        "get_firefox_info",
        "get_firefox_output",
        "list_chrome_contexts",
        "select_chrome_context",
        "evaluate_chrome_script",
    )
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from __future__ import annotations

from pathlib import Path

from claude_agent_sdk.types import McpStdioServerConfig

# npm package of the Firefox DevTools MCP server; passed to npx as its first
# argument.
PACKAGE = "@mozilla/firefox-devtools-mcp-moz"


def build_devtools_server(
    firefox_path: Path | None = None,
    *,
    headless: bool = True,
    enable_script: bool = True,
) -> McpStdioServerConfig:
    """Build the stdio config that launches the Firefox DevTools MCP server.

    The server is started as ``npx <PACKAGE> [flags]``.

    Args:
        firefox_path: Firefox binary to drive; when ``None`` no
            ``--firefox-path`` argument is passed.
        headless: Append ``--headless`` when true.
        enable_script: Append ``--enable-script`` when true.

    Returns:
        A stdio MCP server config with the npx command line and an ``env``
        entry setting ``MOZ_REMOTE_ALLOW_SYSTEM_ACCESS`` to ``"1"``.
    """
    # Optional boolean switches, in the order they appear on the command line.
    switches = ((headless, "--headless"), (enable_script, "--enable-script"))
    command_args = [PACKAGE, *(flag for enabled, flag in switches if enabled)]

    if firefox_path is not None:
        command_args.extend(("--firefox-path", str(firefox_path)))

    return McpStdioServerConfig(
        command="npx",
        args=command_args,
        env={"MOZ_REMOTE_ALLOW_SYSTEM_ACCESS": "1"},
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""Download and install a prebuilt Firefox Nightly for the agent to drive."""

from __future__ import annotations

import logging
import platform
import shutil
from pathlib import Path

import mozdownload
import mozinstall

# Directory (under the current user's home) that receives both the downloaded
# archive and the installed Firefox; it is recreated on every install.
INSTALL_DIR = Path.home() / "firefox"
# Branch passed to mozdownload's "daily" scraper.
BRANCH = "mozilla-central"

# Fixed logger name (rather than __name__) for this agent's log output.
logger = logging.getLogger("test-plan-generator")


def install_firefox_nightly() -> Path:
    """Download a daily Firefox build and install it under ``INSTALL_DIR``.

    Any existing ``INSTALL_DIR`` is deleted first, so each call starts from
    an empty directory. The archive is downloaded into ``INSTALL_DIR`` and
    installed into the same directory.

    Returns:
        Path to the installed ``firefox`` binary.
    """
    # On ARM64 machines request the linux-arm64 build explicitly; otherwise
    # pass None (presumably mozdownload then picks its default platform).
    if platform.machine() in ("aarch64", "arm64"):
        target_platform = "linux-arm64"
    else:
        target_platform = None

    # Start from a clean install directory.
    if INSTALL_DIR.exists():
        shutil.rmtree(INSTALL_DIR)
    INSTALL_DIR.mkdir(parents=True)

    logger.info("downloading Firefox Nightly...")
    downloaded_archive = mozdownload.FactoryScraper(
        "daily",
        branch=BRANCH,
        platform=target_platform,
        destination=str(INSTALL_DIR),
    ).download()

    installed_at = mozinstall.install(downloaded_archive, str(INSTALL_DIR))
    firefox_binary = Path(mozinstall.get_binary(installed_at, "firefox"))

    logger.info("installed Firefox at %s", firefox_binary)
    return firefox_binary
Loading