diff --git a/agents/test-plan-generator/Dockerfile b/agents/test-plan-generator/Dockerfile new file mode 100644 index 0000000000..ef082714da --- /dev/null +++ b/agents/test-plan-generator/Dockerfile @@ -0,0 +1,50 @@ +FROM python:3.12 AS builder + +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ + +ENV UV_PROJECT_ENVIRONMENT=/opt/venv + +WORKDIR /app + +# Install external deps without building workspace members. +RUN --mount=type=cache,target=/root/.cache/uv \ + --mount=type=bind,source=pyproject.toml,target=pyproject.toml \ + --mount=type=bind,source=uv.lock,target=uv.lock \ + --mount=type=bind,source=VERSION,target=VERSION \ + uv sync --frozen --no-dev --no-install-workspace --package hackbot-agent-test-plan-generator + +RUN --mount=type=cache,target=/root/.cache/uv \ + --mount=type=bind,target=/app,rw \ + uv sync --locked --no-dev --no-editable --package hackbot-agent-test-plan-generator + +FROM python:3.12 AS agent + +COPY --from=builder /opt/venv /opt/venv +WORKDIR /app + +ENV PYTHONUNBUFFERED=1 +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PATH="/opt/venv/bin:$PATH" + +# The Firefox DevTools MCP server is launched through npx. Firefox itself is +# downloaded at agent startup, so the image only needs Node/npm and the shared +# libraries required by headless Firefox. +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + nodejs npm \ + ca-certificates \ + libgtk-3-0 libdbus-glib-1-2 libx11-xcb1 libxtst6 libxt6 \ + libasound2 libpci3 \ + && rm -rf /var/lib/apt/lists/* + +# hackbot.toml lives at the agent root (not inside the package), so copy it into +# the working dir; the runtime discovers it there (cwd) at startup. 
+COPY agents/test-plan-generator/hackbot.toml /app/hackbot.toml + +RUN useradd --create-home --shell /bin/bash agent \ + && mkdir -p /workspace \ + && chown agent:agent /workspace + +USER agent + +CMD ["python", "-m", "hackbot_agents.test_plan_generator"] diff --git a/agents/test-plan-generator/compose.yml b/agents/test-plan-generator/compose.yml new file mode 100644 index 0000000000..987bafbb12 --- /dev/null +++ b/agents/test-plan-generator/compose.yml @@ -0,0 +1,20 @@ +services: + test-plan-generator-agent: + build: + context: ../.. + dockerfile: agents/test-plan-generator/Dockerfile + target: agent + environment: + - RUN_ID + - FEATURE_NAME + - FEATURE_DESCRIPTION + - TEST_SCOPE + - MODEL + - MAX_TURNS + - EFFORT + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?error} + # No uploader locally: summary/logs/attachments are written under + # /artifacts/, bind-mounted to the host's ~/hackbot/artifacts. + - ARTIFACTS_DIR=/artifacts + volumes: + - ${HOME}/hackbot/artifacts:/artifacts diff --git a/agents/test-plan-generator/hackbot.toml b/agents/test-plan-generator/hackbot.toml new file mode 100644 index 0000000000..5912e57781 --- /dev/null +++ b/agents/test-plan-generator/hackbot.toml @@ -0,0 +1,3 @@ +# test-plan-generator needs no platform prep: no [source] checkout and no +# [firefox] build. It downloads a fresh Firefox Nightly at startup and drives it +# through the Firefox DevTools MCP server. 
class AgentInputs(BaseSettings):
    """Inputs for one agent run, loaded through pydantic-settings.

    ``feature_name``, ``feature_description`` and ``test_scope`` are required;
    ``model``, ``max_turns`` and ``effort`` are optional overrides that default
    to ``None``. Settings that are not declared here are ignored
    (``extra="ignore"``).
    """

    feature_name: str
    feature_description: str
    test_scope: str
    model: str | None = None
    max_turns: int | None = None
    effort: str | None = None

    model_config = SettingsConfigDict(extra="ignore")


async def main(ctx: HackbotContext) -> TestPlanGeneratorResult:
    """Install Firefox Nightly and run the test-plan generator once.

    Args:
        ctx: Runtime context; only ``ctx.log_path`` is used, as the log
            destination for the run.

    Returns:
        The result produced by ``run_test_plan_generator``.
    """
    # Local import: keeps this block self-contained without touching the
    # module's import section.
    import asyncio

    # Validate the inputs first so a missing required setting fails before the
    # (slow) Firefox download starts.
    inputs = AgentInputs()

    # install_firefox_nightly() is synchronous (download + unpack). Running it
    # in a worker thread keeps the event loop free for anything else the
    # runtime may have scheduled on it.
    firefox_binary = await asyncio.to_thread(install_firefox_nightly)

    return await run_test_plan_generator(
        feature_name=inputs.feature_name,
        feature_description=inputs.feature_description,
        test_scope=inputs.test_scope,
        model=inputs.model,
        max_turns=inputs.max_turns,
        effort=inputs.effort,
        firefox_path=str(firefox_binary),
        log=ctx.log_path,
        verbose=True,
    )


if __name__ == "__main__":
    run_async(main)
def build_user_prompt(
    feature_name: str, feature_description: str, test_scope: str
) -> str:
    """Assemble the user prompt for one feature.

    The prompt is a list of paragraphs separated by blank lines: an opening
    instruction, the three labelled inputs (each value starts on the line
    after its label), and two closing paragraphs that restate the scope and
    workflow constraints.
    """
    labelled_inputs = (
        ("Feature name", feature_name),
        ("Feature description", feature_description),
        ("Test scope", test_scope),
    )
    paragraphs = [
        "Generate and run a Firefox QA test plan from these inputs.",
        *(f"{label}:\n{value}" for label, value in labelled_inputs),
        (
            "Use the provided feature name as the structured result feature. The "
            "generated test cases must stay within the test scope."
        ),
        (
            "Follow the required workflow exactly: generate the appropriate number "
            "of cases first, with no more than 20 cases, run them in order, stop "
            "each case on first failed step, and submit the structured result."
        ),
    ]
    return "\n\n".join(paragraphs)
TestPlanGeneratorResult( + result=result_collector.result, + num_turns=result_msg.num_turns, + total_cost_usd=result_msg.total_cost_usd, + ) diff --git a/agents/test-plan-generator/hackbot_agents/test_plan_generator/config.py b/agents/test-plan-generator/hackbot_agents/test_plan_generator/config.py new file mode 100644 index 0000000000..3760e2ff42 --- /dev/null +++ b/agents/test-plan-generator/hackbot_agents/test_plan_generator/config.py @@ -0,0 +1,38 @@ +# Firefox DevTools MCP tools (@mozilla/firefox-devtools-mcp-moz), exposed under +# the "firefox-devtools" server name. Keep this focused on tools needed to +# generate and execute Firefox QA cases: page interaction, observation, +# content scripts, privileged browser UI scripts, and prefs. +DEVTOOLS_TOOLS = [ + "mcp__firefox-devtools__list_pages", + "mcp__firefox-devtools__new_page", + "mcp__firefox-devtools__navigate_page", + "mcp__firefox-devtools__select_page", + "mcp__firefox-devtools__close_page", + "mcp__firefox-devtools__navigate_history", + "mcp__firefox-devtools__set_viewport_size", + "mcp__firefox-devtools__list_console_messages", + "mcp__firefox-devtools__clear_console_messages", + "mcp__firefox-devtools__list_network_requests", + "mcp__firefox-devtools__get_network_request", + "mcp__firefox-devtools__screenshot_page", + "mcp__firefox-devtools__screenshot_by_uid", + "mcp__firefox-devtools__get_firefox_info", + "mcp__firefox-devtools__get_firefox_output", + "mcp__firefox-devtools__take_snapshot", + "mcp__firefox-devtools__resolve_uid_to_selector", + "mcp__firefox-devtools__clear_snapshot", + "mcp__firefox-devtools__click_by_uid", + "mcp__firefox-devtools__hover_by_uid", + "mcp__firefox-devtools__fill_by_uid", + "mcp__firefox-devtools__fill_form_by_uid", + "mcp__firefox-devtools__drag_by_uid_to_uid", + "mcp__firefox-devtools__upload_file_by_uid", + "mcp__firefox-devtools__accept_dialog", + "mcp__firefox-devtools__dismiss_dialog", + "mcp__firefox-devtools__evaluate_script", + 
def build_devtools_server(
    firefox_path: Path | None = None,
    *,
    headless: bool = True,
    enable_script: bool = True,
    enable_privileged_context: bool = True,
) -> McpStdioServerConfig:
    """Return the stdio launch config for the Firefox DevTools MCP server.

    The server is started as ``npx <PACKAGE> [flags]``. Each boolean option
    adds its command-line flag when truthy, and ``--firefox-path`` is added
    only when ``firefox_path`` is given.

    Args:
        firefox_path: Firefox binary to drive; omitted from the command line
            when ``None``.
        headless: Pass ``--headless``.
        enable_script: Pass ``--enable-script``.
        enable_privileged_context: Pass ``--enable-privileged-context``.
    """
    optional_flags = (
        (headless, "--headless"),
        (enable_script, "--enable-script"),
        (enable_privileged_context, "--enable-privileged-context"),
    )
    npx_args = [PACKAGE, *(flag for enabled, flag in optional_flags if enabled)]

    if firefox_path is not None:
        npx_args.extend(("--firefox-path", str(firefox_path)))

    # NOTE(review): presumably this variable is what lets the remote protocol
    # reach privileged (browser chrome) contexts -- confirm against the MCP
    # server's documentation.
    server_env = {
        "MOZ_REMOTE_ALLOW_SYSTEM_ACCESS": "1",
    }
    return McpStdioServerConfig(command="npx", args=npx_args, env=server_env)
def install_firefox_nightly() -> Path:
    """Download the latest Firefox Nightly and install it under ``INSTALL_DIR``.

    Any existing ``INSTALL_DIR`` is deleted first, so every call starts from a
    clean directory. The archive is downloaded into ``INSTALL_DIR`` and
    installed in the same directory.

    Returns:
        Path to the installed ``firefox`` binary.
    """
    # Only force a platform for Linux on ARM64; passing None leaves the choice
    # to mozdownload. The system check matters because macOS on Apple Silicon
    # also reports machine() == "arm64" and must not be handed a Linux build.
    is_linux_arm64 = platform.system() == "Linux" and platform.machine() in (
        "aarch64",
        "arm64",
    )
    mozdownload_platform = "linux-arm64" if is_linux_arm64 else None

    if INSTALL_DIR.exists():
        shutil.rmtree(INSTALL_DIR)
    INSTALL_DIR.mkdir(parents=True)

    logger.info("downloading Firefox Nightly...")
    scraper = mozdownload.FactoryScraper(
        "daily",
        branch=BRANCH,
        platform=mozdownload_platform,
        destination=str(INSTALL_DIR),
    )
    archive = scraper.download()

    install_folder = mozinstall.install(archive, str(INSTALL_DIR))
    binary = Path(mozinstall.get_binary(install_folder, "firefox"))

    logger.info("installed Firefox at %s", binary)
    return binary
+ +## Context guidance + +Choose a primary context label per case: `content` for normal web page or +document behavior; `chrome` for Firefox UI, browser state, preferences, toolbar, +menus, panels, downloads, history, bookmarks, PDF viewer chrome behavior, or +any case whose context is uncertain. The label describes what the case mainly exercises; it does not +restrict per-step tool choice. + +Use the most appropriate DevTools MCP tool for each step. Prefer content tools +for page/DOM interaction and privileged-context tools for browser UI/state or +assertions unavailable from page context. Do not use privileged tools merely to +bypass a failing content interaction. + +## Execution rules + +- Do not skip, reorder, combine, or rewrite steps after generation. +- Call only the tools needed for the current step. +- If a step fails, mark that step failed, mark the case failed, stop that case, + and move to the next case. +- When a step fails, include a concise failure reason based only on observed + behavior. +- When a case fails or is unsuitable, include a concise case-level reason. +- Do not try alternate approaches to make a failing step pass. + +## Test case style + +Use concise, manual-QA-style titles and steps. + +## Unsuitable cases + +Mark a case as `unsuitable` only if it requires: + +- Restarting Firefox during the test flow. +- Pixel-perfect or visual comparison. +- Installing external apps beyond basic add-ons. +- Confirming real hardware behavior such as microphone, camera, or printer. +- Changing, verifying, or interacting with OS/system settings. +- Changing, verifying, or interacting with the system desktop or OS UI. +- Firefox Sync, cross-device verification, or account-sync behavior. +- Behavior no available tool can execute or observe. + +## Reporting + +The final answer must be submitted through `submit_result` exactly once. A prose +message is not enough. Include one case result for every generated test case.
RESULT_SERVER_NAME = "test-plan-generator"
SUBMIT_RESULT_TOOL = f"mcp__{RESULT_SERVER_NAME}__submit_result"


class GeneratedTestCase(BaseModel):
    """A single generated test case: id, title, context label and steps."""

    id: int = Field(description="Sequential case id starting at 1.")
    title: str
    context: Literal["chrome", "content"] = Field(
        description=(
            "Primary context label for what the case mainly exercises. This is "
            "guidance for tool selection, not a restriction on which available "
            "DevTools MCP tools may be used for individual steps."
        )
    )
    preconditions: str | None = None
    steps: list[str] = Field(
        description="Concise test steps for this case; between 1 and 6 steps."
    )

    @model_validator(mode="after")
    def _check_step_count(self) -> "GeneratedTestCase":
        # Reject empty cases and cases with more than six steps.
        step_count = len(self.steps)
        if step_count < 1 or step_count > 6:
            raise ValueError("each generated test case must have 1 to 6 steps")
        return self


class StepResult(BaseModel):
    """Outcome of one executed (or skipped) step of a test case."""

    step_number: int
    status: Literal["passed", "failed", "not_run"]
    observation: str
    failure_reason: str | None = Field(
        default=None,
        description=(
            "Required when status is failed. A concise reason why the step failed, "
            "based only on what was observed during execution."
        ),
    )

    @model_validator(mode="after")
    def _check_failure_reason(self) -> "StepResult":
        # Only failed steps need a (non-empty) reason.
        if self.status != "failed" or self.failure_reason:
            return self
        raise ValueError("failed steps must include failure_reason")


class TestCaseResult(BaseModel):
    """Outcome of one generated test case, including its per-step results."""

    id: int
    status: Literal["passed", "failed", "unsuitable"]
    step_results: list[StepResult]
    summary: str
    failure_reason: str | None = Field(
        default=None,
        description=(
            "Required when status is failed or unsuitable. A concise reason why "
            "the case failed or could not be run, useful for later developer review."
        ),
    )

    @model_validator(mode="after")
    def _check_failure_reason(self) -> "TestCaseResult":
        # Failed and unsuitable cases both need a (non-empty) reason.
        needs_reason = self.status in ("failed", "unsuitable")
        if needs_reason and not self.failure_reason:
            raise ValueError("failed or unsuitable cases must include failure_reason")
        return self


class TestPlanResult(BaseModel):
    """The full submission: the generated cases plus one result per case."""

    feature: str
    generated_test_cases: list[GeneratedTestCase]
    results: list[TestCaseResult]
    summary: str

    @model_validator(mode="after")
    def _check_cases_and_results(self) -> "TestPlanResult":
        total = len(self.generated_test_cases)
        if total < 1 or total > 20:
            raise ValueError("generated_test_cases must contain 1 to 20 cases")

        # Case ids must be exactly 1..total, in order.
        wanted_ids = list(range(1, total + 1))
        if [case.id for case in self.generated_test_cases] != wanted_ids:
            raise ValueError("generated test case ids must be sequential starting at 1")

        # Results must line up one-to-one with the cases, in the same order.
        if [entry.id for entry in self.results] != wanted_ids:
            raise ValueError(
                "results must contain one result for each generated test case id"
            )

        return self


# Tool input schema: the model's JSON schema with extra top-level keys rejected.
SUBMIT_RESULT_SCHEMA = TestPlanResult.model_json_schema() | {
    "additionalProperties": False
}


class ResultCollector:
    """Container for the result the agent submits; ``result`` stays ``None`` until then."""

    def __init__(self) -> None:
        self.result: TestPlanResult | None = None


def build_result_server(collector: ResultCollector) -> McpServerConfig:
    """Create the in-process MCP server that exposes the ``submit_result`` tool.

    A valid submission is stored on ``collector.result``; an invalid one leaves
    the collector untouched and is reported back to the agent as a tool error.
    """

    @tool(
        "submit_result",
        "Submit the final generated Firefox QA test plan and execution result. "
        "Call exactly once, after all generated test cases have been run.",
        SUBMIT_RESULT_SCHEMA,
    )
    async def submit_result(args: dict) -> dict:
        try:
            validated = TestPlanResult.model_validate(args)
        except ValidationError as exc:
            error_text = f"Invalid result: {exc}"
            return {
                "content": [{"type": "text", "text": error_text}],
                "is_error": True,
            }
        collector.result = validated
        return {"content": [{"type": "text", "text": "Result recorded."}]}

    return create_sdk_mcp_server(name=RESULT_SERVER_NAME, tools=[submit_result])
class TestPlanGeneratorInputs(BaseModel):
    """Request inputs accepted for the ``test-plan-generator`` agent."""

    # Required: the feature under test, its description, and the testing scope.
    feature_name: str
    feature_description: str
    test_scope: str
    # Optional run overrides; they stay None when the caller does not set them.
    model: str | None = None
    max_turns: int | None = None
    effort: str | None = None
def test_test_plan_generator_inputs_require_feature_description():
    """Omitting ``feature_description`` must fail validation."""
    incomplete_inputs = {
        "feature_name": "Bookmarks and History",
        "test_scope": "Bookmarks toolbar behavior.",
    }
    with pytest.raises(ValidationError):
        PlanGeneratorInputs(**incomplete_inputs)


def test_test_plan_generator_env_serialization():
    """Only the three required fields appear in the serialized environment."""
    inputs = PlanGeneratorInputs(
        feature_name="Bookmarks and History",
        feature_description="Bookmarks and history controls in Firefox.",
        test_scope="Bookmarks toolbar behavior.",
    )
    expected_env = {
        "FEATURE_NAME": "Bookmarks and History",
        "FEATURE_DESCRIPTION": "Bookmarks and history controls in Firefox.",
        "TEST_SCOPE": "Bookmarks toolbar behavior.",
    }

    assert model_to_env(inputs) == expected_env


def test_test_plan_generator_registry_uses_default_env_serializer():
    """The registry entry has no custom ``build_env`` and points at the right job."""
    entry = AGENT_REGISTRY["test-plan-generator"]

    assert entry.build_env is None
    assert entry.job_name == "hackbot-agent-test-plan-generator"
    assert entry.input_schema is PlanGeneratorInputs
name = "hackbot-runtime", extras = ["claude-sdk"], editable = "libs/hackbot-runtime" }, + { name = "mcp", specifier = ">=1.0.0" }, + { name = "mozdownload" }, + { name = "mozinstall" }, +] + [[package]] name = "hackbot-api" version = "0.1.0"