Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions agents/autowebcompat-repro/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
FROM python:3.12 AS builder
# Build stage: uses uv to populate /opt/venv; the `base` stage copies that
# directory out, so nothing else from this stage reaches the final images.

# Take the uv and uvx binaries from the published uv image.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Have uv place the project environment at /opt/venv.
ENV UV_PROJECT_ENVIRONMENT=/opt/venv

WORKDIR /app

# Install external deps without building workspace members.
# Only pyproject.toml, uv.lock and VERSION are bind-mounted for this step.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=VERSION,target=VERSION \
    uv sync --frozen --no-dev --no-install-workspace --package hackbot-agent-autowebcompat-repro

# Second pass with the whole build context bind-mounted at /app: installs the
# agent package and the workspace members it needs, non-editable.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,target=/app,rw \
    uv sync --locked --no-dev --no-editable --package hackbot-agent-autowebcompat-repro

FROM python:3.12 AS base
# Shared runtime layer: the `agent` and `broker` stages both start from here.

# Bring over the environment assembled in the builder stage.
COPY --from=builder /opt/venv /opt/venv
WORKDIR /app

# Unbuffered output and no .pyc files written at runtime.
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
# Put the venv's executables ahead of the system ones on PATH.
ENV PATH="/opt/venv/bin:$PATH"

FROM base AS agent

# The Firefox DevTools MCP server is an npm package launched via `npx`, so the
# agent image needs Node.js + npm (the python base ships neither). It also
# needs the shared libraries Firefox requires to run headless; the Firefox
# binary itself is downloaded at agent startup (a fresh Nightly per run) via
# mozdownload/mozinstall, not baked in here.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        nodejs npm \
        ca-certificates \
        libgtk-3-0 libdbus-glib-1-2 libx11-xcb1 libxtst6 libxt6 \
        libasound2 libpci3 \
    && rm -rf /var/lib/apt/lists/*

# hackbot.toml lives at the agent root (not inside the package), so copy it into
# the working dir; the runtime discovers it there (cwd) at startup.
COPY agents/autowebcompat-repro/hackbot.toml /app/hackbot.toml

# Run as an unprivileged user; /workspace is created and handed to that user.
RUN useradd --create-home --shell /bin/bash agent \
    && mkdir -p /workspace \
    && chown agent:agent /workspace

USER agent

CMD ["python", "-m", "hackbot_agents.autowebcompat_repro"]

FROM base AS broker
# Bugzilla MCP broker image: same venv as the agent, different entry point.

# Run as an unprivileged user.
RUN useradd --create-home --shell /bin/bash broker

USER broker

# Port the broker listens on by default (BrokerInputs.port).
EXPOSE 8765

CMD ["python", "-m", "hackbot_agents.autowebcompat_repro.broker"]
31 changes: 31 additions & 0 deletions agents/autowebcompat-repro/compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
services:
  # Sidecar that holds the Bugzilla credentials and serves the bugzilla MCP
  # tools; only reachable from other services on the compose network.
  autowebcompat-repro-broker:
    build:
      context: ../..
      dockerfile: agents/autowebcompat-repro/Dockerfile
      target: broker
    environment:
      BUGZILLA_API_URL: ${BUGZILLA_API_URL}
      BUGZILLA_API_KEY: ${BUGZILLA_API_KEY}
    expose:
      - "8765"

  autowebcompat-repro-agent:
    build:
      context: ../..
      dockerfile: agents/autowebcompat-repro/Dockerfile
      target: agent
    environment:
      # Bare names are passed through from the invoking shell when set.
      - RUN_ID
      - BUG_DATA
      - BUG_ID
      - BUGZILLA_MCP_URL=http://autowebcompat-repro-broker:8765/mcp
      # Abort `docker compose` with a readable message when the key is missing.
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?ANTHROPIC_API_KEY must be set}
      # No uploader locally: summary/logs/attachments are written under
      # /artifacts/<run_id>, bind-mounted to the host's ~/hackbot/artifacts.
      - ARTIFACTS_DIR=/artifacts
    volumes:
      - ${HOME}/hackbot/artifacts:/artifacts
    depends_on:
      autowebcompat-repro-broker:
        condition: service_started
3 changes: 3 additions & 0 deletions agents/autowebcompat-repro/hackbot.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# autowebcompat-repro needs no platform prep: no [source] checkout, no [firefox] build.
# Subject comes from the request (bug_data / bug_id); the DevTools MCP drives a
# Firefox Nightly that the agent downloads at startup (it is not baked into the image).
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from hackbot_runtime import HackbotContext, run_async
from pydantic_settings import BaseSettings, SettingsConfigDict

from .agent import AutowebcompatReproResult, run_autowebcompat_repro
from .firefox_install import install_firefox_nightly


class AgentInputs(BaseSettings):
    """Per-run agent inputs, loaded through pydantic-settings.

    ``bugzilla_mcp_url`` is the only field without a default. ``bug_data``
    and ``bug_id`` are both optional here; ``main`` forwards them unchanged
    to ``run_autowebcompat_repro``.
    """

    # URL of the Bugzilla MCP endpoint the agent connects to over HTTP.
    bugzilla_mcp_url: str
    # Inline report text.
    bug_data: str | None = None
    # Bugzilla bug number.
    bug_id: int | None = None
    # Optional run overrides, passed through as-is (None means "not set").
    model: str | None = None
    max_turns: int | None = None
    effort: str | None = None

    # Ignore settings that are not declared above instead of rejecting them.
    model_config = SettingsConfigDict(extra="ignore")


async def main(ctx: HackbotContext) -> AutowebcompatReproResult:
    """Load the run inputs, install Firefox Nightly, and run the agent.

    Returns whatever ``run_autowebcompat_repro`` returns; the agent log is
    written to ``ctx.log_path``.
    """
    settings = AgentInputs()

    # Every run gets a freshly provisioned Nightly so the reproduction happens
    # against a current build; the installer reports which binary to drive.
    nightly_binary = install_firefox_nightly()

    outcome = await run_autowebcompat_repro(
        bugzilla_mcp_server={
            "type": "http",
            "url": settings.bugzilla_mcp_url,
        },
        bug_data=settings.bug_data,
        bug_id=settings.bug_id,
        model=settings.model,
        max_turns=settings.max_turns,
        effort=settings.effort,
        firefox_path=str(nightly_binary),
        log=ctx.log_path,
        verbose=True,
    )
    return outcome


# Script entry point: hand the async `main` to hackbot_runtime's `run_async`.
if __name__ == "__main__":
    run_async(main)
155 changes: 155 additions & 0 deletions agents/autowebcompat-repro/hackbot_agents/autowebcompat_repro/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
"""Firefox web-compatibility reproduction agent.

Drives an agent that reproduces a broken-site report in Firefox
using the Firefox DevTools MCP. The bug is passed either inline as ``bug_data``
text or a Bugzilla ``bug_id`` (read via Bugzilla broker).
"""

from __future__ import annotations

import logging
from pathlib import Path

from claude_agent_sdk import (
ClaudeAgentOptions,
ClaudeSDKClient,
McpServerConfig,
ResultMessage,
)
from hackbot_runtime import AgentError, HackbotAgentResult
from hackbot_runtime.claude import Reporter

from .config import BUGZILLA_READ_TOOLS, DEVTOOLS_TOOLS
from .devtools_mcp import build_devtools_server
from .result import (
RESULT_SERVER_NAME,
SUBMIT_RESULT_TOOL,
ReproductionResult,
ResultCollector,
build_result_server,
)

HERE = Path(__file__).resolve().parent

logger = logging.getLogger("autowebcompat-repro")


class AutowebcompatReproResult(HackbotAgentResult):
    """Agent result extended with the structured reproduction outcome."""

    # Structured outcome collected from the agent; defaults to None.
    result: ReproductionResult | None = None


def load_system_prompt() -> str:
    """Return the contents of ``prompts/system.md`` next to this module.

    The file is decoded as UTF-8 explicitly so the prompt text does not
    depend on the locale encoding of the process.
    """
    return (HERE / "prompts" / "system.md").read_text(encoding="utf-8")


def build_user_prompt(bug_data: str | None, bug_id: int | None) -> str:
if bug_data:
return (
"Here is the web-compatibility report to work on:\n\n"
f"{bug_data}\n\n"
"Follow your task procedure."
)
if bug_id is not None:
return (
f"The web-compatibility report to work on is Bugzilla bug {bug_id}. "
"Fetch it using the Bugzilla MCP tools, then follow your task procedure."
)
raise AgentError("neither bug_data nor bug_id was provided")


async def run_autowebcompat_repro(
    *,
    bugzilla_mcp_server: McpServerConfig,
    bug_data: str | None = None,
    bug_id: int | None = None,
    model: str | None = None,
    max_turns: int | None = None,
    effort: str | None = None,
    firefox_path: str | None = None,
    verbose: bool = False,
    log: Path | None = None,
) -> AutowebcompatReproResult:
    """Reproduce a web-compat issue and return the agent's findings.

    The report is taken from ``bug_data`` (inline text) when it is non-empty,
    otherwise from Bugzilla bug ``bug_id``. The Bugzilla MCP server and its
    read tools are only offered to the agent when ``bug_id`` is given.

    Returns a :class:`AutowebcompatReproResult` carrying the submitted
    result plus the turn count and cost from the final result message.

    Raises :class:`AgentError` if neither ``bug_data`` nor ``bug_id`` is
    provided, if the agent produces no result message, if it ends in an
    error, or if it finishes without submitting a result.
    """
    # Build the prompt before anything else: it raises when neither input was
    # supplied, so a bad request fails before any server is set up.
    user_prompt = build_user_prompt(bug_data, bug_id)

    subject = bug_data if bug_data else f"bug {bug_id}"
    # Inline reports can be arbitrarily long; use a bounded label wherever the
    # subject ends up in a log line or an error message.
    preview = subject if len(subject) <= 200 else f"{subject[:200]}..."
    logger.info("reproducing %s", preview)

    devtools_server = build_devtools_server(
        firefox_path=Path(firefox_path) if firefox_path else None,
        headless=True,
        enable_script=True,
    )

    # Structured-result MCP server (in-process): the agent calls submit_result
    # once at the end, giving a predictable JSON result instead of free text.
    result_collector = ResultCollector()
    result_server = build_result_server(result_collector)

    # Only wire up Bugzilla when there's a bug to fetch. With inline bug_data
    # alone there's nothing to read, so the bugzilla MCP is not made available.
    mcp_servers: dict[str, McpServerConfig] = {
        "firefox-devtools": devtools_server,
        RESULT_SERVER_NAME: result_server,
    }
    bugzilla_tools: list[str] = []
    if bug_id is not None:
        mcp_servers["bugzilla"] = bugzilla_mcp_server
        bugzilla_tools = BUGZILLA_READ_TOOLS

    system_prompt = load_system_prompt()

    options = ClaudeAgentOptions(
        system_prompt=system_prompt,
        mcp_servers=mcp_servers,
        permission_mode="bypassPermissions",
        allowed_tools=[
            "Read",
            "Grep",
            "Glob",
            "Bash",
            *bugzilla_tools,
            *DEVTOOLS_TOOLS,
            SUBMIT_RESULT_TOOL,
        ],
        model=model,
        max_turns=max_turns,
        # Only pass `effort` when the caller set it.
        **({"effort": effort} if effort else {}),
        setting_sources=[],
        # DevTools snapshots/screenshots of complex pages serialize to JSON that
        # can exceed the SDK's default 1 MiB message buffer (the reader dies
        # fatally if it does). Raise it well above that ceiling.
        max_buffer_size=10 * 1024 * 1024,
    )

    result_msg: ResultMessage | None = None
    with Reporter(verbose=verbose, log_path=log) as reporter:
        reporter.header(subject)
        async with ClaudeSDKClient(options=options) as client:
            await client.query(user_prompt)
            async for msg in client.receive_response():
                reporter.message(msg)
                # Keep the latest ResultMessage seen in the stream.
                if isinstance(msg, ResultMessage):
                    result_msg = msg

    if result_msg is None:
        raise AgentError(f"{preview}: agent produced no result message")
    if result_msg.is_error:
        raise AgentError(
            f"{preview} investigation failed: {result_msg.result or result_msg.subtype}"
        )
    if result_collector.result is None:
        raise AgentError(
            f"{preview}: agent finished without submitting a result via submit_result"
        )

    return AutowebcompatReproResult(
        result=result_collector.result,
        num_turns=result_msg.num_turns,
        total_cost_usd=result_msg.total_cost_usd,
    )
Comment thread
ksy36 marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""Bugzilla MCP broker.

Sidecar container that holds the Bugzilla API key and serves the
bugzilla MCP tools over HTTP. The agent process (in a sibling container
in the same Cloud Run Job task) reaches us at `127.0.0.1:<port>/mcp`;
under the local compose setup it uses the broker's service hostname instead.
The agent container itself holds no Bugzilla credentials.
"""

import logging
from contextlib import asynccontextmanager

import bugsy
Comment thread
suhaibmujahid marked this conversation as resolved.
import uvicorn
from agent_tools import bugzilla
from agent_tools.bugzilla import BugzillaContext
from agent_tools.claude_sdk import build_sdk_server
from mcp.server.streamable_http_manager import StreamableHTTPSessionManager
from pydantic_settings import BaseSettings, SettingsConfigDict
from starlette.applications import Starlette
from starlette.routing import Mount

log = logging.getLogger("autowebcompat-repro-broker")


class BrokerInputs(BaseSettings):
    """Broker settings, loaded through pydantic-settings.

    The two Bugzilla fields have no default; ``host`` and ``port`` default
    to listening on all interfaces at port 8765.
    """

    # Passed to bugsy.Bugsy as `bugzilla_url` and `api_key` respectively.
    bugzilla_api_url: str
    bugzilla_api_key: str
    # Address and port handed to uvicorn.run.
    host: str = "0.0.0.0"
    port: int = 8765

    # Ignore settings that are not declared above instead of rejecting them.
    model_config = SettingsConfigDict(extra="ignore")


def build_app(inputs: BrokerInputs) -> Starlette:
    """Assemble the Starlette app that serves the bugzilla MCP tools at ``/mcp``.

    A bugsy client is created from the configured URL and API key, wrapped in
    a ``BugzillaContext``, and exposed through a stateless streamable-HTTP
    session manager whose lifetime is tied to the app's lifespan.
    """
    bugzilla_ctx = BugzillaContext(
        client=bugsy.Bugsy(
            api_key=inputs.bugzilla_api_key,
            bugzilla_url=inputs.bugzilla_api_url,
        )
    )
    mcp_server = build_sdk_server("bugzilla", bugzilla_ctx, bugzilla.TOOLS)["instance"]
    session_manager = StreamableHTTPSessionManager(app=mcp_server, stateless=True)

    @asynccontextmanager
    async def lifespan(_app):
        # Keep the session manager running for as long as the app is up.
        async with session_manager.run():
            log.info(
                "bugzilla broker ready on %s:%d (read-only)",
                inputs.host,
                inputs.port,
            )
            yield

    async def mcp_handler(scope, receive, send):
        # Raw ASGI passthrough into the MCP session manager.
        await session_manager.handle_request(scope, receive, send)

    routes = [Mount("/mcp", app=mcp_handler)]
    return Starlette(routes=routes, lifespan=lifespan)


def main() -> None:
    """Configure logging, load the broker settings, and serve the app."""
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)

    settings = BrokerInputs()
    # log_config=None: uvicorn is started without its own logging config.
    uvicorn.run(
        build_app(settings),
        host=settings.host,
        port=settings.port,
        log_config=None,
    )


# Script entry point: start the broker when run as a module/script.
if __name__ == "__main__":
    main()
Loading