fix(backend): use system chromium for agent-browser on all architectures (#12473)

## Summary

- Replaces the arch-conditional chromium install (ARM64 vs AMD64) with a
single approach: always use the distro-packaged `chromium` and set
`AGENT_BROWSER_EXECUTABLE_PATH=/usr/bin/chromium`
- Removes `agent-browser install` entirely (it downloads Chrome for
Testing, which has no ARM64 binary)
- Removes the `entrypoint.sh` wrapper script that was setting the env
var at runtime
- Updates `autogpt_platform/db/docker/docker-compose.yml`: removes
`external: true` from the network declarations so the Supabase stack can
be brought up standalone (needed for the Docker integration tests in the
test plan below — without this, `docker compose up` fails unless the
platform stack is already running); also sets
`GOTRUE_MAILER_AUTOCONFIRM: true` for local dev convenience (no SMTP
setup required on first run — this compose file is not used in
production)
- Updates `autogpt_platform/docker-compose.platform.yml`: mounts the
`workspace` volume so agent-browser results (screenshots, snapshots) are
accessible from other services; without this the copilot workspace write
fails in Docker

## Verification

Tested via Docker build on arm64 (Apple Silicon):
```
=== Testing agent-browser with system chromium ===
✓ Example Domain
  https://example.com/
=== SUCCESS: agent-browser launched with system chromium ===
```
agent-browser navigated to example.com in ~1.5s using system chromium
(v146 from Debian trixie).

## Test plan

- [x] Docker build test on arm64: `agent-browser open
https://example.com` succeeds with system chromium
- [x] Verify amd64 Docker build still works (CI)
This commit is contained in:
Zamil Majdy
2026-03-23 20:54:03 +07:00
committed by GitHub
parent e86ac21c43
commit 923b164794
8 changed files with 409 additions and 29 deletions

View File

@@ -121,36 +121,20 @@ RUN ln -s ../lib/node_modules/npm/bin/npm-cli.js /usr/bin/npm \
&& ln -s ../lib/node_modules/npm/bin/npx-cli.js /usr/bin/npx
COPY --from=builder /root/.cache/prisma-python/binaries /root/.cache/prisma-python/binaries
# Install agent-browser (Copilot browser tool) + Chromium.
# On amd64: install runtime libs + run `agent-browser install` to download
# Chrome for Testing (pinned version, tested with Playwright).
# On arm64: install system chromium package — Chrome for Testing has no ARM64
# binary. AGENT_BROWSER_EXECUTABLE_PATH is set at runtime by the entrypoint
# script (below) to redirect agent-browser to the system binary.
ARG TARGETARCH
# Install agent-browser (Copilot browser tool) using the system chromium package.
# Chrome for Testing (the binary agent-browser downloads via `agent-browser install`)
# has no ARM64 builds, so we use the distro-packaged chromium instead — verified to
# work with agent-browser via Docker tests on arm64; amd64 is validated in CI.
# Note: system chromium tracks the Debian package schedule rather than a pinned
# Chrome for Testing release. If agent-browser requires a specific Chrome version,
# verify compatibility against the chromium package version in the base image.
RUN apt-get update \
&& if [ "$TARGETARCH" = "arm64" ]; then \
apt-get install -y --no-install-recommends chromium fonts-liberation; \
else \
apt-get install -y --no-install-recommends \
libnss3 libnspr4 libatk1.0-0 libatk-bridge2.0-0 libcups2 libdrm2 \
libdbus-1-3 libxkbcommon0 libatspi2.0-0t64 libxcomposite1 libxdamage1 \
libxfixes3 libxrandr2 libgbm1 libasound2t64 libpango-1.0-0 libcairo2 \
libx11-6 libx11-xcb1 libxcb1 libxext6 libglib2.0-0t64 \
fonts-liberation libfontconfig1; \
fi \
&& apt-get install -y --no-install-recommends chromium fonts-liberation \
&& rm -rf /var/lib/apt/lists/* \
&& npm install -g agent-browser \
&& ([ "$TARGETARCH" = "arm64" ] || agent-browser install) \
&& rm -rf /tmp/* /root/.npm
# On arm64 the system chromium is at /usr/bin/chromium; set
# AGENT_BROWSER_EXECUTABLE_PATH so agent-browser's daemon uses it instead of
# Chrome for Testing (which has no ARM64 binary). On amd64 the variable is left
# unset so agent-browser uses the Chrome for Testing binary it downloaded above.
RUN printf '#!/bin/sh\n[ -x /usr/bin/chromium ] && export AGENT_BROWSER_EXECUTABLE_PATH=/usr/bin/chromium\nexec "$@"\n' \
> /usr/local/bin/entrypoint.sh \
&& chmod +x /usr/local/bin/entrypoint.sh
ENV AGENT_BROWSER_EXECUTABLE_PATH=/usr/bin/chromium
WORKDIR /app/autogpt_platform/backend
@@ -173,5 +157,4 @@ RUN POETRY_VIRTUALENVS_CREATE=true POETRY_VIRTUALENVS_IN_PROJECT=true \
ENV PORT=8000
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
CMD ["rest"]

View File

@@ -20,9 +20,9 @@ SSRF protection:
Requires:
npm install -g agent-browser
agent-browser install (downloads Chromium, one-time — skipped in Docker
where system chromium is pre-installed and
AGENT_BROWSER_EXECUTABLE_PATH is set)
In Docker: system chromium package with AGENT_BROWSER_EXECUTABLE_PATH=/usr/bin/chromium
(set automatically — no `agent-browser install` needed).
Locally: run `agent-browser install` to download Chromium.
"""
import asyncio

View File

@@ -0,0 +1,351 @@
"""Integration tests for agent-browser + system chromium.
These tests actually invoke the agent-browser binary via subprocess and require:
- agent-browser installed (npm install -g agent-browser)
- AGENT_BROWSER_EXECUTABLE_PATH=/usr/bin/chromium (set in Docker)
Run with:
poetry run test
Or to run only this file:
poetry run pytest backend/copilot/tools/agent_browser_integration_test.py -v -p no:autogpt_platform
Skipped automatically when agent-browser binary is not found.
Tests that hit external sites are marked ``integration`` and skipped by default
in CI (use ``-m integration`` to include them).
Two test tiers:
- CLI tests: call agent-browser subprocess directly (no backend imports needed)
- Tool class tests: call BrowserNavigateTool/BrowserActTool._execute() directly
with user_id=None (skips workspace/DB interactions — no Postgres/RabbitMQ needed)
"""
import concurrent.futures
import os
import shutil
import subprocess
import tempfile
from datetime import datetime, timezone
from urllib.parse import urlparse
import pytest
from backend.copilot.model import ChatSession
from backend.copilot.tools.agent_browser import BrowserActTool, BrowserNavigateTool
from backend.copilot.tools.models import (
BrowserActResponse,
BrowserNavigateResponse,
ErrorResponse,
)
# Module-wide marker: every test in this file is skipped unless an
# ``agent-browser`` executable can be located on PATH.
pytestmark = pytest.mark.skipif(
    shutil.which("agent-browser") is None,
    reason="agent-browser binary not found",
)
# Session name used by the CLI-level tests; the autouse ``_teardown`` fixture
# closes this session after each test.
_SESSION = "integration-test-session"
def _agent_browser(
    *args: str, session: str = _SESSION, timeout: int = 30
) -> tuple[int, str, str]:
    """Invoke the ``agent-browser`` CLI for one session and collect its output.

    Args:
        *args: Sub-command and its arguments, appended after the session flags.
        session: Value passed to both ``--session`` and ``--session-name``.
        timeout: Seconds allowed before ``subprocess.run`` aborts the call.

    Returns:
        ``(returncode, stdout, stderr)`` with both streams captured as text.

    Raises:
        subprocess.TimeoutExpired: If the command runs longer than *timeout*.
    """
    command = ["agent-browser", "--session", session, "--session-name", session]
    command.extend(args)
    completed = subprocess.run(
        command, capture_output=True, text=True, timeout=timeout
    )
    return completed.returncode, completed.stdout, completed.stderr
def _close_session(session: str, timeout: int = 5) -> None:
    """Ask agent-browser to close *session*, ignoring launch errors and timeouts.

    Intended for teardown paths: the command's exit status is not inspected,
    and a timeout or OS-level error while spawning it is swallowed so that
    cleanup never turns into a test failure.

    Args:
        session: Value passed to both ``--session`` and ``--session-name``.
        timeout: Seconds to wait for the ``close`` command.
    """
    close_cmd = [
        "agent-browser",
        "--session",
        session,
        "--session-name",
        session,
        "close",
    ]
    try:
        subprocess.run(close_cmd, capture_output=True, timeout=timeout)
    except (subprocess.TimeoutExpired, OSError):
        # Deliberate best-effort: nothing useful can be done if close fails.
        return
@pytest.fixture(autouse=True)
def _teardown():
    """Close the shared CLI session (``_SESSION``) after every test.

    Declared ``autouse`` so it applies to each test in this module without
    being requested. The close is best-effort: ``_close_session`` swallows
    timeouts and OS errors.
    """
    # Hand control to the test; the line after ``yield`` is the teardown step.
    yield
    _close_session(_SESSION)
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
def test_chromium_executable_env_is_set():
    """AGENT_BROWSER_EXECUTABLE_PATH must name an existing, executable file."""
    chromium_path = os.getenv("AGENT_BROWSER_EXECUTABLE_PATH", "")
    assert chromium_path, "AGENT_BROWSER_EXECUTABLE_PATH is not set"

    is_regular_file = os.path.isfile(chromium_path)
    assert is_regular_file, f"Chromium binary not found at {chromium_path}"

    can_execute = os.access(chromium_path, os.X_OK)
    assert can_execute, f"Chromium binary at {chromium_path} is not executable"
@pytest.mark.integration
def test_navigate_returns_success():
    """Opening a public URL through the agent-browser CLI exits with status 0."""
    outcome = _agent_browser("open", "https://example.com")
    rc, stderr = outcome[0], outcome[2]
    assert rc == 0, f"open failed (rc={rc}): {stderr}"
@pytest.mark.integration
def test_get_title_after_navigate():
    """``get title`` reports the page title once a page has been opened.

    Opens https://example.com, then checks that the title printed on stdout
    contains "example" (case-insensitive).
    """
    rc, _, open_err = _agent_browser("open", "https://example.com")
    # Report stderr from the navigation step as well; a bare ``rc == 0``
    # assertion would hide why the precondition failed.
    assert rc == 0, f"open failed (rc={rc}): {open_err}"

    rc, stdout, stderr = _agent_browser("get", "title", timeout=10)
    assert rc == 0, f"get title failed: {stderr}"
    assert "example" in stdout.lower()
@pytest.mark.integration
def test_get_url_after_navigate():
    """``get url`` reports the URL of the page that was just opened.

    Opens https://example.com, then checks that the host part of the URL
    printed on stdout is ``example.com``.
    """
    rc, _, open_err = _agent_browser("open", "https://example.com")
    # Report stderr from the navigation step as well; a bare ``rc == 0``
    # assertion would hide why the precondition failed.
    assert rc == 0, f"open failed (rc={rc}): {open_err}"

    rc, stdout, stderr = _agent_browser("get", "url", timeout=10)
    assert rc == 0, f"get url failed: {stderr}"
    assert urlparse(stdout.strip()).netloc == "example.com"
@pytest.mark.integration
def test_snapshot_returns_interactive_elements():
    """``snapshot -i -c`` produces non-empty output for an opened page.

    Only the exit status and the presence of some output are checked; the
    content of the snapshot is not inspected.
    """
    rc, _, open_err = _agent_browser("open", "https://example.com")
    # Report stderr from the navigation step as well; a bare ``rc == 0``
    # assertion would hide why the precondition failed.
    assert rc == 0, f"open failed (rc={rc}): {open_err}"

    rc, stdout, stderr = _agent_browser("snapshot", "-i", "-c", timeout=15)
    assert rc == 0, f"snapshot failed: {stderr}"
    assert len(stdout.strip()) > 0, "snapshot returned empty output"
@pytest.mark.integration
def test_screenshot_produces_valid_png():
    """``screenshot`` writes a non-trivial file that starts with the PNG signature.

    Opens https://example.com, asks the CLI to save a screenshot to a
    temporary path, then checks the file is larger than 1000 bytes and begins
    with the full 8-byte PNG signature. The temporary file is always removed.
    """
    rc, _, open_err = _agent_browser("open", "https://example.com")
    # Report stderr from the navigation step as well; a bare ``rc == 0``
    # assertion would hide why the precondition failed.
    assert rc == 0, f"open failed (rc={rc}): {open_err}"

    # Only the path is needed: the handle is closed before the CLI runs, and
    # delete=False keeps the file around until the explicit unlink below.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as placeholder:
        png_path = placeholder.name
    try:
        rc, _, stderr = _agent_browser("screenshot", png_path, timeout=15)
        assert rc == 0, f"screenshot failed: {stderr}"

        size = os.path.getsize(png_path)
        assert size > 1000, f"PNG too small ({size} bytes) — likely blank or corrupt"

        with open(png_path, "rb") as image:
            # Compare the complete 8-byte signature, not just "\x89PNG", so a
            # truncated or mangled header is rejected.
            assert image.read(8) == b"\x89PNG\r\n\x1a\n", "Output is not a valid PNG"
    finally:
        os.unlink(png_path)
@pytest.mark.integration
def test_scroll_down():
    """``scroll down`` exits with status 0 on an opened page."""
    rc, _, open_err = _agent_browser("open", "https://example.com")
    # Report stderr from the navigation step as well; a bare ``rc == 0``
    # assertion would hide why the precondition failed.
    assert rc == 0, f"open failed (rc={rc}): {open_err}"

    rc, _, stderr = _agent_browser("scroll", "down", timeout=10)
    assert rc == 0, f"scroll failed: {stderr}"
@pytest.mark.integration
def test_fill_form_field():
    """``fill`` exits with status 0 when targeting an input on a form page.

    Only the exit status is checked; the resulting field value is not read
    back.
    """
    rc, _, open_err = _agent_browser("open", "https://httpbin.org/forms/post")
    # Report stderr from the navigation step as well; a bare ``rc == 0``
    # assertion would hide why the precondition failed.
    assert rc == 0, f"open failed (rc={rc}): {open_err}"

    rc, _, stderr = _agent_browser(
        "fill", "input[name=custname]", "IntegrationTestUser", timeout=10
    )
    assert rc == 0, f"fill failed: {stderr}"
@pytest.mark.integration
def test_concurrent_independent_sessions():
    """Two separately named sessions navigate in parallel and keep their own URL.

    Both ``open`` commands are submitted to a two-worker thread pool; afterwards
    each session is queried for its URL and the hosts are compared. Both
    sessions are closed (best-effort) whether or not the assertions pass.
    """
    first_session = "integration-concurrent-a"
    second_session = "integration-concurrent-b"
    try:
        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
            pending_first = executor.submit(
                _agent_browser, "open", "https://example.com", session=first_session
            )
            pending_second = executor.submit(
                _agent_browser,
                "open",
                "https://httpbin.org/html",
                session=second_session,
            )
            open_rc_1, _, open_err_1 = pending_first.result(timeout=40)
            open_rc_2, _, open_err_2 = pending_second.result(timeout=40)

        assert open_rc_1 == 0, f"session_a open failed: {open_err_1}"
        assert open_rc_2 == 0, f"session_b open failed: {open_err_2}"

        url_rc_1, reported_1, url_err_1 = _agent_browser(
            "get", "url", session=first_session, timeout=10
        )
        url_rc_2, reported_2, url_err_2 = _agent_browser(
            "get", "url", session=second_session, timeout=10
        )
        assert url_rc_1 == 0, f"session_a get url failed: {url_err_1}"
        assert url_rc_2 == 0, f"session_b get url failed: {url_err_2}"

        host_1 = urlparse(reported_1.strip()).netloc
        assert host_1 == "example.com"
        host_2 = urlparse(reported_2.strip()).netloc
        assert host_2 == "httpbin.org"
    finally:
        _close_session(first_session)
        _close_session(second_session)
@pytest.mark.integration
def test_close_session():
    """``close`` exits with status 0 for a session that has a page open."""
    rc, _, open_err = _agent_browser("open", "https://example.com")
    # Report stderr from the navigation step as well; a bare ``rc == 0``
    # assertion would hide why the precondition failed.
    assert rc == 0, f"open failed (rc={rc}): {open_err}"

    rc, _, stderr = _agent_browser("close", timeout=10)
    assert rc == 0, f"close failed: {stderr}"
# ---------------------------------------------------------------------------
# Python tool class integration tests
#
# These tests exercise the actual BrowserNavigateTool / BrowserActTool Python
# classes (not just the CLI binary) to verify the full call path — URL
# validation, subprocess dispatch, response parsing — works with system
# chromium. user_id=None skips workspace/DB interactions so no Postgres or
# RabbitMQ is needed.
# ---------------------------------------------------------------------------
# Session id used by the tool-class tests; ``_close_tool_session`` closes the
# browser session of the same name after each test that requests it.
_TOOL_SESSION_ID = "integration-tool-test-session"
# Single ChatSession instance built at import time and shared by all tool
# tests. It carries user_id="test-user", while the tests themselves pass
# user_id=None to ``_execute``.
_TEST_SESSION = ChatSession(
    session_id=_TOOL_SESSION_ID,
    user_id="test-user",
    messages=[],
    usage=[],
    started_at=datetime.now(timezone.utc),
    updated_at=datetime.now(timezone.utc),
)
@pytest.fixture(autouse=False)
def _close_tool_session():
    """Close the tool-test browser session after a test that requests this fixture.

    Not autouse: a test opts in by listing ``_close_tool_session`` as a
    parameter. The close is best-effort (``_close_session`` swallows timeouts
    and OS errors).
    """
    # Hand control to the test; the line after ``yield`` is the teardown step.
    yield
    _close_session(_TOOL_SESSION_ID)
@pytest.mark.integration
@pytest.mark.asyncio
async def test_tool_navigate_returns_response(_close_tool_session):
    """Navigating to a public page yields a populated BrowserNavigateResponse.

    Checks the response type, the host of the reported URL, and that both the
    title and the snapshot are non-empty.
    """
    resp = await BrowserNavigateTool()._execute(
        user_id=None, session=_TEST_SESSION, url="https://example.com"
    )
    is_navigate_response = isinstance(resp, BrowserNavigateResponse)
    assert is_navigate_response, f"Expected BrowserNavigateResponse, got: {resp}"

    reported_host = urlparse(resp.url).netloc
    assert reported_host == "example.com"
    assert resp.title, "Expected non-empty page title"
    assert resp.snapshot, "Expected non-empty accessibility snapshot"
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "ssrf_url",
    [
        "http://169.254.169.254/",  # link-local cloud metadata address
        "http://127.0.0.1/",  # IPv4 loopback
        "http://10.0.0.1/",  # RFC 1918 private range
        "http://[::1]/",  # IPv6 loopback
        "http://0.0.0.0/",  # unspecified / wildcard address
    ],
)
async def test_tool_navigate_blocked_url(ssrf_url: str, _close_tool_session):
    """Internal and private addresses are refused with a ``blocked_url`` error."""
    navigator = BrowserNavigateTool()
    resp = await navigator._execute(
        user_id=None,
        session=_TEST_SESSION,
        url=ssrf_url,
    )
    rejected = isinstance(resp, ErrorResponse)
    assert rejected, f"Expected ErrorResponse for SSRF URL {ssrf_url!r}, got: {resp}"
    assert resp.error == "blocked_url"
@pytest.mark.asyncio
async def test_tool_navigate_missing_url(_close_tool_session):
    """An empty ``url`` argument produces a ``missing_url`` ErrorResponse."""
    resp = await BrowserNavigateTool()._execute(
        user_id=None,
        session=_TEST_SESSION,
        url="",
    )
    assert isinstance(resp, ErrorResponse)
    assert resp.error == "missing_url"
@pytest.mark.integration
@pytest.mark.asyncio
async def test_tool_act_scroll(_close_tool_session):
    """A scroll action after a successful navigate returns a BrowserActResponse."""
    opened = await BrowserNavigateTool()._execute(
        user_id=None, session=_TEST_SESSION, url="https://example.com"
    )
    assert isinstance(opened, BrowserNavigateResponse)

    resp = await BrowserActTool()._execute(
        user_id=None,
        session=_TEST_SESSION,
        action="scroll",
        direction="down",
    )
    is_act_response = isinstance(resp, BrowserActResponse)
    assert is_act_response, f"Expected BrowserActResponse, got: {resp}"
    assert resp.action == "scroll"
@pytest.mark.integration
@pytest.mark.asyncio
async def test_tool_act_fill_and_click(_close_tool_session):
    """A fill action on a form input returns a BrowserActResponse.

    NOTE(review): despite the test name, only ``fill`` is exercised here; no
    click action is issued.
    """
    opened = await BrowserNavigateTool()._execute(
        user_id=None, session=_TEST_SESSION, url="https://httpbin.org/forms/post"
    )
    assert isinstance(opened, BrowserNavigateResponse)

    fill_request = {
        "action": "fill",
        "target": "input[name=custname]",
        "value": "ToolIntegrationTest",
    }
    resp = await BrowserActTool()._execute(
        user_id=None, session=_TEST_SESSION, **fill_request
    )
    assert isinstance(resp, BrowserActResponse), f"fill failed: {resp}"
@pytest.mark.asyncio
async def test_tool_act_missing_action(_close_tool_session):
    """An empty ``action`` argument produces a ``missing_action`` ErrorResponse."""
    resp = await BrowserActTool()._execute(
        user_id=None,
        session=_TEST_SESSION,
        action="",
    )
    assert isinstance(resp, ErrorResponse)
    assert resp.error == "missing_action"
@pytest.mark.asyncio
async def test_tool_act_missing_target(_close_tool_session):
    """A click with an empty ``target`` produces a ``missing_target`` ErrorResponse."""
    click_without_target = {"action": "click", "target": ""}
    resp = await BrowserActTool()._execute(
        user_id=None, session=_TEST_SESSION, **click_without_target
    )
    assert isinstance(resp, ErrorResponse)
    assert resp.error == "missing_target"

View File

@@ -0,0 +1,20 @@
"""Local conftest for copilot/tools tests.
Overrides the session-scoped `server` and `graph_cleanup` autouse fixtures from
backend/conftest.py so that integration tests in this directory do not trigger
the full SpinTestServer startup (which requires Postgres + RabbitMQ).
"""
import pytest_asyncio
@pytest_asyncio.fixture(scope="session", loop_scope="session")
async def server():  # type: ignore[override]
    """Session-scoped stand-in for the ``server`` fixture; provides ``None``.

    Defined here so tests in this directory resolve ``server`` to this stub
    rather than to a fixture of the same name from a parent conftest.
    """
    stub = None
    return stub
@pytest_asyncio.fixture(scope="session", loop_scope="session", autouse=True)
async def graph_cleanup():  # type: ignore[override]
    """Session-scoped, autouse stand-in for ``graph_cleanup`` that does nothing.

    There is no setup before the ``yield`` and no teardown after it.
    """
    yield

View File

@@ -66,6 +66,9 @@ services:
container_name: supabase-kong
image: kong:2.8.1
restart: unless-stopped
networks:
- default
- shared-network
ports:
- 8000:8000/tcp
- 8443:8443/tcp
@@ -407,6 +410,9 @@ services:
container_name: supabase-db
image: supabase/postgres:15.8.1.049
restart: unless-stopped
networks:
- default
- app-network
volumes:
- ./volumes/db/realtime.sql:/docker-entrypoint-initdb.d/migrations/99-realtime.sql:Z
# Must be superuser to create event trigger
@@ -538,5 +544,11 @@ services:
"/app/bin/migrate && /app/bin/supavisor eval \"$$(cat /etc/pooler/pooler.exs)\" && /app/bin/server"
]
networks:
shared-network:
name: shared-network
app-network:
name: app-network
volumes:
supabase-config:

View File

@@ -10,6 +10,12 @@ then
fi
echo "Stopping and removing all containers..."
# Use the platform compose to tear everything down so no orphan containers remain.
# The platform compose manages the supabase containers via `extends`, so using only
# the standalone supabase compose here would leave orphans that conflict on next start.
if [ -f "../../docker-compose.yml" ]; then
docker compose -f ../../docker-compose.yml down -v --remove-orphans
fi
docker compose -f docker-compose.yml -f ./dev/docker-compose.dev.yml down -v --remove-orphans
echo "Cleaning up bind-mounted directories..."

View File

@@ -114,6 +114,8 @@ services:
<<: *backend-env
ports:
- "8006:8006"
volumes:
- workspace-data:/app/autogpt_platform/backend/workspaces
networks:
- app-network
logging:
@@ -185,6 +187,8 @@ services:
PYTHONUNBUFFERED: "1"
ports:
- "8008:8008"
volumes:
- workspace-data:/app/autogpt_platform/backend/workspaces
networks:
- app-network
logging:
@@ -368,6 +372,9 @@ services:
SUPABASE_URL: http://kong:8000
AGPT_SERVER_URL: http://rest_server:8006/api
AGPT_WS_SERVER_URL: ws://websocket_server:8001/ws
volumes:
workspace-data:
networks:
app-network:
driver: bridge

View File

@@ -7,6 +7,7 @@ networks:
volumes:
supabase-config:
clamav-data:
workspace-data:
x-agpt-services:
&agpt-services