Mirror of https://github.com/Significant-Gravitas/AutoGPT.git (synced 2026-04-08 03:00:28 -04:00)

Comparing feat/llm-a...spare/9 (3 commits): 23b65939f3, 1c27eaac53, 923b164794
@@ -346,59 +346,70 @@ agent-browser --session-name pr-test open 'http://localhost:3000/copilot' --time
```bash
# ... fill chat input and press Enter, wait 20-30s for response
```

## Step 5: Record results and take screenshots

For each test scenario, record in `$RESULTS_DIR/test-report.md`:

```markdown
# E2E Test Report: PR #{N} — {title}
Date: {date}
Branch: {branch}
Worktree: {path}

## Environment
- Docker services: [list running containers]
- API keys: OpenRouter={present/missing}, E2B={present/missing}

## Test Results

### Scenario 1: {name}
**Steps:**
1. ...
2. ...
**Expected:** ...
**Actual:** ...
**Result:** PASS / FAIL
**Screenshot:** {filename}.png
**Logs:** (if relevant)

### Scenario 2: {name}
...

## Summary
- Total: X scenarios
- Passed: Y
- Failed: Z
- Bugs found: [list]
```

**Take a screenshot at every significant test step** — before and after interactions, on success, and on failure. Name them sequentially with descriptive names:

```bash
agent-browser --session-name pr-test screenshot $RESULTS_DIR/{NN}-{description}.png
# Examples:
# $RESULTS_DIR/01-login-page.png
# $RESULTS_DIR/02-builder-with-block.png
# $RESULTS_DIR/03-copilot-response.png
# $RESULTS_DIR/04-agent-execution-result.png
# $RESULTS_DIR/05-error-state.png
```

**Aim for at least one screenshot per test scenario.** More is better — screenshots are the primary evidence that tests were actually run.

## Step 6: Show results to user with screenshots

After all tests complete, output a summary to the user:

1. Table of all scenarios with PASS/FAIL
2. Screenshots of failures (read the PNG files to show them)
3. Any bugs found with details
4. Recommendations

**CRITICAL: After all tests complete, you MUST show every screenshot to the user using the Read tool, with an explanation of what each screenshot shows.** This is the most important part of the test report — the user needs to visually verify the results.

For each screenshot:

1. Use the `Read` tool to display the PNG file (Claude can read images)
2. Write a 1-2 sentence explanation below it describing:
   - What page/state is being shown
   - What the screenshot proves (which test scenario it validates)
   - Any notable details visible in the UI

Format the output like this:

```markdown
### Screenshot 1: {descriptive title}
[Read the PNG file here]

**What it shows:** {1-2 sentence explanation of what this screenshot proves}

---
```

After showing all screenshots, output a summary table:

| # | Scenario | Result |
|---|----------|--------|
| 1 | {name} | PASS/FAIL |
| 2 | ... | ... |

**IMPORTANT:** As you show each screenshot and record test results, persist them in shell variables for Step 7:

```bash
# Build these variables during Step 6 — they are required by Step 7's script
declare -A SCREENSHOT_EXPLANATIONS=(
  ["01-login-page.png"]="Shows the login page loaded successfully with SSO options visible."
  ["02-builder-with-block.png"]="The builder canvas displays the newly added block connected to the trigger."
  # ... one entry per screenshot, using the same explanations you showed the user above
)

TEST_RESULTS_TABLE="| 1 | Login flow | PASS |
| 2 | Builder block addition | PASS |
| 3 | Copilot chat | FAIL |"
# ... one row per test scenario with actual results
```
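
Note: shell variables do not survive across separate shell invocations. If Step 7's script runs in a new shell, persist them to a file first. A small sketch using bash's `declare -p` (the file name is arbitrary):

```bash
# At the end of Step 6: serialize the variables so a later shell can reuse them
declare -p SCREENSHOT_EXPLANATIONS TEST_RESULTS_TABLE > "$RESULTS_DIR/report-vars.sh"

# At the start of Step 7: restore them
source "$RESULTS_DIR/report-vars.sh"
```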

## Step 7: Post test report as PR comment with screenshots

Upload screenshots to the PR using the GitHub Git API (no local git operations — safe for worktrees), then post a comment with inline images and per-screenshot explanations.

```bash
# Upload screenshots via GitHub Git API (creates blobs, tree, commit, and ref remotely)
```

@@ -406,17 +417,7 @@ REPO="Significant-Gravitas/AutoGPT"

```bash
SCREENSHOTS_BRANCH="test-screenshots/pr-${PR_NUMBER}"
SCREENSHOTS_DIR="test-screenshots/PR-${PR_NUMBER}"

# Step 1: Create blobs for each screenshot and build tree JSON
TREE_JSON='['
FIRST=true
for img in $RESULTS_DIR/*.png; do
```

@@ -428,42 +429,59 @@ for img in $RESULTS_DIR/*.png; do

```bash
done
TREE_JSON+=']'

# Step 2: Create tree, commit, and branch ref
TREE_SHA=$(echo "$TREE_JSON" | jq -c '{tree: .}' | gh api "repos/${REPO}/git/trees" --input - --jq '.sha')
COMMIT_SHA=$(gh api "repos/${REPO}/git/commits" \
  -f message="test: add E2E test screenshots for PR #${PR_NUMBER}" \
  -f tree="$TREE_SHA" \
  --jq '.sha')

# Step 3: Create or update the ref (branch) — no local checkout needed
gh api "repos/${REPO}/git/refs" \
  -f ref="refs/heads/${SCREENSHOTS_BRANCH}" \
  -f sha="$COMMIT_SHA" 2>/dev/null \
  || gh api "repos/${REPO}/git/refs/heads/${SCREENSHOTS_BRANCH}" \
       -X PATCH -f sha="$COMMIT_SHA" -f force=true
```

Then post the comment with **inline images AND explanations for each screenshot**:

```bash
REPO_URL="https://raw.githubusercontent.com/${REPO}/${SCREENSHOTS_BRANCH}"

# Build image markdown using SCREENSHOT_EXPLANATIONS and TEST_RESULTS_TABLE from Step 6
IMAGE_MARKDOWN=""
for img in $RESULTS_DIR/*.png; do
  BASENAME=$(basename "$img")
  TITLE=$(echo "${BASENAME%.png}" | sed 's/^[0-9]*-//' | sed 's/-/ /g' | awk '{for(i=1;i<=NF;i++) $i=toupper(substr($i,1,1)) tolower(substr($i,2))}1')
  EXPLANATION="${SCREENSHOT_EXPLANATIONS[$BASENAME]}"
  IMAGE_MARKDOWN="${IMAGE_MARKDOWN}
### ${TITLE}

${EXPLANATION}
"
done

# Write comment body to file to avoid shell interpretation issues with special characters
COMMENT_FILE=$(mktemp)
cat > "$COMMENT_FILE" <<INNEREOF
## 🧪 E2E Test Report

$(cat $RESULTS_DIR/test-report.md)

| # | Scenario | Result |
|---|----------|--------|
${TEST_RESULTS_TABLE}

### Screenshots
${IMAGE_MARKDOWN}
INNEREOF

gh api "repos/${REPO}/issues/$PR_NUMBER/comments" -F body=@"$COMMENT_FILE"
rm -f "$COMMENT_FILE"
```

**The PR comment MUST include:**

1. A summary table of all scenarios with PASS/FAIL
2. Every screenshot rendered inline (not just linked)
3. A 1-2 sentence explanation below each screenshot describing what it proves

This approach uses the GitHub Git API to create blobs, trees, commits, and refs entirely server-side. No local `git checkout` or `git push` — safe for worktrees and won't interfere with the PR branch.
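
If you need to confirm the upload or tidy it up later, the same ref can be read and deleted server-side as well. A minimal sketch, reusing the `REPO` and `SCREENSHOTS_BRANCH` variables from the script above; the cleanup call is optional:

```bash
# Verify the screenshots ref exists and points at the new commit
gh api "repos/${REPO}/git/ref/heads/${SCREENSHOTS_BRANCH}" --jq '.object.sha'

# Optional cleanup once the PR is closed (deletes only the screenshots branch)
gh api -X DELETE "repos/${REPO}/git/refs/heads/${SCREENSHOTS_BRANCH}"
```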
## Fix mode (--fix flag)
@@ -121,36 +121,20 @@ RUN ln -s ../lib/node_modules/npm/bin/npm-cli.js /usr/bin/npm \
&& ln -s ../lib/node_modules/npm/bin/npx-cli.js /usr/bin/npx
COPY --from=builder /root/.cache/prisma-python/binaries /root/.cache/prisma-python/binaries

# Install agent-browser (Copilot browser tool) using the system chromium package.
# Chrome for Testing (the binary agent-browser downloads via `agent-browser install`)
# has no ARM64 builds, so we use the distro-packaged chromium instead — verified to
# work with agent-browser via Docker tests on arm64; amd64 is validated in CI.
# Note: system chromium tracks the Debian package schedule rather than a pinned
# Chrome for Testing release. If agent-browser requires a specific Chrome version,
# verify compatibility against the chromium package version in the base image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends chromium fonts-liberation \
    && rm -rf /var/lib/apt/lists/* \
    && npm install -g agent-browser \
    && rm -rf /tmp/* /root/.npm

ENV AGENT_BROWSER_EXECUTABLE_PATH=/usr/bin/chromium

WORKDIR /app/autogpt_platform/backend

@@ -173,5 +157,4 @@ RUN POETRY_VIRTUALENVS_CREATE=true POETRY_VIRTUALENVS_IN_PROJECT=true \

ENV PORT=8000

CMD ["rest"]
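
To sanity-check the chromium wiring in a built image, one option is to invoke the binary the env var points at; the image tag here is a placeholder for whatever the local build is tagged as:

```bash
# Placeholder tag; substitute your locally built backend image
IMAGE=autogpt-backend:local

# AGENT_BROWSER_EXECUTABLE_PATH is baked in by the Dockerfile above and should
# point at an executable system chromium
docker run --rm "$IMAGE" sh -c \
  'echo "chromium at: $AGENT_BROWSER_EXECUTABLE_PATH" && "$AGENT_BROWSER_EXECUTABLE_PATH" --version'
```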
@@ -20,9 +20,9 @@ SSRF protection:

Requires:
    npm install -g agent-browser
    In Docker: system chromium package with AGENT_BROWSER_EXECUTABLE_PATH=/usr/bin/chromium
               (set automatically — no `agent-browser install` needed).
    Locally: run `agent-browser install` to download Chromium.
"""

import asyncio
@@ -0,0 +1,351 @@
"""Integration tests for agent-browser + system chromium.

These tests actually invoke the agent-browser binary via subprocess and require:
- agent-browser installed (npm install -g agent-browser)
- AGENT_BROWSER_EXECUTABLE_PATH=/usr/bin/chromium (set in Docker)

Run with:
    poetry run test

Or to run only this file:
    poetry run pytest backend/copilot/tools/agent_browser_integration_test.py -v -p no:autogpt_platform

Skipped automatically when agent-browser binary is not found.
Tests that hit external sites are marked ``integration`` and skipped by default
in CI (use ``-m integration`` to include them).

Two test tiers:
- CLI tests: call agent-browser subprocess directly (no backend imports needed)
- Tool class tests: call BrowserNavigateTool/BrowserActTool._execute() directly
  with user_id=None (skips workspace/DB interactions — no Postgres/RabbitMQ needed)
"""

import concurrent.futures
import os
import shutil
import subprocess
import tempfile
from datetime import datetime, timezone
from urllib.parse import urlparse

import pytest

from backend.copilot.model import ChatSession
from backend.copilot.tools.agent_browser import BrowserActTool, BrowserNavigateTool
from backend.copilot.tools.models import (
    BrowserActResponse,
    BrowserNavigateResponse,
    ErrorResponse,
)

pytestmark = pytest.mark.skipif(
    shutil.which("agent-browser") is None,
    reason="agent-browser binary not found",
)

_SESSION = "integration-test-session"


def _agent_browser(
    *args: str, session: str = _SESSION, timeout: int = 30
) -> tuple[int, str, str]:
    """Run agent-browser for the given session, return (rc, stdout, stderr)."""
    result = subprocess.run(
        ["agent-browser", "--session", session, "--session-name", session, *args],
        capture_output=True,
        text=True,
        timeout=timeout,
    )
    return result.returncode, result.stdout, result.stderr


def _close_session(session: str, timeout: int = 5) -> None:
    """Best-effort close for a browser session; never raises on failure."""
    try:
        subprocess.run(
            ["agent-browser", "--session", session, "--session-name", session, "close"],
            capture_output=True,
            timeout=timeout,
        )
    except (subprocess.TimeoutExpired, OSError):
        pass


@pytest.fixture(autouse=True)
def _teardown():
    """Close the shared test session after each test (best-effort)."""
    yield
    _close_session(_SESSION)


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------


def test_chromium_executable_env_is_set():
    """AGENT_BROWSER_EXECUTABLE_PATH must be set and point to an executable binary."""
    exe = os.environ.get("AGENT_BROWSER_EXECUTABLE_PATH", "")
    assert exe, "AGENT_BROWSER_EXECUTABLE_PATH is not set"
    assert os.path.isfile(exe), f"Chromium binary not found at {exe}"
    assert os.access(exe, os.X_OK), f"Chromium binary at {exe} is not executable"


@pytest.mark.integration
def test_navigate_returns_success():
    """agent-browser can open a public URL using system chromium."""
    rc, _, stderr = _agent_browser("open", "https://example.com")
    assert rc == 0, f"open failed (rc={rc}): {stderr}"


@pytest.mark.integration
def test_get_title_after_navigate():
    """get title returns the page title after navigation."""
    rc, _, _ = _agent_browser("open", "https://example.com")
    assert rc == 0

    rc, stdout, stderr = _agent_browser("get", "title", timeout=10)
    assert rc == 0, f"get title failed: {stderr}"
    assert "example" in stdout.lower()


@pytest.mark.integration
def test_get_url_after_navigate():
    """get url returns the navigated URL."""
    rc, _, _ = _agent_browser("open", "https://example.com")
    assert rc == 0

    rc, stdout, stderr = _agent_browser("get", "url", timeout=10)
    assert rc == 0, f"get url failed: {stderr}"
    assert urlparse(stdout.strip()).netloc == "example.com"


@pytest.mark.integration
def test_snapshot_returns_interactive_elements():
    """snapshot -i -c lists interactive elements on the page."""
    rc, _, _ = _agent_browser("open", "https://example.com")
    assert rc == 0

    rc, stdout, stderr = _agent_browser("snapshot", "-i", "-c", timeout=15)
    assert rc == 0, f"snapshot failed: {stderr}"
    assert len(stdout.strip()) > 0, "snapshot returned empty output"


@pytest.mark.integration
def test_screenshot_produces_valid_png():
    """screenshot saves a non-empty, valid PNG file."""
    rc, _, _ = _agent_browser("open", "https://example.com")
    assert rc == 0

    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
        tmp = f.name
    try:
        rc, _, stderr = _agent_browser("screenshot", tmp, timeout=15)
        assert rc == 0, f"screenshot failed: {stderr}"
        size = os.path.getsize(tmp)
        assert size > 1000, f"PNG too small ({size} bytes) — likely blank or corrupt"
        with open(tmp, "rb") as f:
            assert f.read(4) == b"\x89PNG", "Output is not a valid PNG"
    finally:
        os.unlink(tmp)


@pytest.mark.integration
def test_scroll_down():
    """scroll down succeeds without error."""
    rc, _, _ = _agent_browser("open", "https://example.com")
    assert rc == 0

    rc, _, stderr = _agent_browser("scroll", "down", timeout=10)
    assert rc == 0, f"scroll failed: {stderr}"


@pytest.mark.integration
def test_fill_form_field():
    """fill writes text into an input field."""
    rc, _, _ = _agent_browser("open", "https://httpbin.org/forms/post")
    assert rc == 0

    rc, _, stderr = _agent_browser(
        "fill", "input[name=custname]", "IntegrationTestUser", timeout=10
    )
    assert rc == 0, f"fill failed: {stderr}"


@pytest.mark.integration
def test_concurrent_independent_sessions():
    """Two independent sessions can navigate in parallel without interference."""
    session_a = "integration-concurrent-a"
    session_b = "integration-concurrent-b"

    try:
        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as pool:
            fut_a = pool.submit(
                _agent_browser, "open", "https://example.com", session=session_a
            )
            fut_b = pool.submit(
                _agent_browser, "open", "https://httpbin.org/html", session=session_b
            )
            rc_a, _, err_a = fut_a.result(timeout=40)
            rc_b, _, err_b = fut_b.result(timeout=40)
        assert rc_a == 0, f"session_a open failed: {err_a}"
        assert rc_b == 0, f"session_b open failed: {err_b}"

        rc_ua, url_a, err_ua = _agent_browser(
            "get", "url", session=session_a, timeout=10
        )
        rc_ub, url_b, err_ub = _agent_browser(
            "get", "url", session=session_b, timeout=10
        )
        assert rc_ua == 0, f"session_a get url failed: {err_ua}"
        assert rc_ub == 0, f"session_b get url failed: {err_ub}"
        assert urlparse(url_a.strip()).netloc == "example.com"
        assert urlparse(url_b.strip()).netloc == "httpbin.org"
    finally:
        _close_session(session_a)
        _close_session(session_b)


@pytest.mark.integration
def test_close_session():
    """close shuts down the browser daemon cleanly."""
    rc, _, _ = _agent_browser("open", "https://example.com")
    assert rc == 0

    rc, _, stderr = _agent_browser("close", timeout=10)
    assert rc == 0, f"close failed: {stderr}"


# ---------------------------------------------------------------------------
# Python tool class integration tests
#
# These tests exercise the actual BrowserNavigateTool / BrowserActTool Python
# classes (not just the CLI binary) to verify the full call path — URL
# validation, subprocess dispatch, response parsing — works with system
# chromium. user_id=None skips workspace/DB interactions so no Postgres or
# RabbitMQ is needed.
# ---------------------------------------------------------------------------

_TOOL_SESSION_ID = "integration-tool-test-session"
_TEST_SESSION = ChatSession(
    session_id=_TOOL_SESSION_ID,
    user_id="test-user",
    messages=[],
    usage=[],
    started_at=datetime.now(timezone.utc),
    updated_at=datetime.now(timezone.utc),
)


@pytest.fixture(autouse=False)
def _close_tool_session():
    """Tear down the tool-test browser session after each tool test."""
    yield
    _close_session(_TOOL_SESSION_ID)


@pytest.mark.integration
@pytest.mark.asyncio
async def test_tool_navigate_returns_response(_close_tool_session):
    """BrowserNavigateTool._execute returns a BrowserNavigateResponse with real content."""
    tool = BrowserNavigateTool()
    resp = await tool._execute(
        user_id=None, session=_TEST_SESSION, url="https://example.com"
    )
    assert isinstance(
        resp, BrowserNavigateResponse
    ), f"Expected BrowserNavigateResponse, got: {resp}"
    assert urlparse(resp.url).netloc == "example.com"
    assert resp.title, "Expected non-empty page title"
    assert resp.snapshot, "Expected non-empty accessibility snapshot"


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "ssrf_url",
    [
        "http://169.254.169.254/",  # AWS/GCP/Azure metadata endpoint
        "http://127.0.0.1/",  # IPv4 loopback
        "http://10.0.0.1/",  # RFC-1918 private range
        "http://[::1]/",  # IPv6 loopback
        "http://0.0.0.0/",  # Wildcard / INADDR_ANY
    ],
)
async def test_tool_navigate_blocked_url(ssrf_url: str, _close_tool_session):
    """BrowserNavigateTool._execute rejects internal/private URLs (SSRF guard)."""
    tool = BrowserNavigateTool()
    resp = await tool._execute(user_id=None, session=_TEST_SESSION, url=ssrf_url)
    assert isinstance(
        resp, ErrorResponse
    ), f"Expected ErrorResponse for SSRF URL {ssrf_url!r}, got: {resp}"
    assert resp.error == "blocked_url"


@pytest.mark.asyncio
async def test_tool_navigate_missing_url(_close_tool_session):
    """BrowserNavigateTool._execute returns an error when url is empty."""
    tool = BrowserNavigateTool()
    resp = await tool._execute(user_id=None, session=_TEST_SESSION, url="")
    assert isinstance(resp, ErrorResponse)
    assert resp.error == "missing_url"


@pytest.mark.integration
@pytest.mark.asyncio
async def test_tool_act_scroll(_close_tool_session):
    """BrowserActTool._execute can scroll after a navigate."""
    nav = BrowserNavigateTool()
    nav_resp = await nav._execute(
        user_id=None, session=_TEST_SESSION, url="https://example.com"
    )
    assert isinstance(nav_resp, BrowserNavigateResponse)

    act = BrowserActTool()
    resp = await act._execute(
        user_id=None, session=_TEST_SESSION, action="scroll", direction="down"
    )
    assert isinstance(
        resp, BrowserActResponse
    ), f"Expected BrowserActResponse, got: {resp}"
    assert resp.action == "scroll"


@pytest.mark.integration
@pytest.mark.asyncio
async def test_tool_act_fill_and_click(_close_tool_session):
    """BrowserActTool._execute can fill a form field."""
    nav = BrowserNavigateTool()
    nav_resp = await nav._execute(
        user_id=None, session=_TEST_SESSION, url="https://httpbin.org/forms/post"
    )
    assert isinstance(nav_resp, BrowserNavigateResponse)

    act = BrowserActTool()
    resp = await act._execute(
        user_id=None,
        session=_TEST_SESSION,
        action="fill",
        target="input[name=custname]",
        value="ToolIntegrationTest",
    )
    assert isinstance(resp, BrowserActResponse), f"fill failed: {resp}"


@pytest.mark.asyncio
async def test_tool_act_missing_action(_close_tool_session):
    """BrowserActTool._execute returns an error when action is missing."""
    act = BrowserActTool()
    resp = await act._execute(user_id=None, session=_TEST_SESSION, action="")
    assert isinstance(resp, ErrorResponse)
    assert resp.error == "missing_action"


@pytest.mark.asyncio
async def test_tool_act_missing_target(_close_tool_session):
    """BrowserActTool._execute returns an error when click target is missing."""
    act = BrowserActTool()
    resp = await act._execute(
        user_id=None, session=_TEST_SESSION, action="click", target=""
    )
    assert isinstance(resp, ErrorResponse)
    assert resp.error == "missing_target"
autogpt_platform/backend/backend/copilot/tools/conftest.py (new file, 20 lines)
@@ -0,0 +1,20 @@
"""Local conftest for copilot/tools tests.

Overrides the session-scoped `server` and `graph_cleanup` autouse fixtures from
backend/conftest.py so that integration tests in this directory do not trigger
the full SpinTestServer startup (which requires Postgres + RabbitMQ).
"""

import pytest_asyncio


@pytest_asyncio.fixture(scope="session", loop_scope="session")
async def server():  # type: ignore[override]
    """No-op server stub — tools tests don't need the full backend."""
    return None


@pytest_asyncio.fixture(scope="session", loop_scope="session", autouse=True)
async def graph_cleanup():  # type: ignore[override]
    """No-op graph cleanup stub."""
    yield
@@ -38,6 +38,10 @@ POOL_TIMEOUT = os.getenv("DB_POOL_TIMEOUT")
if POOL_TIMEOUT:
    DATABASE_URL = add_param(DATABASE_URL, "pool_timeout", POOL_TIMEOUT)

STMT_CACHE_SIZE = os.getenv("DB_STATEMENT_CACHE_SIZE")
if STMT_CACHE_SIZE:
    DATABASE_URL = add_param(DATABASE_URL, "statement_cache_size", STMT_CACHE_SIZE)
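
# Reuse DB_POOL_TIMEOUT as the HTTP timeout as well; stays None when the env var is unset.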
HTTP_TIMEOUT = int(POOL_TIMEOUT) if POOL_TIMEOUT else None

prisma = Prisma(
@@ -66,6 +66,9 @@ services:
    container_name: supabase-kong
    image: kong:2.8.1
    restart: unless-stopped
    networks:
      - default
      - shared-network
    ports:
      - 8000:8000/tcp
      - 8443:8443/tcp
@@ -407,6 +410,9 @@ services:
    container_name: supabase-db
    image: supabase/postgres:15.8.1.049
    restart: unless-stopped
    networks:
      - default
      - app-network
    volumes:
      - ./volumes/db/realtime.sql:/docker-entrypoint-initdb.d/migrations/99-realtime.sql:Z
      # Must be superuser to create event trigger
@@ -538,5 +544,11 @@ services:
        "/app/bin/migrate && /app/bin/supavisor eval \"$$(cat /etc/pooler/pooler.exs)\" && /app/bin/server"
      ]

networks:
  shared-network:
    name: shared-network
  app-network:
    name: app-network

volumes:
  supabase-config:
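
With the networks named explicitly, cross-compose connectivity can be checked from the host. A minimal sketch; the container name comes from this diff, and the curl image is an arbitrary choice. Any HTTP status in the output (even 404) proves the network path resolves:

```bash
# Containers currently attached to the shared network
docker network inspect shared-network --format '{{range .Containers}}{{.Name}} {{end}}'

# Reach kong from a throwaway container on the same network; prints the HTTP status code
docker run --rm --network shared-network curlimages/curl -s -o /dev/null -w '%{http_code}\n' http://supabase-kong:8000/
```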
@@ -10,6 +10,12 @@ then
fi

echo "Stopping and removing all containers..."
# Use the platform compose to tear everything down so no orphan containers remain
# (the platform compose manages supabase containers via `extends`; using the
# standalone supabase compose here would leave orphans that conflict on next start)
if [ -f "../../docker-compose.yml" ]; then
  docker compose -f ../../docker-compose.yml down -v --remove-orphans
fi
docker compose -f docker-compose.yml -f ./dev/docker-compose.dev.yml down -v --remove-orphans

echo "Cleaning up bind-mounted directories..."
@@ -114,6 +114,8 @@ services:
      <<: *backend-env
    ports:
      - "8006:8006"
    volumes:
      - workspace-data:/app/autogpt_platform/backend/workspaces
    networks:
      - app-network
    logging:
@@ -185,6 +187,8 @@ services:
      PYTHONUNBUFFERED: "1"
    ports:
      - "8008:8008"
    volumes:
      - workspace-data:/app/autogpt_platform/backend/workspaces
    networks:
      - app-network
    logging:
@@ -368,6 +372,9 @@ services:
      SUPABASE_URL: http://kong:8000
      AGPT_SERVER_URL: http://rest_server:8006/api
      AGPT_WS_SERVER_URL: ws://websocket_server:8001/ws

volumes:
  workspace-data:

networks:
  app-network:
    driver: bridge
@@ -7,6 +7,7 @@ networks:
volumes:
  supabase-config:
  clamav-data:
  workspace-data:

x-agpt-services:
  &agpt-services