Add Playwright-based end-to-end testing workflow (#10116)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Graham Neubig
2025-08-14 14:59:06 -04:00
committed by GitHub
parent 3e36911038
commit 426350224b
12 changed files with 1991 additions and 16 deletions

223
.github/workflows/e2e-tests.yml vendored Normal file
View File

@@ -0,0 +1,223 @@
name: End-to-End Tests
on:
pull_request:
types: [opened, synchronize, reopened, labeled]
branches:
- main
- develop
workflow_dispatch:
jobs:
e2e-tests:
if: contains(github.event.pull_request.labels.*.name, 'end-to-end') || github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
timeout-minutes: 60
env:
GITHUB_REPO_NAME: ${{ github.repository }}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install poetry via pipx
uses: abatilo/actions-poetry@v3
with:
poetry-version: 2.1.3
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: 'poetry'
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y libgtk-3-0 libnotify4 libnss3 libxss1 libxtst6 xauth xvfb libgbm1 libasound2t64 netcat-openbsd
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: '22'
cache: 'npm'
cache-dependency-path: 'frontend/package-lock.json'
- name: Setup environment for end-to-end tests
run: |
# Create test results directory
mkdir -p test-results
# Create downloads directory for OpenHands (use a directory in the home folder)
mkdir -p $HOME/downloads
sudo chown -R $USER:$USER $HOME/downloads
sudo chmod -R 755 $HOME/downloads
- name: Build OpenHands
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
LLM_MODEL: ${{ secrets.LLM_MODEL || 'gpt-4o' }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY || 'test-key' }}
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
INSTALL_DOCKER: 1
RUNTIME: docker
FRONTEND_PORT: 12000
FRONTEND_HOST: 0.0.0.0
BACKEND_HOST: 0.0.0.0
BACKEND_PORT: 3000
ENABLE_BROWSER: true
INSTALL_PLAYWRIGHT: 1
run: |
# Fix poetry.lock file if needed
echo "Fixing poetry.lock file if needed..."
poetry lock
# Build OpenHands using make build
echo "Running make build..."
make build
# Install Chromium Headless Shell for Playwright (needed for pytest-playwright)
echo "Installing Chromium Headless Shell for Playwright..."
poetry run playwright install chromium-headless-shell
# Verify Playwright browsers are installed (for e2e tests only)
echo "Verifying Playwright browsers installation for e2e tests..."
BROWSER_CHECK=$(poetry run python tests/e2e/check_playwright.py 2>/dev/null)
if [ "$BROWSER_CHECK" != "chromium_found" ]; then
echo "ERROR: Chromium browser not found or not working for e2e tests"
echo "$BROWSER_CHECK"
exit 1
else
echo "Playwright browsers are properly installed for e2e tests."
fi
# Docker runtime will handle workspace directory creation
# Start the application using make run with custom parameters and reduced logging
echo "Starting OpenHands using make run..."
# Set environment variables to reduce logging verbosity
export PYTHONUNBUFFERED=1
export LOG_LEVEL=WARNING
export UVICORN_LOG_LEVEL=warning
export OPENHANDS_LOG_LEVEL=WARNING
FRONTEND_PORT=12000 FRONTEND_HOST=0.0.0.0 BACKEND_HOST=0.0.0.0 make run > /tmp/openhands-e2e-test.log 2>&1 &
# Store the PID of the make run process
MAKE_PID=$!
echo "OpenHands started with PID: $MAKE_PID"
# Wait for the application to start
echo "Waiting for OpenHands to start..."
max_attempts=15
attempt=1
while [ $attempt -le $max_attempts ]; do
echo "Checking if OpenHands is running (attempt $attempt of $max_attempts)..."
# Check if the process is still running
if ! ps -p $MAKE_PID > /dev/null; then
echo "ERROR: OpenHands process has terminated unexpectedly"
echo "Last 50 lines of the log:"
tail -n 50 /tmp/openhands-e2e-test.log
exit 1
fi
# Check if frontend port is open
if nc -z localhost 12000; then
# Verify we can get HTML content
if curl -s http://localhost:12000 | grep -q "<html"; then
echo "SUCCESS: OpenHands is running and serving HTML content on port 12000"
break
else
echo "Port 12000 is open but not serving HTML content yet"
fi
else
echo "Frontend port 12000 is not open yet"
fi
# Show log output on each attempt
echo "Recent log output:"
tail -n 20 /tmp/openhands-e2e-test.log
# Wait before next attempt
echo "Waiting 10 seconds before next check..."
sleep 10
attempt=$((attempt + 1))
# Exit if we've reached the maximum number of attempts
if [ $attempt -gt $max_attempts ]; then
echo "ERROR: OpenHands failed to start after $max_attempts attempts"
echo "Last 50 lines of the log:"
tail -n 50 /tmp/openhands-e2e-test.log
exit 1
fi
done
# Final verification that the app is running
if ! nc -z localhost 12000 || ! curl -s http://localhost:12000 | grep -q "<html"; then
echo "ERROR: OpenHands is not running properly on port 12000"
echo "Last 50 lines of the log:"
tail -n 50 /tmp/openhands-e2e-test.log
exit 1
fi
# Print success message
echo "OpenHands is running successfully on port 12000"
- name: Run end-to-end tests
env:
GITHUB_TOKEN: ${{ secrets.E2E_TEST_GITHUB_TOKEN }}
LLM_MODEL: ${{ secrets.LLM_MODEL || 'gpt-4o' }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY || 'test-key' }}
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
run: |
# Check if the application is running
if ! nc -z localhost 12000; then
echo "ERROR: OpenHands is not running on port 12000"
echo "Last 50 lines of the log:"
tail -n 50 /tmp/openhands-e2e-test.log
exit 1
fi
# Run the tests with detailed output
cd tests/e2e
poetry run python -m pytest test_e2e_workflow.py::test_github_token_configuration test_e2e_workflow.py::test_conversation_start -v --no-header --capture=no --timeout=600
- name: Upload test results
if: always()
uses: actions/upload-artifact@v4
with:
name: playwright-report
path: tests/e2e/test-results/
retention-days: 30
- name: Upload OpenHands logs
if: always()
uses: actions/upload-artifact@v4
with:
name: openhands-logs
path: |
/tmp/openhands-e2e-test.log
/tmp/openhands-e2e-build.log
/tmp/openhands-backend.log
/tmp/openhands-frontend.log
/tmp/backend-health-check.log
/tmp/frontend-check.log
/tmp/vite-config.log
/tmp/makefile-contents.log
retention-days: 30
- name: Cleanup
if: always()
run: |
# Stop OpenHands processes
echo "Stopping OpenHands processes..."
pkill -f "python -m openhands.server" || true
pkill -f "npm run dev" || true
pkill -f "make run" || true
# Print process status for debugging
echo "Checking if any OpenHands processes are still running:"
ps aux | grep -E "openhands|npm run dev" || true

View File

@@ -51,8 +51,6 @@ jobs:
run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest --forked -n auto -svv ./tests/unit
- name: Run Runtime Tests with CLIRuntime
run: PYTHONPATH=".:$PYTHONPATH" TEST_RUNTIME=cli poetry run pytest -svv tests/runtime/test_bash.py
- name: Run E2E Tests
run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest -svv tests/e2e
# Run specific Windows python tests
test-on-windows:

3
.gitignore vendored
View File

@@ -254,3 +254,6 @@ containers/runtime/Dockerfile
containers/runtime/project.tar.gz
containers/runtime/code
**/node_modules/
# test results
test-results

View File

@@ -6114,6 +6114,60 @@
"node": ">=14.0.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/core": {
"version": "1.4.3",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"@emnapi/wasi-threads": "1.0.2",
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/runtime": {
"version": "1.4.3",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/wasi-threads": {
"version": "1.0.2",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@napi-rs/wasm-runtime": {
"version": "0.2.11",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"@emnapi/core": "^1.4.3",
"@emnapi/runtime": "^1.4.3",
"@tybys/wasm-util": "^0.9.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@tybys/wasm-util": {
"version": "0.9.0",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/tslib": {
"version": "2.8.0",
"inBundle": true,
"license": "0BSD",
"optional": true
},
"node_modules/@tailwindcss/oxide-win32-arm64-msvc": {
"version": "4.1.11",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.11.tgz",

View File

@@ -31,6 +31,52 @@ export default defineConfig(({ mode }) => {
svgr(),
tailwindcss(),
],
optimizeDeps: {
include: [
// Pre-bundle ALL dependencies to prevent runtime optimization and page reloads
// These are discovered during initial app load:
"react-redux",
"posthog-js",
"@tanstack/react-query",
"react-hot-toast",
"@reduxjs/toolkit",
"i18next",
"i18next-http-backend",
"i18next-browser-languagedetector",
"react-i18next",
"axios",
"date-fns",
"@uidotdev/usehooks",
"react-icons/fa6",
"react-icons/fa",
"clsx",
"tailwind-merge",
"@heroui/react",
"lucide-react",
"react-select",
"react-select/async",
"@microlink/react-json-view",
"socket.io-client",
// These are discovered when launching conversations:
"react-icons/vsc",
"react-icons/lu",
"react-icons/di",
"react-icons/io5",
"react-icons/io", // Added to prevent runtime optimization
"@monaco-editor/react",
"react-textarea-autosize",
"react-markdown",
"remark-gfm",
"remark-breaks",
"react-syntax-highlighter",
"react-syntax-highlighter/dist/esm/styles/prism",
"react-syntax-highlighter/dist/esm/styles/hljs",
// Terminal dependencies - added to prevent runtime optimization
"@xterm/addon-fit",
"@xterm/xterm",
"@xterm/xterm/css/xterm.css",
],
},
server: {
port: FE_PORT,
host: true,

110
poetry.lock generated
View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand.
# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand.
[[package]]
name = "aiofiles"
@@ -404,7 +404,7 @@ description = "LTS Port of Python audioop"
optional = false
python-versions = ">=3.13"
groups = ["main"]
markers = "python_version == \"3.13\""
markers = "python_version >= \"3.13\""
files = [
{file = "audioop_lts-0.2.1-cp313-abi3-macosx_10_13_universal2.whl", hash = "sha256:fd1345ae99e17e6910f47ce7d52673c6a1a70820d78b67de1b7abb3af29c426a"},
{file = "audioop_lts-0.2.1-cp313-abi3-macosx_10_13_x86_64.whl", hash = "sha256:e175350da05d2087e12cea8e72a70a1a8b14a17e92ed2022952a4419689ede5e"},
@@ -2997,8 +2997,8 @@ files = [
google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]}
google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev"
proto-plus = [
{version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""},
{version = ">=1.22.3,<2.0.0dev"},
{version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""},
]
protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev"
@@ -3020,8 +3020,8 @@ googleapis-common-protos = ">=1.56.2,<2.0.0"
grpcio = {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}
grpcio-status = {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}
proto-plus = [
{version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""},
{version = ">=1.22.3,<2.0.0"},
{version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""},
]
protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0"
requests = ">=2.18.0,<3.0.0"
@@ -3239,8 +3239,8 @@ google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0", extras
google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0"
grpc-google-iam-v1 = ">=0.14.0,<1.0.0"
proto-plus = [
{version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""},
{version = ">=1.22.3,<2.0.0"},
{version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""},
]
protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0"
@@ -3462,7 +3462,6 @@ files = [
{file = "greenlet-3.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:eeb27bece45c0c2a5842ac4c5a1b5c2ceaefe5711078eed4e8043159fa05c834"},
{file = "greenlet-3.2.2.tar.gz", hash = "sha256:ad053d34421a2debba45aa3cc39acf454acbcd025b3fc1a9f8a0dee237abd485"},
]
markers = {test = "platform_python_implementation == \"CPython\""}
[package.extras]
docs = ["Sphinx", "furo"]
@@ -6664,8 +6663,8 @@ files = [
[package.dependencies]
googleapis-common-protos = ">=1.52,<2.0"
grpcio = [
{version = ">=1.66.2,<2.0.0", markers = "python_version >= \"3.13\""},
{version = ">=1.63.2,<2.0.0", markers = "python_version < \"3.13\""},
{version = ">=1.66.2,<2.0.0", markers = "python_version >= \"3.13\""},
]
opentelemetry-api = ">=1.15,<2.0"
opentelemetry-exporter-otlp-proto-common = "1.34.1"
@@ -7063,7 +7062,7 @@ version = "1.52.0"
description = "A high-level API to automate web browsers"
optional = false
python-versions = ">=3.9"
groups = ["main", "evaluation"]
groups = ["main", "evaluation", "test"]
files = [
{file = "playwright-1.52.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:19b2cb9d4794062008a635a99bd135b03ebb782d460f96534a91cb583f549512"},
{file = "playwright-1.52.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:0797c0479cbdc99607412a3c486a3a2ec9ddc77ac461259fd2878c975bcbb94a"},
@@ -7737,7 +7736,7 @@ version = "13.0.0"
description = "A rough port of Node.js's EventEmitter to Python with a few tricks of its own"
optional = false
python-versions = ">=3.8"
groups = ["main", "evaluation"]
groups = ["main", "evaluation", "test"]
files = [
{file = "pyee-13.0.0-py3-none-any.whl", hash = "sha256:48195a3cddb3b1515ce0695ed76036b5ccc2ef3a9f963ff9f77aec0139845498"},
{file = "pyee-13.0.0.tar.gz", hash = "sha256:b391e3c5a434d1f5118a25615001dbc8f669cf410ab67d04c4d4e07c55481c37"},
@@ -7975,6 +7974,25 @@ pytest = ">=8.2,<9"
docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1)"]
testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"]
[[package]]
name = "pytest-base-url"
version = "2.1.0"
description = "pytest plugin for URL based testing"
optional = false
python-versions = ">=3.8"
groups = ["test"]
files = [
{file = "pytest_base_url-2.1.0-py3-none-any.whl", hash = "sha256:3ad15611778764d451927b2a53240c1a7a591b521ea44cebfe45849d2d2812e6"},
{file = "pytest_base_url-2.1.0.tar.gz", hash = "sha256:02748589a54f9e63fcbe62301d6b0496da0d10231b753e950c63e03aee745d45"},
]
[package.dependencies]
pytest = ">=7.0.0"
requests = ">=2.9"
[package.extras]
test = ["black (>=22.1.0)", "flake8 (>=4.0.1)", "pre-commit (>=2.17.0)", "pytest-localserver (>=0.7.1)", "tox (>=3.24.5)"]
[[package]]
name = "pytest-cov"
version = "6.2.1"
@@ -8011,6 +8029,39 @@ files = [
py = "*"
pytest = ">=3.10"
[[package]]
name = "pytest-playwright"
version = "0.7.0"
description = "A pytest wrapper with fixtures for Playwright to automate web browsers"
optional = false
python-versions = ">=3.9"
groups = ["test"]
files = [
{file = "pytest_playwright-0.7.0-py3-none-any.whl", hash = "sha256:2516d0871fa606634bfe32afbcc0342d68da2dbff97fe3459849e9c428486da2"},
{file = "pytest_playwright-0.7.0.tar.gz", hash = "sha256:b3f2ea514bbead96d26376fac182f68dcd6571e7cb41680a89ff1673c05d60b6"},
]
[package.dependencies]
playwright = ">=1.18"
pytest = ">=6.2.4,<9.0.0"
pytest-base-url = ">=1.0.0,<3.0.0"
python-slugify = ">=6.0.0,<9.0.0"
[[package]]
name = "pytest-timeout"
version = "2.4.0"
description = "pytest plugin to abort hanging tests"
optional = false
python-versions = ">=3.7"
groups = ["test"]
files = [
{file = "pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2"},
{file = "pytest_timeout-2.4.0.tar.gz", hash = "sha256:7e68e90b01f9eff71332b25001f85c75495fc4e3a836701876183c4bcfd0540a"},
]
[package.dependencies]
pytest = ">=7.0.0"
[[package]]
name = "pytest-xdist"
version = "3.8.0"
@@ -8177,6 +8228,24 @@ Pillow = ">=3.3.2"
typing-extensions = ">=4.9.0"
XlsxWriter = ">=0.5.7"
[[package]]
name = "python-slugify"
version = "8.0.4"
description = "A Python slugify application that also handles Unicode"
optional = false
python-versions = ">=3.7"
groups = ["test"]
files = [
{file = "python-slugify-8.0.4.tar.gz", hash = "sha256:59202371d1d05b54a9e7720c5e038f928f45daaffe41dd10822f3907b937c856"},
{file = "python_slugify-8.0.4-py2.py3-none-any.whl", hash = "sha256:276540b79961052b66b7d116620b36518847f52d5fd9e3a70164fc8c50faa6b8"},
]
[package.dependencies]
text-unidecode = ">=1.3"
[package.extras]
unidecode = ["Unidecode (>=1.1.1)"]
[[package]]
name = "python-socketio"
version = "5.13.0"
@@ -8769,7 +8838,7 @@ version = "2.32.3"
description = "Python HTTP for Humans."
optional = false
python-versions = ">=3.8"
groups = ["main", "evaluation", "runtime"]
groups = ["main", "evaluation", "runtime", "test"]
files = [
{file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"},
{file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"},
@@ -9369,6 +9438,7 @@ files = [
{file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"},
{file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"},
]
markers = {evaluation = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
[package.extras]
check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""]
@@ -9612,7 +9682,7 @@ description = "Standard library aifc redistribution. \"dead battery\"."
optional = false
python-versions = "*"
groups = ["main"]
markers = "python_version == \"3.13\""
markers = "python_version >= \"3.13\""
files = [
{file = "standard_aifc-3.13.0-py3-none-any.whl", hash = "sha256:f7ae09cc57de1224a0dd8e3eb8f73830be7c3d0bc485de4c1f82b4a7f645ac66"},
{file = "standard_aifc-3.13.0.tar.gz", hash = "sha256:64e249c7cb4b3daf2fdba4e95721f811bde8bdfc43ad9f936589b7bb2fae2e43"},
@@ -9629,7 +9699,7 @@ description = "Standard library chunk redistribution. \"dead battery\"."
optional = false
python-versions = "*"
groups = ["main"]
markers = "python_version == \"3.13\""
markers = "python_version >= \"3.13\""
files = [
{file = "standard_chunk-3.13.0-py3-none-any.whl", hash = "sha256:17880a26c285189c644bd5bd8f8ed2bdb795d216e3293e6dbe55bbd848e2982c"},
{file = "standard_chunk-3.13.0.tar.gz", hash = "sha256:4ac345d37d7e686d2755e01836b8d98eda0d1a3ee90375e597ae43aaf064d654"},
@@ -9896,6 +9966,18 @@ aiohttp = ">=3.8,<4.0"
huggingface-hub = ">=0.12,<1.0"
pydantic = ">2,<3"
[[package]]
name = "text-unidecode"
version = "1.3"
description = "The most basic Text::Unidecode port"
optional = false
python-versions = "*"
groups = ["test"]
files = [
{file = "text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93"},
{file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"},
]
[[package]]
name = "tifffile"
version = "2025.6.1"
@@ -10655,7 +10737,7 @@ version = "2.4.0"
description = "HTTP library with thread-safe connection pooling, file post, and more."
optional = false
python-versions = ">=3.9"
groups = ["main", "evaluation", "runtime"]
groups = ["main", "evaluation", "runtime", "test"]
files = [
{file = "urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813"},
{file = "urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466"},
@@ -11797,4 +11879,4 @@ third-party-runtimes = ["daytona", "e2b", "modal", "runloop-api-client"]
[metadata]
lock-version = "2.1"
python-versions = "^3.12,<3.14"
content-hash = "9fd177a2dfa1eebb9212e515db93c58f82d6126cc2d131de5321d68772bc2a59"
content-hash = "dbcab8224ee537e465f51c5170d8c19e749236c7ba01268f459140c95266afd7"

View File

@@ -126,6 +126,8 @@ pytest-cov = "*"
pytest-asyncio = "*"
pytest-forked = "*"
pytest-xdist = "*"
pytest-playwright = "^0.7.0"
pytest-timeout = "^2.4.0"
openai = "*"
pandas = "*"
reportlab = "*"

112
tests/e2e/README.md Normal file
View File

@@ -0,0 +1,112 @@
# OpenHands End-to-End Tests
This directory contains end-to-end tests for the OpenHands application. These tests use Playwright to interact with the OpenHands UI and verify that the application works correctly.
## Running the Tests
### Prerequisites
- Python 3.12 or later
- Poetry
- Node.js
- Playwright
### Environment Variables
The following environment variables are required:
- `GITHUB_TOKEN`: A GitHub token with access to the repositories you want to test
- `LLM_MODEL`: The LLM model to use (e.g., "gpt-4o")
- `LLM_API_KEY`: The API key for the LLM model
Optional environment variables:
- `LLM_BASE_URL`: The base URL for the LLM API (if using a custom endpoint)
### Running Locally
To run the full end-to-end test suite locally:
```bash
cd tests/e2e
poetry run pytest test_e2e_workflow.py -v
```
This runs all tests in sequence:
1. GitHub token configuration
2. Conversation start
### Running Individual Tests
You can run individual tests directly:
```bash
cd tests/e2e
# Run the GitHub token configuration test
poetry run pytest test_e2e_workflow.py::test_github_token_configuration -v
# Run the conversation start test
poetry run pytest test_e2e_workflow.py::test_conversation_start -v
```
### Running with Visible Browser
To run the tests with a visible browser (non-headless mode) so you can watch the browser interactions:
```bash
cd tests/e2e
poetry run pytest test_e2e_workflow.py::test_github_token_configuration -v --no-headless --slow-mo=50
poetry run pytest test_e2e_workflow.py::test_conversation_start -v --no-headless --slow-mo=50
```
### GitHub Workflow
The tests can also be run as part of a GitHub workflow. The workflow is triggered by:
1. Adding the "end-to-end" label to a pull request
2. Manually triggering the workflow from the GitHub Actions tab
## Test Descriptions
### GitHub Token Configuration Test
The GitHub token configuration test (`test_github_token_configuration`) performs the following steps:
1. Navigates to the OpenHands application
2. Checks if the GitHub token is already configured:
- If not configured, it navigates to the settings page and configures it
- If already configured, it verifies the repository selection is available
3. Verifies that the GitHub token is saved and the repository selection is available
### Conversation Start Test
The conversation start test (`test_conversation_start`) performs the following steps:
1. Navigates to the OpenHands application (assumes GitHub token is already configured)
2. Selects the "openhands-agent/OpenHands" repository
3. Clicks the "Launch" button
4. Waits for the conversation interface to load
5. Waits for the agent to initialize
6. Asks "How many lines are there in the main README.md file?"
7. Waits for and verifies the agent's response
### Simple Browser Navigation Test
A simple test (`test_simple_browser_navigation`) that just navigates to the OpenHands GitHub repository to verify the browser setup works correctly.
### Local Runtime Test
A separate test (`test_headless_mode_with_dummy_agent_no_browser` in `test_local_runtime.py`) that tests the local runtime with a dummy agent in headless mode.
## Troubleshooting
If the tests fail, check the following:
1. Make sure all required environment variables are set
2. Check the logs in `/tmp/openhands-e2e-test.log` and `/tmp/openhands-e2e-build.log`
3. Verify that the OpenHands application is running correctly
4. Check the Playwright test results in the `test-results` directory

View File

@@ -0,0 +1,15 @@
import sys
try:
from playwright.sync_api import sync_playwright
with sync_playwright() as p:
if p.chromium.executable_path:
print('chromium_found')
sys.exit(0)
else:
print('chromium_not_found')
sys.exit(1)
except Exception as e:
print(f'error: {e}')
sys.exit(1)

46
tests/e2e/conftest.py Normal file
View File

@@ -0,0 +1,46 @@
import pytest
def pytest_addoption(parser):
"""Add command-line options for controlling browser behavior."""
parser.addoption(
'--headless',
action='store_true',
default=True,
help='Run browser in headless mode (default)',
)
parser.addoption(
'--no-headless',
action='store_false',
dest='headless',
help='Run browser in non-headless mode to watch the browser',
)
parser.addoption(
'--slow-mo',
action='store',
default=0,
type=int,
help='Add delay between actions in milliseconds (default: 0)',
)
@pytest.fixture(scope='session')
def browser_context_args(browser_context_args):
"""Return the browser context args."""
return browser_context_args
@pytest.fixture(scope='session')
def browser_type_launch_args(request):
"""Override the browser launch arguments based on command-line options."""
headless = request.config.getoption('--headless')
slow_mo = request.config.getoption('--slow-mo')
args = {
'headless': headless,
}
if slow_mo > 0:
args['slow_mo'] = slow_mo
return args

6
tests/e2e/pytest.ini Normal file
View File

@@ -0,0 +1,6 @@
[pytest]
testpaths = tests/e2e
python_files = test_*.py
python_classes = Test*
python_functions = test_*
timeout = 300

File diff suppressed because it is too large Load Diff