Add Playwright-based end-to-end testing workflow (#10116)

Co-authored-by: openhands <openhands@all-hands.dev>
2026-01-08 22:38:05 -05:00 · 2025-08-14 14:59:06 -04:00
parent 3e36911038
commit 426350224b
12 changed files with 1991 additions and 16 deletions
--- a/.github/workflows/e2e-tests.yml
+++ b/.github/workflows/e2e-tests.yml
@@ -0,0 +1,223 @@
+name: End-to-End Tests
+
+# Fires on pull-request activity (including label changes) against main/develop
+# and on manual dispatch. The job-level `if` below further restricts pull-request
+# runs to those carrying the 'end-to-end' label.
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, labeled]
+    branches:
+      - main
+      - develop
+  workflow_dispatch:
+
+jobs:
+  e2e-tests:
+    if: contains(github.event.pull_request.labels.*.name, 'end-to-end') || github.event_name == 'workflow_dispatch'
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+
+    env:
+      GITHUB_REPO_NAME: ${{ github.repository }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install poetry via pipx
+        uses: abatilo/actions-poetry@v3
+        with:
+          poetry-version: 2.1.3
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: 'poetry'
+
+      - name: Install system dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y libgtk-3-0 libnotify4 libnss3 libxss1 libxtst6 xauth xvfb libgbm1 libasound2t64 netcat-openbsd
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+          cache: 'npm'
+          cache-dependency-path: 'frontend/package-lock.json'
+
+      - name: Setup environment for end-to-end tests
+        run: |
+          # Create test results directory
+          mkdir -p test-results
+
+          # Create downloads directory for OpenHands (use a directory in the home folder)
+          mkdir -p $HOME/downloads
+          sudo chown -R $USER:$USER $HOME/downloads
+          sudo chmod -R 755 $HOME/downloads
+
+      - name: Build OpenHands
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          LLM_MODEL: ${{ secrets.LLM_MODEL || 'gpt-4o' }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY || 'test-key' }}
+          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
+          INSTALL_DOCKER: 1
+          RUNTIME: docker
+          FRONTEND_PORT: 12000
+          FRONTEND_HOST: 0.0.0.0
+          BACKEND_HOST: 0.0.0.0
+          BACKEND_PORT: 3000
+          ENABLE_BROWSER: true
+          INSTALL_PLAYWRIGHT: 1
+        run: |
+          # Fix poetry.lock file if needed
+          echo "Fixing poetry.lock file if needed..."
+          poetry lock
+
+          # Build OpenHands using make build
+          echo "Running make build..."
+          make build
+
+          # Install Chromium Headless Shell for Playwright (needed for pytest-playwright)
+          echo "Installing Chromium Headless Shell for Playwright..."
+          poetry run playwright install chromium-headless-shell
+
+          # Verify Playwright browsers are installed (for e2e tests only)
+          echo "Verifying Playwright browsers installation for e2e tests..."
+          # check_playwright.py exits non-zero on every failure path. This script
+          # runs under `bash -e`, so a bare assignment from a failing command
+          # substitution would abort the step right here, before the diagnostics
+          # below are printed. `|| true` defers the pass/fail decision to the
+          # string comparison that follows.
+          BROWSER_CHECK=$(poetry run python tests/e2e/check_playwright.py 2>/dev/null) || true
+
+          if [ "$BROWSER_CHECK" != "chromium_found" ]; then
+            echo "ERROR: Chromium browser not found or not working for e2e tests"
+            echo "$BROWSER_CHECK"
+            exit 1
+          else
+            echo "Playwright browsers are properly installed for e2e tests."
+          fi
+
+          # Docker runtime will handle workspace directory creation
+
+          # Start the application using make run with custom parameters and reduced logging
+          echo "Starting OpenHands using make run..."
+          # Set environment variables to reduce logging verbosity
+          export PYTHONUNBUFFERED=1
+          export LOG_LEVEL=WARNING
+          export UVICORN_LOG_LEVEL=warning
+          export OPENHANDS_LOG_LEVEL=WARNING
+          FRONTEND_PORT=12000 FRONTEND_HOST=0.0.0.0 BACKEND_HOST=0.0.0.0 make run > /tmp/openhands-e2e-test.log 2>&1 &
+
+          # Store the PID of the make run process
+          MAKE_PID=$!
+          echo "OpenHands started with PID: $MAKE_PID"
+
+          # Wait for the application to start
+          echo "Waiting for OpenHands to start..."
+          max_attempts=15
+          attempt=1
+
+          while [ $attempt -le $max_attempts ]; do
+            echo "Checking if OpenHands is running (attempt $attempt of $max_attempts)..."
+
+            # Check if the process is still running
+            if ! ps -p $MAKE_PID > /dev/null; then
+              echo "ERROR: OpenHands process has terminated unexpectedly"
+              echo "Last 50 lines of the log:"
+              tail -n 50 /tmp/openhands-e2e-test.log
+              exit 1
+            fi
+
+            # Check if frontend port is open
+            if nc -z localhost 12000; then
+              # Verify we can get HTML content
+              if curl -s http://localhost:12000 | grep -q "<html"; then
+                echo "SUCCESS: OpenHands is running and serving HTML content on port 12000"
+                break
+              else
+                echo "Port 12000 is open but not serving HTML content yet"
+              fi
+            else
+              echo "Frontend port 12000 is not open yet"
+            fi
+
+            # Show log output on each attempt
+            echo "Recent log output:"
+            tail -n 20 /tmp/openhands-e2e-test.log
+
+            # Wait before next attempt
+            echo "Waiting 10 seconds before next check..."
+            sleep 10
+            attempt=$((attempt + 1))
+
+            # Exit if we've reached the maximum number of attempts
+            if [ $attempt -gt $max_attempts ]; then
+              echo "ERROR: OpenHands failed to start after $max_attempts attempts"
+              echo "Last 50 lines of the log:"
+              tail -n 50 /tmp/openhands-e2e-test.log
+              exit 1
+            fi
+          done
+
+          # Final verification that the app is running
+          if ! nc -z localhost 12000 || ! curl -s http://localhost:12000 | grep -q "<html"; then
+            echo "ERROR: OpenHands is not running properly on port 12000"
+            echo "Last 50 lines of the log:"
+            tail -n 50 /tmp/openhands-e2e-test.log
+            exit 1
+          fi
+
+          # Print success message
+          echo "OpenHands is running successfully on port 12000"
+
+      - name: Run end-to-end tests
+        env:
+          GITHUB_TOKEN: ${{ secrets.E2E_TEST_GITHUB_TOKEN }}
+          LLM_MODEL: ${{ secrets.LLM_MODEL || 'gpt-4o' }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY || 'test-key' }}
+          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
+        run: |
+          # Check if the application is running
+          if ! nc -z localhost 12000; then
+            echo "ERROR: OpenHands is not running on port 12000"
+            echo "Last 50 lines of the log:"
+            tail -n 50 /tmp/openhands-e2e-test.log
+            exit 1
+          fi
+
+          # Run the tests with detailed output
+          cd tests/e2e
+          poetry run python -m pytest test_e2e_workflow.py::test_github_token_configuration test_e2e_workflow.py::test_conversation_start -v --no-header --capture=no --timeout=600
+
+      - name: Upload test results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: playwright-report
+          path: tests/e2e/test-results/
+          retention-days: 30
+
+      # Uploads diagnostic logs even when earlier steps failed (`if: always()`).
+      # NOTE(review): of the paths below, only /tmp/openhands-e2e-test.log is
+      # written by a command visible in this workflow (the `make run` redirect);
+      # the others are presumably produced by make targets, if at all - confirm.
+      - name: Upload OpenHands logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: openhands-logs
+          path: |
+            /tmp/openhands-e2e-test.log
+            /tmp/openhands-e2e-build.log
+            /tmp/openhands-backend.log
+            /tmp/openhands-frontend.log
+            /tmp/backend-health-check.log
+            /tmp/frontend-check.log
+            /tmp/vite-config.log
+            /tmp/makefile-contents.log
+          retention-days: 30
+
+      - name: Cleanup
+        if: always()
+        run: |
+          # Stop OpenHands processes
+          echo "Stopping OpenHands processes..."
+          pkill -f "python -m openhands.server" || true
+          pkill -f "npm run dev" || true
+          pkill -f "make run" || true
+
+          # Print process status for debugging
+          echo "Checking if any OpenHands processes are still running:"
+          ps aux | grep -E "openhands|npm run dev" || true
--- a/.github/workflows/py-tests.yml
+++ b/.github/workflows/py-tests.yml
@@ -51,8 +51,6 @@ jobs:
        run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest --forked -n auto -svv ./tests/unit
      - name: Run Runtime Tests with CLIRuntime
        run: PYTHONPATH=".:$PYTHONPATH" TEST_RUNTIME=cli poetry run pytest -svv tests/runtime/test_bash.py
-      - name: Run E2E Tests
-        run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest -svv tests/e2e

  # Run specific Windows python tests
  test-on-windows:
--- a/.gitignore
+++ b/.gitignore
@@ -254,3 +254,6 @@ containers/runtime/Dockerfile
 containers/runtime/project.tar.gz
 containers/runtime/code
 **/node_modules/
+
+# test results
+test-results
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -6114,6 +6114,60 @@
        "node": ">=14.0.0"
      }
    },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/core": {
+      "version": "1.4.3",
+      "inBundle": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "@emnapi/wasi-threads": "1.0.2",
+        "tslib": "^2.4.0"
+      }
+    },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/runtime": {
+      "version": "1.4.3",
+      "inBundle": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "tslib": "^2.4.0"
+      }
+    },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/wasi-threads": {
+      "version": "1.0.2",
+      "inBundle": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "tslib": "^2.4.0"
+      }
+    },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@napi-rs/wasm-runtime": {
+      "version": "0.2.11",
+      "inBundle": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "@emnapi/core": "^1.4.3",
+        "@emnapi/runtime": "^1.4.3",
+        "@tybys/wasm-util": "^0.9.0"
+      }
+    },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@tybys/wasm-util": {
+      "version": "0.9.0",
+      "inBundle": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "tslib": "^2.4.0"
+      }
+    },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/tslib": {
+      "version": "2.8.0",
+      "inBundle": true,
+      "license": "0BSD",
+      "optional": true
+    },
    "node_modules/@tailwindcss/oxide-win32-arm64-msvc": {
      "version": "4.1.11",
      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.11.tgz",
--- a/frontend/vite.config.ts
+++ b/frontend/vite.config.ts
@@ -31,6 +31,52 @@ export default defineConfig(({ mode }) => {
      svgr(),
      tailwindcss(),
    ],
+    optimizeDeps: {
+      include: [
+        // Pre-bundle ALL dependencies to prevent runtime optimization and page reloads
+        // These are discovered during initial app load:
+        "react-redux",
+        "posthog-js",
+        "@tanstack/react-query",
+        "react-hot-toast",
+        "@reduxjs/toolkit",
+        "i18next",
+        "i18next-http-backend",
+        "i18next-browser-languagedetector",
+        "react-i18next",
+        "axios",
+        "date-fns",
+        "@uidotdev/usehooks",
+        "react-icons/fa6",
+        "react-icons/fa",
+        "clsx",
+        "tailwind-merge",
+        "@heroui/react",
+        "lucide-react",
+        "react-select",
+        "react-select/async",
+        "@microlink/react-json-view",
+        "socket.io-client",
+        // These are discovered when launching conversations:
+        "react-icons/vsc",
+        "react-icons/lu",
+        "react-icons/di",
+        "react-icons/io5",
+        "react-icons/io",  // Added to prevent runtime optimization
+        "@monaco-editor/react",
+        "react-textarea-autosize",
+        "react-markdown",
+        "remark-gfm",
+        "remark-breaks",
+        "react-syntax-highlighter",
+        "react-syntax-highlighter/dist/esm/styles/prism",
+        "react-syntax-highlighter/dist/esm/styles/hljs",
+        // Terminal dependencies - added to prevent runtime optimization
+        "@xterm/addon-fit",
+        "@xterm/xterm",
+        "@xterm/xterm/css/xterm.css",
+      ],
+    },
    server: {
      port: FE_PORT,
      host: true,
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand.

 [[package]]
 name = "aiofiles"
@@ -404,7 +404,7 @@ description = "LTS Port of Python audioop"
 optional = false
 python-versions = ">=3.13"
 groups = ["main"]
-markers = "python_version == \"3.13\""
+markers = "python_version >= \"3.13\""
 files = [
    {file = "audioop_lts-0.2.1-cp313-abi3-macosx_10_13_universal2.whl", hash = "sha256:fd1345ae99e17e6910f47ce7d52673c6a1a70820d78b67de1b7abb3af29c426a"},
    {file = "audioop_lts-0.2.1-cp313-abi3-macosx_10_13_x86_64.whl", hash = "sha256:e175350da05d2087e12cea8e72a70a1a8b14a17e92ed2022952a4419689ede5e"},
@@ -2997,8 +2997,8 @@ files = [
 google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]}
 google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev"
 proto-plus = [
-    {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""},
    {version = ">=1.22.3,<2.0.0dev"},
+    {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""},
 ]
 protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev"

@@ -3020,8 +3020,8 @@ googleapis-common-protos = ">=1.56.2,<2.0.0"
 grpcio = {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}
 grpcio-status = {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}
 proto-plus = [
-    {version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""},
    {version = ">=1.22.3,<2.0.0"},
+    {version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""},
 ]
 protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0"
 requests = ">=2.18.0,<3.0.0"
@@ -3239,8 +3239,8 @@ google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0", extras
 google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0"
 grpc-google-iam-v1 = ">=0.14.0,<1.0.0"
 proto-plus = [
-    {version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""},
    {version = ">=1.22.3,<2.0.0"},
+    {version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""},
 ]
 protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0"

@@ -3462,7 +3462,6 @@ files = [
    {file = "greenlet-3.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:eeb27bece45c0c2a5842ac4c5a1b5c2ceaefe5711078eed4e8043159fa05c834"},
    {file = "greenlet-3.2.2.tar.gz", hash = "sha256:ad053d34421a2debba45aa3cc39acf454acbcd025b3fc1a9f8a0dee237abd485"},
 ]
-markers = {test = "platform_python_implementation == \"CPython\""}

 [package.extras]
 docs = ["Sphinx", "furo"]
@@ -6664,8 +6663,8 @@ files = [
 [package.dependencies]
 googleapis-common-protos = ">=1.52,<2.0"
 grpcio = [
-    {version = ">=1.66.2,<2.0.0", markers = "python_version >= \"3.13\""},
    {version = ">=1.63.2,<2.0.0", markers = "python_version < \"3.13\""},
+    {version = ">=1.66.2,<2.0.0", markers = "python_version >= \"3.13\""},
 ]
 opentelemetry-api = ">=1.15,<2.0"
 opentelemetry-exporter-otlp-proto-common = "1.34.1"
@@ -7063,7 +7062,7 @@ version = "1.52.0"
 description = "A high-level API to automate web browsers"
 optional = false
 python-versions = ">=3.9"
-groups = ["main", "evaluation"]
+groups = ["main", "evaluation", "test"]
 files = [
    {file = "playwright-1.52.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:19b2cb9d4794062008a635a99bd135b03ebb782d460f96534a91cb583f549512"},
    {file = "playwright-1.52.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:0797c0479cbdc99607412a3c486a3a2ec9ddc77ac461259fd2878c975bcbb94a"},
@@ -7737,7 +7736,7 @@ version = "13.0.0"
 description = "A rough port of Node.js's EventEmitter to Python with a few tricks of its own"
 optional = false
 python-versions = ">=3.8"
-groups = ["main", "evaluation"]
+groups = ["main", "evaluation", "test"]
 files = [
    {file = "pyee-13.0.0-py3-none-any.whl", hash = "sha256:48195a3cddb3b1515ce0695ed76036b5ccc2ef3a9f963ff9f77aec0139845498"},
    {file = "pyee-13.0.0.tar.gz", hash = "sha256:b391e3c5a434d1f5118a25615001dbc8f669cf410ab67d04c4d4e07c55481c37"},
@@ -7975,6 +7974,25 @@ pytest = ">=8.2,<9"
 docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1)"]
 testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"]

+[[package]]
+name = "pytest-base-url"
+version = "2.1.0"
+description = "pytest plugin for URL based testing"
+optional = false
+python-versions = ">=3.8"
+groups = ["test"]
+files = [
+    {file = "pytest_base_url-2.1.0-py3-none-any.whl", hash = "sha256:3ad15611778764d451927b2a53240c1a7a591b521ea44cebfe45849d2d2812e6"},
+    {file = "pytest_base_url-2.1.0.tar.gz", hash = "sha256:02748589a54f9e63fcbe62301d6b0496da0d10231b753e950c63e03aee745d45"},
+]
+
+[package.dependencies]
+pytest = ">=7.0.0"
+requests = ">=2.9"
+
+[package.extras]
+test = ["black (>=22.1.0)", "flake8 (>=4.0.1)", "pre-commit (>=2.17.0)", "pytest-localserver (>=0.7.1)", "tox (>=3.24.5)"]
+
 [[package]]
 name = "pytest-cov"
 version = "6.2.1"
@@ -8011,6 +8029,39 @@ files = [
 py = "*"
 pytest = ">=3.10"

+[[package]]
+name = "pytest-playwright"
+version = "0.7.0"
+description = "A pytest wrapper with fixtures for Playwright to automate web browsers"
+optional = false
+python-versions = ">=3.9"
+groups = ["test"]
+files = [
+    {file = "pytest_playwright-0.7.0-py3-none-any.whl", hash = "sha256:2516d0871fa606634bfe32afbcc0342d68da2dbff97fe3459849e9c428486da2"},
+    {file = "pytest_playwright-0.7.0.tar.gz", hash = "sha256:b3f2ea514bbead96d26376fac182f68dcd6571e7cb41680a89ff1673c05d60b6"},
+]
+
+[package.dependencies]
+playwright = ">=1.18"
+pytest = ">=6.2.4,<9.0.0"
+pytest-base-url = ">=1.0.0,<3.0.0"
+python-slugify = ">=6.0.0,<9.0.0"
+
+[[package]]
+name = "pytest-timeout"
+version = "2.4.0"
+description = "pytest plugin to abort hanging tests"
+optional = false
+python-versions = ">=3.7"
+groups = ["test"]
+files = [
+    {file = "pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2"},
+    {file = "pytest_timeout-2.4.0.tar.gz", hash = "sha256:7e68e90b01f9eff71332b25001f85c75495fc4e3a836701876183c4bcfd0540a"},
+]
+
+[package.dependencies]
+pytest = ">=7.0.0"
+
 [[package]]
 name = "pytest-xdist"
 version = "3.8.0"
@@ -8177,6 +8228,24 @@ Pillow = ">=3.3.2"
 typing-extensions = ">=4.9.0"
 XlsxWriter = ">=0.5.7"

+[[package]]
+name = "python-slugify"
+version = "8.0.4"
+description = "A Python slugify application that also handles Unicode"
+optional = false
+python-versions = ">=3.7"
+groups = ["test"]
+files = [
+    {file = "python-slugify-8.0.4.tar.gz", hash = "sha256:59202371d1d05b54a9e7720c5e038f928f45daaffe41dd10822f3907b937c856"},
+    {file = "python_slugify-8.0.4-py2.py3-none-any.whl", hash = "sha256:276540b79961052b66b7d116620b36518847f52d5fd9e3a70164fc8c50faa6b8"},
+]
+
+[package.dependencies]
+text-unidecode = ">=1.3"
+
+[package.extras]
+unidecode = ["Unidecode (>=1.1.1)"]
+
 [[package]]
 name = "python-socketio"
 version = "5.13.0"
@@ -8769,7 +8838,7 @@ version = "2.32.3"
 description = "Python HTTP for Humans."
 optional = false
 python-versions = ">=3.8"
-groups = ["main", "evaluation", "runtime"]
+groups = ["main", "evaluation", "runtime", "test"]
 files = [
    {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"},
    {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"},
@@ -9369,6 +9438,7 @@ files = [
    {file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"},
    {file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"},
 ]
+markers = {evaluation = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}

 [package.extras]
 check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""]
@@ -9612,7 +9682,7 @@ description = "Standard library aifc redistribution. \"dead battery\"."
 optional = false
 python-versions = "*"
 groups = ["main"]
-markers = "python_version == \"3.13\""
+markers = "python_version >= \"3.13\""
 files = [
    {file = "standard_aifc-3.13.0-py3-none-any.whl", hash = "sha256:f7ae09cc57de1224a0dd8e3eb8f73830be7c3d0bc485de4c1f82b4a7f645ac66"},
    {file = "standard_aifc-3.13.0.tar.gz", hash = "sha256:64e249c7cb4b3daf2fdba4e95721f811bde8bdfc43ad9f936589b7bb2fae2e43"},
@@ -9629,7 +9699,7 @@ description = "Standard library chunk redistribution. \"dead battery\"."
 optional = false
 python-versions = "*"
 groups = ["main"]
-markers = "python_version == \"3.13\""
+markers = "python_version >= \"3.13\""
 files = [
    {file = "standard_chunk-3.13.0-py3-none-any.whl", hash = "sha256:17880a26c285189c644bd5bd8f8ed2bdb795d216e3293e6dbe55bbd848e2982c"},
    {file = "standard_chunk-3.13.0.tar.gz", hash = "sha256:4ac345d37d7e686d2755e01836b8d98eda0d1a3ee90375e597ae43aaf064d654"},
@@ -9896,6 +9966,18 @@ aiohttp = ">=3.8,<4.0"
 huggingface-hub = ">=0.12,<1.0"
 pydantic = ">2,<3"

+[[package]]
+name = "text-unidecode"
+version = "1.3"
+description = "The most basic Text::Unidecode port"
+optional = false
+python-versions = "*"
+groups = ["test"]
+files = [
+    {file = "text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93"},
+    {file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"},
+]
+
 [[package]]
 name = "tifffile"
 version = "2025.6.1"
@@ -10655,7 +10737,7 @@ version = "2.4.0"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.9"
-groups = ["main", "evaluation", "runtime"]
+groups = ["main", "evaluation", "runtime", "test"]
 files = [
    {file = "urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813"},
    {file = "urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466"},
@@ -11797,4 +11879,4 @@ third-party-runtimes = ["daytona", "e2b", "modal", "runloop-api-client"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12,<3.14"
-content-hash = "9fd177a2dfa1eebb9212e515db93c58f82d6126cc2d131de5321d68772bc2a59"
+content-hash = "dbcab8224ee537e465f51c5170d8c19e749236c7ba01268f459140c95266afd7"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -126,6 +126,8 @@ pytest-cov = "*"
 pytest-asyncio = "*"
 pytest-forked = "*"
 pytest-xdist = "*"
+pytest-playwright = "^0.7.0"
+pytest-timeout = "^2.4.0"
 openai = "*"
 pandas = "*"
 reportlab = "*"
--- a/tests/e2e/README.md
+++ b/tests/e2e/README.md
@@ -0,0 +1,112 @@
+# OpenHands End-to-End Tests
+
+This directory contains end-to-end tests for the OpenHands application. These tests use Playwright to interact with the OpenHands UI and verify that the application works correctly.
+
+## Running the Tests
+
+### Prerequisites
+
+- Python 3.12 or 3.13 (the project requires `^3.12,<3.14`)
+- Poetry
+- Node.js
+- Playwright
+
+### Environment Variables
+
+The following environment variables are required:
+
+- `GITHUB_TOKEN`: A GitHub token with access to the repositories you want to test
+- `LLM_MODEL`: The LLM model to use (e.g., "gpt-4o")
+- `LLM_API_KEY`: The API key for the LLM model
+
+Optional environment variables:
+
+- `LLM_BASE_URL`: The base URL for the LLM API (if using a custom endpoint)
+
+### Running Locally
+
+To run the full end-to-end test suite locally:
+
+```bash
+cd tests/e2e
+poetry run pytest test_e2e_workflow.py -v
+```
+
+This runs all tests in sequence:
+1. GitHub token configuration
+2. Conversation start
+
+### Running Individual Tests
+
+You can run individual tests directly:
+
+```bash
+cd tests/e2e
+# Run the GitHub token configuration test
+poetry run pytest test_e2e_workflow.py::test_github_token_configuration -v
+
+# Run the conversation start test
+poetry run pytest test_e2e_workflow.py::test_conversation_start -v
+```
+
+### Running with Visible Browser
+
+To run the tests with a visible browser (non-headless mode) so you can watch the browser interactions:
+
+```bash
+cd tests/e2e
+poetry run pytest test_e2e_workflow.py::test_github_token_configuration -v --no-headless --slow-mo=50
+poetry run pytest test_e2e_workflow.py::test_conversation_start -v --no-headless --slow-mo=50
+```
+
+### GitHub Workflow
+
+The tests can also be run as part of a GitHub workflow. The workflow is triggered by:
+
+1. Adding the "end-to-end" label to a pull request that targets `main` or `develop` (later pushes to a labeled pull request re-run the tests)
+2. Manually triggering the workflow from the GitHub Actions tab
+
+## Test Descriptions
+
+### GitHub Token Configuration Test
+
+The GitHub token configuration test (`test_github_token_configuration`) performs the following steps:
+
+1. Navigates to the OpenHands application
+2. Checks if the GitHub token is already configured:
+   - If not configured, it navigates to the settings page and configures it
+   - If already configured, it verifies the repository selection is available
+3. Verifies that the GitHub token is saved and the repository selection is available
+
+### Conversation Start Test
+
+The conversation start test (`test_conversation_start`) performs the following steps:
+
+1. Navigates to the OpenHands application (assumes GitHub token is already configured)
+2. Selects the "openhands-agent/OpenHands" repository
+3. Clicks the "Launch" button
+4. Waits for the conversation interface to load
+5. Waits for the agent to initialize
+6. Asks "How many lines are there in the main README.md file?"
+7. Waits for and verifies the agent's response
+
+
+
+### Simple Browser Navigation Test
+
+A simple test (`test_simple_browser_navigation`) that just navigates to the OpenHands GitHub repository to verify the browser setup works correctly.
+
+### Local Runtime Test
+
+A separate test (`test_headless_mode_with_dummy_agent_no_browser` in `test_local_runtime.py`) that tests the local runtime with a dummy agent in headless mode.
+
+## Troubleshooting
+
+If the tests fail, check the following:
+
+1. Make sure all required environment variables are set
+2. Check the application log in `/tmp/openhands-e2e-test.log` (written by the workflow's `make run` step) and, if it exists, `/tmp/openhands-e2e-build.log`
+3. Verify that the OpenHands application is running correctly
+4. Check the Playwright test results in the `test-results` directory
--- a/tests/e2e/check_playwright.py
+++ b/tests/e2e/check_playwright.py
@@ -0,0 +1,15 @@
+"""Report whether Playwright's Chromium can actually be launched.
+
+Prints ``chromium_found`` and exits 0 on success. Otherwise prints
+``chromium_not_found`` or ``error: <details>`` and exits 1.
+"""
+
+import sys
+
+try:
+    from playwright.sync_api import sync_playwright
+
+    with sync_playwright() as p:
+        # ``executable_path`` is only where Playwright *expects* the browser to
+        # live, so a non-empty value does not prove anything is installed.
+        if not p.chromium.executable_path:
+            print('chromium_not_found')
+            sys.exit(1)
+
+        # Launch headless to confirm the browser is installed and usable.
+        # A missing or broken browser raises and is reported by the handler
+        # below.
+        browser = p.chromium.launch(headless=True)
+        browser.close()
+        print('chromium_found')
+        sys.exit(0)
+except Exception as e:
+    # SystemExit is not an Exception subclass, so the sys.exit() calls above
+    # pass through untouched.
+    print(f'error: {e}')
+    sys.exit(1)
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@@ -0,0 +1,46 @@
+import pytest
+
+
+def pytest_addoption(parser):
+    """Register the browser-related command-line flags for the e2e suite.
+
+    ``--headless`` is a ``store_true`` flag that defaults to True,
+    ``--no-headless`` stores False into the ``headless`` destination, and
+    ``--slow-mo`` takes an integer delay in milliseconds (default 0).
+    """
+    register = parser.addoption
+
+    # Headless toggles: the second flag explicitly targets dest='headless'.
+    register(
+        '--headless',
+        action='store_true',
+        default=True,
+        help='Run browser in headless mode (default)',
+    )
+    register(
+        '--no-headless',
+        action='store_false',
+        dest='headless',
+        help='Run browser in non-headless mode to watch the browser',
+    )
+
+    # Optional per-action delay.
+    register(
+        '--slow-mo',
+        action='store',
+        default=0,
+        type=int,
+        help='Add delay between actions in milliseconds (default: 0)',
+    )
+
+
+@pytest.fixture(scope='session')
+def browser_context_args(browser_context_args):
+    """Return the inherited ``browser_context_args`` fixture value unchanged.
+
+    NOTE(review): as written this override adds no settings; presumably it is
+    a placeholder for customising the browser context later - confirm.
+    """
+    return browser_context_args
+
+
+@pytest.fixture(scope='session')
+def browser_type_launch_args(request):
+    """Build the browser launch arguments from the command-line options.
+
+    ``headless`` is always set from ``--headless``; ``slow_mo`` is included
+    only when ``--slow-mo`` is greater than zero.
+    """
+    config = request.config
+    launch_args = {'headless': config.getoption('--headless')}
+
+    delay_ms = config.getoption('--slow-mo')
+    if delay_ms > 0:
+        launch_args['slow_mo'] = delay_ms
+
+    return launch_args
--- a/tests/e2e/pytest.ini
+++ b/tests/e2e/pytest.ini
@@ -0,0 +1,6 @@
+[pytest]
+# testpaths is resolved relative to the rootdir, which is the directory holding
+# this file (tests/e2e). The tests therefore live in ".", not "tests/e2e".
+testpaths = .
+python_files = test_*.py
+python_classes = Test*
+python_functions = test_*
+# Default per-test timeout in seconds (pytest-timeout); the CI workflow
+# overrides it on the command line with --timeout=600.
+timeout = 300
--- a/tests/e2e/test_e2e_workflow.py
+++ b/tests/e2e/test_e2e_workflow.py