Add GitHub resolver integration tests with mock server

This adds integration tests for the GitHub resolver feature: - Mock GitHub Server (mocks/github-mock-server.ts): - Simulates GitHub REST API endpoints - Handles webhook signature verification - Records webhook events and outgoing responses - Provides test control endpoints for assertions - Webhook Payload Templates (mocks/github-webhook-payloads.ts): - Issue labeled events - Issue comment events - PR review comment events - Mock GitHub Client (mocks/mock-github-client.ts): - Client utilities for triggering webhooks - Helpers for waiting on resolver responses - GitHub Resolver Test Spec (tests/github-resolver.spec.ts): - Mock Server Mode: Tests full webhook flow with mock server - Live Environment Mode: Tests against staging/production - Error handling tests for invalid signatures and malformed data - Tests run against the existing authenticated session - Updated package.json with new scripts: - npm run test:github-resolver - npm run mock:github - Updated README with comprehensive documentation Co-authored-by: openhands <openhands@all-hands.dev>
Add ESLint and Prettier lint checks for integration tests
2026-04-29 03:00:45 -04:00 · 2026-03-12 14:46:06 +00:00 · 2026-03-11 21:31:18 +00:00 · 2026-03-11 15:26:32 -06:00 · 2026-03-11 15:23:13 -06:00 · 2026-03-11 15:19:52 -06:00
40 changed files with 9031 additions and 1868 deletions
@@ -0,0 +1,193 @@
+# Playwright smoke tests for the OpenHands web app. Only the manual
+# workflow_dispatch trigger is active; automatic triggers are commented out.
+name: Smoke Tests
+
+on:
+  # Manual trigger only - uncomment triggers below to enable automatic runs
+  # push:
+  #   branches: [main]
+  # pull_request:
+  #   branches: [main]
+  # schedule:
+  #   - cron: '0 */6 * * *'
+
+  # Manual trigger with environment selection
+  workflow_dispatch:
+    inputs:
+      environment:
+        description: 'Target environment'
+        required: true
+        default: 'staging'
+        type: choice
+        options:
+          - staging
+          - production
+      base_url:
+        description: 'Custom base URL (overrides environment selection)'
+        required: false
+        type: string
+
+# Cancel a superseded run only when it targets the same ref AND the same
+# environment, so dispatching a production run does not cancel an in-flight
+# staging run on the same branch (and vice versa). Falls back to 'staging'
+# when no dispatch input exists, matching the job's own default.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event.inputs.environment || 'staging' }}
+  cancel-in-progress: true
+
+env:
+  # Node.js version handed to actions/setup-node by the test jobs below
+  NODE_VERSION: '22'
+
+jobs:
+  # Runs the Playwright suite in integration_tests/ against the selected
+  # environment, or against a custom base URL when one is supplied.
+  smoke-tests:
+    name: Smoke Tests (${{ github.event.inputs.environment || 'staging' }})
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          cache: 'npm'
+          cache-dependency-path: integration_tests/package-lock.json
+
+      - name: Install dependencies
+        working-directory: ./integration_tests
+        run: npm ci
+
+      - name: Install Playwright browsers
+        working-directory: ./integration_tests
+        run: npx playwright install --with-deps chromium
+
+      - name: Determine base URL
+        id: base-url
+        # Dispatch inputs reach the shell through environment variables rather
+        # than being interpolated into the script text, so a crafted value
+        # (quotes, backticks, $(...)) cannot inject shell commands.
+        env:
+          CUSTOM_BASE_URL: ${{ github.event.inputs.base_url }}
+          TARGET_ENVIRONMENT: ${{ github.event.inputs.environment }}
+        run: |
+          # Precedence: explicit base_url > production > staging (default)
+          if [ -n "$CUSTOM_BASE_URL" ]; then
+            echo "url=$CUSTOM_BASE_URL" >> "$GITHUB_OUTPUT"
+          elif [ "$TARGET_ENVIRONMENT" = "production" ]; then
+            echo "url=https://app.all-hands.dev" >> "$GITHUB_OUTPUT"
+          else
+            echo "url=https://staging.all-hands.dev" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Run smoke tests
+        working-directory: ./integration_tests
+        env:
+          BASE_URL: ${{ steps.base-url.outputs.url }}
+          AUTH_METHOD: github
+          GITHUB_TEST_USERNAME: ${{ secrets.SMOKE_TEST_GITHUB_USERNAME }}
+          GITHUB_TEST_PASSWORD: ${{ secrets.SMOKE_TEST_GITHUB_PASSWORD }}
+          GITHUB_TEST_TOTP_SECRET: ${{ secrets.SMOKE_TEST_GITHUB_TOTP_SECRET }}
+          TEST_REPO_URL: ${{ secrets.SMOKE_TEST_REPO_URL }}
+          CI: true
+        run: npm test
+
+      # The HTML report is uploaded on every run, pass or fail
+      - name: Upload test results
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: playwright-report-${{ github.run_id }}
+          path: integration_tests/playwright-report/
+          retention-days: 30
+
+      # The test-results directory is uploaded in addition, only on failure
+      - name: Upload test artifacts
+        uses: actions/upload-artifact@v4
+        if: failure()
+        with:
+          name: test-results-${{ github.run_id }}
+          path: |
+            integration_tests/test-results/
+            integration_tests/playwright-report/
+          retention-days: 14
+
+  # Notify on failure (optional - configure Slack/Discord webhook)
+  # Disabled until automatic triggers are enabled
+  notify-failure:
+    name: Notify on Failure
+    needs: smoke-tests
+    runs-on: ubuntu-latest
+    # Hard-disabled. To enable, replace `false` with:
+    #   failure() && github.ref == 'refs/heads/main'
+    if: false
+
+    steps:
+      - name: Send notification
+        env:
+          # Link to this workflow run, assembled once and reused in the script
+          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        run: |
+          echo "Smoke tests failed on main branch!"
+          echo "View results: ${RUN_URL}"
+          # Add Slack/Discord notification here if needed
+          # Example with curl to Slack webhook:
+          # curl -X POST -H 'Content-type: application/json' \
+          #   --data '{"text":"🚨 Smoke tests failed on main! <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Results>"}' \
+          #   ${{ secrets.SLACK_WEBHOOK_URL }}
+
+  # Feature branch smoke tests (disabled - was triggered by PR label)
+  # To enable: uncomment pull_request trigger above and this job
+  feature-branch-test:
+    name: Feature Branch Smoke Test
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    if: false # Disabled - set to: github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'smoke-test')
+    # The last step posts a PR comment, so the job token needs write access to
+    # pull requests; checkout only needs to read repository contents.
+    permissions:
+      contents: read
+      pull-requests: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          cache: 'npm'
+          cache-dependency-path: integration_tests/package-lock.json
+
+      - name: Install dependencies
+        working-directory: ./integration_tests
+        run: npm ci
+
+      - name: Install Playwright browsers
+        working-directory: ./integration_tests
+        run: npx playwright install --with-deps chromium
+
+      - name: Extract branch name for URL
+        id: branch
+        # github.head_ref is chosen by the PR author. It is handed to the shell
+        # through the environment instead of being interpolated into the script
+        # text, so a crafted branch name cannot inject shell commands.
+        env:
+          BRANCH_NAME: ${{ github.head_ref }}
+        run: |
+          # Sanitize branch name for URL (replace special chars, lowercase)
+          SANITIZED=$(printf '%s' "$BRANCH_NAME" | sed 's/[^a-zA-Z0-9-]/-/g' | tr '[:upper:]' '[:lower:]')
+          echo "name=$SANITIZED" >> "$GITHUB_OUTPUT"
+
+      - name: Run smoke tests against feature branch
+        # The id lets the comment step read this step's real outcome
+        id: smoke
+        working-directory: ./integration_tests
+        env:
+          BASE_URL: https://${{ steps.branch.outputs.name }}.staging.all-hands.dev
+          AUTH_METHOD: github
+          GITHUB_TEST_USERNAME: ${{ secrets.SMOKE_TEST_GITHUB_USERNAME }}
+          GITHUB_TEST_PASSWORD: ${{ secrets.SMOKE_TEST_GITHUB_PASSWORD }}
+          # Same optional inputs the smoke-tests job passes to the suite
+          GITHUB_TEST_TOTP_SECRET: ${{ secrets.SMOKE_TEST_GITHUB_TOTP_SECRET }}
+          TEST_REPO_URL: ${{ secrets.SMOKE_TEST_REPO_URL }}
+          CI: true
+        run: npm test
+        # Non-blocking: a failing suite must not fail the job, only be reported
+        continue-on-error: true
+
+      - name: Upload test results
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: feature-branch-report-${{ github.run_id }}
+          path: integration_tests/playwright-report/
+          retention-days: 7
+
+      - name: Comment on PR with results
+        uses: actions/github-script@v7
+        if: always()
+        env:
+          # Use the step outcome, not job.status: continue-on-error on the test
+          # step keeps job.status at 'success' even when the suite fails.
+          TEST_OUTCOME: ${{ steps.smoke.outcome }}
+          BRANCH_URL: https://${{ steps.branch.outputs.name }}.staging.all-hands.dev
+        with:
+          script: |
+            const outcome = process.env.TEST_OUTCOME;
+            const branchUrl = process.env.BRANCH_URL;
+
+            // Anything other than 'success' (failure, cancelled, skipped, or an
+            // empty value when the test step never ran) is reported as failed.
+            const body = outcome === 'success'
+              ? `✅ **Smoke tests passed** against [${branchUrl}](${branchUrl})`
+              : `❌ **Smoke tests failed** against [${branchUrl}](${branchUrl})\n\nView the [test report](${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}) for details.`;
+
+            // Awaited so an API error fails this step instead of being lost
+            await github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: body
+            });
@@ -6190,14 +6190,14 @@ llama = ["llama-index (>=0.12.29,<0.13.0)", "llama-index-core (>=0.12.29,<0.13.0

 [[package]]
 name = "openhands-agent-server"
-version = "1.13.0"
+version = "1.12.0"
 description = "OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent"
 optional = false
 python-versions = ">=3.12"
 groups = ["main"]
 files = [
-    {file = "openhands_agent_server-1.13.0-py3-none-any.whl", hash = "sha256:88bb8bfb03ff0cc7a7d32ffabd108d0a284f4333f33a9de27ce158b6d828bc29"},
-    {file = "openhands_agent_server-1.13.0.tar.gz", hash = "sha256:6f8b296c0f26a478d4eb49668a353e2b6997c39022c2bbcc36325f5f08887a7a"},
+    {file = "openhands_agent_server-1.12.0-py3-none-any.whl", hash = "sha256:3bd62fef10092f1155af116a8a7417041d574eff9d4e4b6f7a24bfc432de2fad"},
+    {file = "openhands_agent_server-1.12.0.tar.gz", hash = "sha256:7ea7ce579175f713ed68b68cde5d685ef694627ac7bbff40d2e22913f065c46d"},
 ]

 [package.dependencies]
@@ -6214,7 +6214,7 @@ wsproto = ">=1.2.0"

 [[package]]
 name = "openhands-ai"
-version = "1.5.0"
+version = "1.4.0"
 description = "OpenHands: Code Less, Make More"
 optional = false
 python-versions = "^3.12,<3.14"
@@ -6259,9 +6259,9 @@ memory-profiler = ">=0.61"
 numpy = "*"
 openai = "2.8"
 openhands-aci = "0.3.3"
-openhands-agent-server = "1.13"
-openhands-sdk = "1.13"
-openhands-tools = "1.13"
+openhands-agent-server = "1.12"
+openhands-sdk = "1.12"
+openhands-tools = "1.12"
 opentelemetry-api = ">=1.33.1"
 opentelemetry-exporter-otlp-proto-grpc = ">=1.33.1"
 pathspec = ">=0.12.1"
@@ -6315,14 +6315,14 @@ url = ".."

 [[package]]
 name = "openhands-sdk"
-version = "1.13.0"
+version = "1.12.0"
 description = "OpenHands SDK - Core functionality for building AI agents"
 optional = false
 python-versions = ">=3.12"
 groups = ["main"]
 files = [
-    {file = "openhands_sdk-1.13.0-py3-none-any.whl", hash = "sha256:ec83f9fa2934aae9c4ce1c0365a7037f7e17869affa44a40e71ba49d2bef7185"},
-    {file = "openhands_sdk-1.13.0.tar.gz", hash = "sha256:fbb2a2dc4852ea23cc697a36fb3f95ca47cfef432b0d195c496de6f374caad9c"},
+    {file = "openhands_sdk-1.12.0-py3-none-any.whl", hash = "sha256:857793f5c27fd63c0d4d37762550e6c504a03dd06116475c23adcc14bb5c4c02"},
+    {file = "openhands_sdk-1.12.0.tar.gz", hash = "sha256:ac348e7134ea21e1ab453978962504aff8eb47e62df1fb7a503d769d55658ea9"},
 ]

 [package.dependencies]
@@ -6345,14 +6345,14 @@ boto3 = ["boto3 (>=1.35.0)"]

 [[package]]
 name = "openhands-tools"
-version = "1.13.0"
+version = "1.12.0"
 description = "OpenHands Tools - Runtime tools for AI agents"
 optional = false
 python-versions = ">=3.12"
 groups = ["main"]
 files = [
-    {file = "openhands_tools-1.13.0-py3-none-any.whl", hash = "sha256:87073b868e20f9c769497f480e0d15b14ca41314c3d1cb5076029f37408a1d68"},
-    {file = "openhands_tools-1.13.0.tar.gz", hash = "sha256:e1181701efab5bc3133566e3b1640027824147438959cd8ce7430c941896704d"},
+    {file = "openhands_tools-1.12.0-py3-none-any.whl", hash = "sha256:57207e9e30f9d7fe9121cd21b072580cfdc2a00831edeaf8e8d685d721bb9e33"},
+    {file = "openhands_tools-1.12.0.tar.gz", hash = "sha256:f2b4d81d0b6771f5416f8b702db09a14999fa8e553073bcf38f344e29aae770c"},
 ]

 [package.dependencies]
@@ -13771,22 +13771,24 @@ files = [

 [[package]]
 name = "tornado"
-version = "6.5.5"
+version = "6.5.4"
 description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed."
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "tornado-6.5.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:487dc9cc380e29f58c7ab88f9e27cdeef04b2140862e5076a66fb6bb68bb1bfa"},
-    {file = "tornado-6.5.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:65a7f1d46d4bb41df1ac99f5fcb685fb25c7e61613742d5108b010975a9a6521"},
-    {file = "tornado-6.5.5-cp39-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e74c92e8e65086b338fd56333fb9a68b9f6f2fe7ad532645a290a464bcf46be5"},
-    {file = "tornado-6.5.5-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:435319e9e340276428bbdb4e7fa732c2d399386d1de5686cb331ec8eee754f07"},
-    {file = "tornado-6.5.5-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3f54aa540bdbfee7b9eb268ead60e7d199de5021facd276819c193c0fb28ea4e"},
-    {file = "tornado-6.5.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:36abed1754faeb80fbd6e64db2758091e1320f6bba74a4cf8c09cd18ccce8aca"},
-    {file = "tornado-6.5.5-cp39-abi3-win32.whl", hash = "sha256:dd3eafaaeec1c7f2f8fdcd5f964e8907ad788fe8a5a32c4426fbbdda621223b7"},
-    {file = "tornado-6.5.5-cp39-abi3-win_amd64.whl", hash = "sha256:6443a794ba961a9f619b1ae926a2e900ac20c34483eea67be4ed8f1e58d3ef7b"},
-    {file = "tornado-6.5.5-cp39-abi3-win_arm64.whl", hash = "sha256:2c9a876e094109333f888539ddb2de4361743e5d21eece20688e3e351e4990a6"},
-    {file = "tornado-6.5.5.tar.gz", hash = "sha256:192b8f3ea91bd7f1f50c06955416ed76c6b72f96779b962f07f911b91e8d30e9"},
+    {file = "tornado-6.5.4-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d6241c1a16b1c9e4cc28148b1cda97dd1c6cb4fb7068ac1bedc610768dff0ba9"},
+    {file = "tornado-6.5.4-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2d50f63dda1d2cac3ae1fa23d254e16b5e38153758470e9956cbc3d813d40843"},
+    {file = "tornado-6.5.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1cf66105dc6acb5af613c054955b8137e34a03698aa53272dbda4afe252be17"},
+    {file = "tornado-6.5.4-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50ff0a58b0dc97939d29da29cd624da010e7f804746621c78d14b80238669335"},
+    {file = "tornado-6.5.4-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5fb5e04efa54cf0baabdd10061eb4148e0be137166146fff835745f59ab9f7f"},
+    {file = "tornado-6.5.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9c86b1643b33a4cd415f8d0fe53045f913bf07b4a3ef646b735a6a86047dda84"},
+    {file = "tornado-6.5.4-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:6eb82872335a53dd063a4f10917b3efd28270b56a33db69009606a0312660a6f"},
+    {file = "tornado-6.5.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6076d5dda368c9328ff41ab5d9dd3608e695e8225d1cd0fd1e006f05da3635a8"},
+    {file = "tornado-6.5.4-cp39-abi3-win32.whl", hash = "sha256:1768110f2411d5cd281bac0a090f707223ce77fd110424361092859e089b38d1"},
+    {file = "tornado-6.5.4-cp39-abi3-win_amd64.whl", hash = "sha256:fa07d31e0cd85c60713f2b995da613588aa03e1303d75705dca6af8babc18ddc"},
+    {file = "tornado-6.5.4-cp39-abi3-win_arm64.whl", hash = "sha256:053e6e16701eb6cbe641f308f4c1a9541f91b6261991160391bfc342e8a551a1"},
+    {file = "tornado-6.5.4.tar.gz", hash = "sha256:a22fa9047405d03260b483980635f0b041989d8bcc9a313f8fe18b411d84b1d7"},
 ]

 [[package]]
@@ -334,10 +334,7 @@ class SaasSQLAppConversationInfoService(SQLAppConversationInfoService):
        await super().save_app_conversation_info(info)

        # Get current user_id for SAAS metadata
-        # Fall back to info.created_by_user_id for webhook callbacks (which use ADMIN context)
        user_id_str = await self.user_context.get_user_id()
-        if not user_id_str and info.created_by_user_id:
-            user_id_str = info.created_by_user_id
        if user_id_str:
            # Convert string user_id to UUID
            user_id_uuid = UUID(user_id_str)
@@ -663,131 +663,3 @@ class TestSaasSQLAppConversationInfoServiceAdminContext:

        admin_page = await admin_service.search_app_conversation_info()
        assert len(admin_page.items) == 5
-
-
-class TestSaasSQLAppConversationInfoServiceWebhookFallback:
-    """Test suite for webhook callback fallback using info.created_by_user_id."""
-
-    @pytest.mark.asyncio
-    async def test_save_with_admin_context_uses_created_by_user_id_fallback(
-        self,
-        async_session_with_users: AsyncSession,
-    ):
-        """Test that save_app_conversation_info uses info.created_by_user_id when user_context returns None.
-
-        This is the key fix for SDK-created conversations: when the webhook endpoint
-        uses ADMIN context (user_id=None), the service should fall back to using
-        the created_by_user_id from the AppConversationInfo object.
-        """
-        from storage.stored_conversation_metadata_saas import (
-            StoredConversationMetadataSaas,
-        )
-
-        from openhands.app_server.user.specifiy_user_context import ADMIN
-
-        # Arrange: Create service with ADMIN context (user_id=None)
-        admin_service = SaasSQLAppConversationInfoService(
-            db_session=async_session_with_users,
-            user_context=ADMIN,
-        )
-
-        # Create conversation info with created_by_user_id set (as would come from sandbox_info)
-        conv_id = uuid4()
-        conv_info = AppConversationInfo(
-            id=conv_id,
-            created_by_user_id=str(USER1_ID),  # This should be used as fallback
-            sandbox_id='sandbox_webhook_test',
-            title='Webhook Created Conversation',
-        )
-
-        # Act: Save using ADMIN context
-        await admin_service.save_app_conversation_info(conv_info)
-
-        # Assert: SAAS metadata should be created with user_id from info.created_by_user_id
-        saas_query = select(StoredConversationMetadataSaas).where(
-            StoredConversationMetadataSaas.conversation_id == str(conv_id)
-        )
-        result = await async_session_with_users.execute(saas_query)
-        saas_metadata = result.scalar_one_or_none()
-
-        assert saas_metadata is not None, 'SAAS metadata should be created'
-        assert (
-            saas_metadata.user_id == USER1_ID
-        ), 'user_id should match info.created_by_user_id'
-        assert saas_metadata.org_id == ORG1_ID, 'org_id should match user current org'
-
-    @pytest.mark.asyncio
-    async def test_save_with_admin_context_no_user_id_skips_saas_metadata(
-        self,
-        async_session_with_users: AsyncSession,
-    ):
-        """Test that save_app_conversation_info skips SAAS metadata when both user_context and info have no user_id."""
-        from storage.stored_conversation_metadata_saas import (
-            StoredConversationMetadataSaas,
-        )
-
-        from openhands.app_server.user.specifiy_user_context import ADMIN
-
-        # Arrange: Create service with ADMIN context (user_id=None)
-        admin_service = SaasSQLAppConversationInfoService(
-            db_session=async_session_with_users,
-            user_context=ADMIN,
-        )
-
-        # Create conversation info without created_by_user_id
-        conv_id = uuid4()
-        conv_info = AppConversationInfo(
-            id=conv_id,
-            created_by_user_id=None,  # No user_id available
-            sandbox_id='sandbox_no_user',
-            title='No User Conversation',
-        )
-
-        # Act: Save using ADMIN context with no user_id fallback
-        await admin_service.save_app_conversation_info(conv_info)
-
-        # Assert: SAAS metadata should NOT be created
-        saas_query = select(StoredConversationMetadataSaas).where(
-            StoredConversationMetadataSaas.conversation_id == str(conv_id)
-        )
-        result = await async_session_with_users.execute(saas_query)
-        saas_metadata = result.scalar_one_or_none()
-
-        assert (
-            saas_metadata is None
-        ), 'SAAS metadata should not be created without user_id'
-
-    @pytest.mark.asyncio
-    async def test_webhook_created_conversation_visible_to_user(
-        self,
-        async_session_with_users: AsyncSession,
-    ):
-        """Test end-to-end: conversation saved via webhook is visible to the owning user."""
-        from openhands.app_server.user.specifiy_user_context import ADMIN
-
-        # Arrange: Save conversation using ADMIN context (simulating webhook)
-        admin_service = SaasSQLAppConversationInfoService(
-            db_session=async_session_with_users,
-            user_context=ADMIN,
-        )
-
-        conv_id = uuid4()
-        conv_info = AppConversationInfo(
-            id=conv_id,
-            created_by_user_id=str(USER1_ID),
-            sandbox_id='sandbox_webhook_e2e',
-            title='E2E Webhook Conversation',
-        )
-        await admin_service.save_app_conversation_info(conv_info)
-
-        # Act: Query as the owning user
-        user1_service = SaasSQLAppConversationInfoService(
-            db_session=async_session_with_users,
-            user_context=SpecifyUserContext(user_id=str(USER1_ID)),
-        )
-        user1_page = await user1_service.search_app_conversation_info()
-
-        # Assert: User should see the webhook-created conversation
-        assert len(user1_page.items) == 1
-        assert user1_page.items[0].id == conv_id
-        assert user1_page.items[0].title == 'E2E Webhook Conversation'
@@ -87,6 +87,7 @@ export function AgentStatus({
  return (
    <div className={cn("flex items-center gap-1 min-w-0", className)}>
      <span
+        data-testid="agent-status-text"
        className="text-[11px] text-white font-normal leading-5 flex-1 min-w-0 max-w-full whitespace-normal break-words"
        title={t(statusCode)}
      >
@@ -0,0 +1,73 @@
+# OpenHands Integration Tests - Environment Configuration
+# Copy this file to .env and fill in your values
+
+# =============================================================================
+# TARGET ENVIRONMENT
+# =============================================================================
+
+# Base URL for the application under test
+# Options:
+#   - https://staging.all-hands.dev (default)
+#   - https://app.all-hands.dev (production)
+#   - https://<feature_branch>.staging.all-hands.dev (feature branches)
+#   - http://localhost:3000 (local development)
+BASE_URL=https://staging.all-hands.dev
+
+# Environment name (optional, used for logging)
+# Options: staging, production, local, feature
+TEST_ENV=staging
+
+# =============================================================================
+# AUTHENTICATION
+# =============================================================================
+
+# Authentication method
+# Options:
+#   - github (default): Use GitHub OAuth
+#   - keycloak: Use Keycloak authentication
+#   - skip: Skip auth setup (use existing fixtures/auth.json)
+AUTH_METHOD=github
+
+# GitHub OAuth Credentials (required for AUTH_METHOD=github)
+# Create a test account specifically for automation testing
+GITHUB_TEST_USERNAME=your-github-test-username
+GITHUB_TEST_PASSWORD=your-github-test-password
+
+# GitHub 2FA TOTP Secret (optional, only if 2FA is enabled on test account)
+# You can get this when setting up 2FA - it's the secret key shown
+# GITHUB_TEST_TOTP_SECRET=your-totp-secret
+
+# Keycloak Credentials (required for AUTH_METHOD=keycloak)
+# KEYCLOAK_URL=https://auth.your-domain.com
+# KEYCLOAK_USERNAME=test-user
+# KEYCLOAK_PASSWORD=test-password
+
+# =============================================================================
+# TEST CONFIGURATION
+# =============================================================================
+
+# Repository to use for testing (should be a private repo the test user has access to)
+#TEST_REPO_URL=https://github.com/OpenHands/deploy
+
+# Prompt to send to the agent during smoke tests
+TEST_PROMPT="Flip a coin!"
+
+# =============================================================================
+# CI/CD CONFIGURATION
+# =============================================================================
+
+# Set to true when running in CI environment
+# CI=true
+
+# Playwright specific settings
+# PLAYWRIGHT_HTML_REPORT=playwright-report
+
+# =============================================================================
+# DEBUG OPTIONS
+# =============================================================================
+
+# Enable debug logging
+# DEBUG=pw:api
+
+# Run headed with the Playwright Inspector attached (pauses so you can step through tests)
+# PWDEBUG=1
@@ -0,0 +1,63 @@
+{
+  "parser": "@typescript-eslint/parser",
+  "parserOptions": {
+    "project": "./tsconfig.json"
+  },
+  "extends": [
+    "airbnb-base",
+    "airbnb-typescript/base",
+    "prettier",
+    "plugin:@typescript-eslint/eslint-recommended",
+    "plugin:@typescript-eslint/recommended"
+  ],
+  "plugins": ["prettier", "unused-imports"],
+  "rules": {
+    "unused-imports/no-unused-imports": "error",
+    "prettier/prettier": ["error"],
+    "@typescript-eslint/prefer-optional-chain": "error",
+    "import/extensions": [
+      "error",
+      "ignorePackages",
+      {
+        "": "never",
+        "ts": "never"
+      }
+    ]
+  },
+  "overrides": [
+    {
+      "files": ["*.ts"],
+      "rules": {
+        "no-param-reassign": [
+          "error",
+          {
+            "props": true,
+            "ignorePropertyModificationsFor": ["acc", "page"]
+          }
+        ],
+        "no-restricted-syntax": "off",
+        "import/prefer-default-export": "off",
+        "no-underscore-dangle": "off",
+        "import/no-extraneous-dependencies": "off",
+        "no-console": "off",
+        "no-await-in-loop": "off",
+        "class-methods-use-this": "off",
+        "@typescript-eslint/no-use-before-define": "off",
+        "no-plusplus": "off",
+        "no-promise-executor-return": "off",
+        "@typescript-eslint/no-throw-literal": "off",
+        "@typescript-eslint/no-shadow": "off",
+        "@typescript-eslint/no-unused-vars": [
+          "error",
+          {
+            "argsIgnorePattern": "^_",
+            "varsIgnorePattern": "^_"
+          }
+        ]
+      },
+      "parserOptions": {
+        "project": ["./tsconfig.json"]
+      }
+    }
+  ]
+}
@@ -0,0 +1,42 @@
+# Dependencies
+node_modules/
+
+# Environment files (contain secrets)
+.env
+.env.local
+.env.*.local
+
+# Test artifacts
+test-results/
+playwright-report/
+playwright/.cache/
+
+# Authentication state (contains session tokens)
+fixtures/auth.json
+
+# TypeScript build output
+*.tsbuildinfo
+dist/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# Debug logs
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+
+# Screenshots and videos from test runs
+**/*.png
+**/*.webm
+**/*.mp4
+
+# Keep the fixtures directory structure
+!fixtures/.gitkeep
@@ -0,0 +1,3 @@
+{
+    "trailingComma": "all"
+}
@@ -0,0 +1,404 @@
+# OpenHands Integration Tests
+
+End-to-end smoke tests for OpenHands using [Playwright](https://playwright.dev/).
+
+## Overview
+
+These integration tests verify the critical path of the OpenHands application:
+
+1. ✅ User authentication (GitHub OAuth / Keycloak)
+2. ✅ Home screen accessibility
+3. ✅ Repository selection
+4. ✅ Conversation creation
+5. ✅ Agent interaction without errors
+6. ✅ GitHub Resolver integration (enterprise)
+
+## Quick Start
+
+### Prerequisites
+
+- Node.js 20.0.0 or higher
+- A GitHub test account with access to the test repository
+
+### Installation
+
+```bash
+cd integration_tests
+npm install
+npx playwright install chromium  # Install browser
+```
+
+### Configuration
+
+1. Copy the example environment file:
+
+```bash
+cp .env.example .env
+```
+
+2. Edit `.env` with your test credentials:
+
+```env
+GITHUB_TEST_USERNAME=your-test-account
+GITHUB_TEST_PASSWORD=your-test-password
+# Enable this only if your user has access to this repository
+#TEST_REPO_URL=https://github.com/OpenHands/deploy
+```
+
+### Run Tests
+
+```bash
+# Run all smoke tests against staging
+npm test
+
+# Run with visible browser
+npm run test:headed
+
+# Run with Playwright debugger
+npm run test:debug
+
+# Run with UI mode (interactive)
+npm run test:ui
+```
+
+## Environment Support
+
+Tests can run against different environments:
+
+### Staging (Default)
+
+```bash
+npm run test:staging
+# or
+BASE_URL=https://staging.all-hands.dev npm test
+```
+
+### Production
+
+```bash
+npm run test:production
+# or
+BASE_URL=https://app.all-hands.dev npm test
+```
+
+### Feature Branches
+
+```bash
+BASE_URL=https://my-feature-branch.staging.all-hands.dev npm test
+```
+
+### Local Development
+
+```bash
+BASE_URL=http://localhost:3000 npm test
+```
+
+## Authentication
+
+### GitHub OAuth (Default)
+
+The tests use GitHub OAuth for authentication. You'll need a dedicated test account.
+
+**Required Environment Variables:**
+- `GITHUB_TEST_USERNAME` - GitHub username
+- `GITHUB_TEST_PASSWORD` - GitHub password
+- `GITHUB_TEST_TOTP_SECRET` - (Optional) 2FA TOTP secret
+
+**Recommendations:**
+- Use a dedicated test account, not your personal account
+- Disable 2FA on the test account if possible (simpler automation)
+- If 2FA is required, you'll need to implement TOTP generation (see below)
+
+### Keycloak Authentication
+
+For Keycloak-based authentication:
+
+```env
+AUTH_METHOD=keycloak
+KEYCLOAK_URL=https://auth.your-domain.com
+KEYCLOAK_USERNAME=test-user
+KEYCLOAK_PASSWORD=test-password
+```
+
+### Reusing Authentication State
+
+After running tests once, the authentication state is saved to `fixtures/auth.json`. To skip the auth flow on subsequent runs:
+
+```env
+AUTH_METHOD=skip
+```
+
+## Project Structure
+
+```
+integration_tests/
+├── fixtures/               # Test fixtures and auth state
+│   └── auth.json          # Saved authentication state (generated)
+├── pages/                  # Page Object Models
+│   ├── BasePage.ts        # Base page with common utilities
+│   ├── HomePage.ts        # Home screen interactions
+│   ├── ConversationPage.ts # Conversation/chat interactions
+│   └── index.ts           # Page exports
+├── tests/                  # Test specifications
+│   ├── global-setup.ts    # Authentication setup
+│   └── smoke.spec.ts      # Smoke test suite
+├── utils/                  # Utility functions
+├── .env.example           # Environment configuration template
+├── playwright.config.ts   # Playwright configuration
+├── package.json           # Dependencies and scripts
+└── README.md              # This file
+```
+
+## Writing Tests
+
+### Using Page Objects
+
+```typescript
+import { test, expect } from "@playwright/test";
+import { HomePage, ConversationPage } from "../pages";
+
+test("example test", async ({ page }) => {
+  const homePage = new HomePage(page);
+  const conversationPage = new ConversationPage(page);
+
+  // Navigate and verify home screen
+  await homePage.goto();
+  await expect(homePage.homeScreen).toBeVisible();
+
+  // Start a conversation
+  await homePage.selectRepository("https://github.com/owner/repo");
+  await homePage.startNewConversation();
+
+  // Interact with agent
+  await conversationPage.waitForConversationReady();
+  await conversationPage.executePrompt("Your prompt here");
+  await conversationPage.verifyNoErrors();
+});
+```
+
+### Test Tags
+
+Tests are organized with tags:
+
+- `@smoke` - Core smoke tests (run by default)
+- `@critical` - Critical functionality that must always work
+
+```bash
+# Run only smoke tests
+npm run test:smoke
+
+# Run specific tag
+npx playwright test --grep @critical
+```
+
+## CI/CD Integration
+
+### GitHub Actions
+
+The tests can be run in GitHub Actions. See `.github/workflows/smoke-tests.yml`.
+
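+**Required Secrets** (GitHub does not allow secret names that start with `GITHUB_`, so the workflow uses a `SMOKE_TEST_` prefix and maps them to the `GITHUB_TEST_*` environment variables):
+- `SMOKE_TEST_GITHUB_USERNAME` - Test account username
+- `SMOKE_TEST_GITHUB_PASSWORD` - Test account password
+- `SMOKE_TEST_GITHUB_TOTP_SECRET` - (Optional) 2FA TOTP secret for the test account
+- `SMOKE_TEST_REPO_URL` - (Optional) Repository to use for testing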
+
+### Example Workflow
+
+```yaml
+name: Smoke Tests
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+  schedule:
+    - cron: '0 */6 * * *'  # Every 6 hours
+
+jobs:
+  smoke-tests:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+
+      - name: Install dependencies
+        working-directory: ./integration_tests
+        run: npm ci
+
+      - name: Install Playwright
+        working-directory: ./integration_tests
+        run: npx playwright install --with-deps chromium
+
+      - name: Run smoke tests
+        working-directory: ./integration_tests
+        env:
+          BASE_URL: https://staging.all-hands.dev
+          GITHUB_TEST_USERNAME: ${{ secrets.GITHUB_TEST_USERNAME }}
+          GITHUB_TEST_PASSWORD: ${{ secrets.GITHUB_TEST_PASSWORD }}
+        run: npm test
+
+      - uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: playwright-report
+          path: integration_tests/playwright-report/
+          retention-days: 30
+```
+
+## Troubleshooting
+
+### Authentication Fails
+
+1. Verify credentials are correct
+2. Check if 2FA is enabled (need TOTP secret)
+3. Check if account is locked or needs verification
+4. Try running `AUTH_METHOD=skip` with manual login first
+
+### Tests Timeout
+
+1. Increase timeout in `playwright.config.ts`
+2. Check if the environment is accessible
+3. Check agent response times
+
+### Debug Mode
+
+```bash
+# Run headed with the Playwright Inspector attached (step through the test)
+PWDEBUG=1 npm test
+
+# Generate test code interactively
+npm run codegen
+```
+
+### View Test Report
+
+```bash
+npm run report
+```
+
+## Adding 2FA Support
+
+If your test account requires 2FA, install `otplib`:
+
+```bash
+npm install otplib
+```
+
+Then update `global-setup.ts`:
+
+```typescript
+import { authenticator } from 'otplib';
+
+async function generateTOTP(secret: string): Promise<string> {
+  return authenticator.generate(secret);
+}
+```
+
+## GitHub Resolver Integration Tests
+
+The GitHub Resolver tests verify the end-to-end flow of the resolver integration, where GitHub webhooks trigger OpenHands to work on issues and pull requests.
+
+### Architecture
+
+The tests use a **Mock GitHub Server** instead of connecting to the real GitHub API. This allows:
+
+- Complete control over webhook payloads and responses
+- Testing without requiring real GitHub credentials or installations
+- Isolation from GitHub's rate limits and service availability
+- Reproducible test scenarios
+
+### Mock GitHub Server
+
+The mock server (`mocks/github-mock-server.ts`) simulates:
+
+- GitHub REST API endpoints (repos, issues, comments, reactions)
+- GitHub App installation token generation
+- Webhook signature generation (HMAC-SHA256 `X-Hub-Signature-256` header) for the webhooks it sends
+- Recording of outgoing responses (comments posted by the resolver)
+
+### Running GitHub Resolver Tests
+
+1. **Start the OpenHands application with enterprise features:**
+
+```bash
+# From the project root
+cd enterprise
+make start-backend
+```
+
+2. **Configure environment variables:**
+
+```bash
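+# In integration_tests/.env
+GITHUB_APP_WEBHOOK_SECRET=test-webhook-secret
+# The mock server signs webhooks with MOCK_GITHUB_WEBHOOK_SECRET
+# (default: test-webhook-secret); keep it equal to the secret above.
+MOCK_GITHUB_WEBHOOK_SECRET=test-webhook-secret
+APP_PORT=12000
+MOCK_GITHUB_PORT=9999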
+```
+
+3. **Run the tests:**
+
+```bash
+cd integration_tests
+npm run test:github-resolver
+```
+
+### Mock Server Standalone Mode
+
+You can run the mock GitHub server standalone for debugging:
+
+```bash
+npm run mock:github
+```
+
+This starts the server on port 9999 (configurable via `MOCK_GITHUB_PORT`).
+
+### Test Endpoints
+
+The mock server exposes test control endpoints:
+
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/_health` | GET | Health check |
+| `/_test/webhook-events` | GET | Get recorded webhook events |
+| `/_test/outgoing-responses` | GET | Get responses posted by resolver |
+| `/_test/clear-events` | POST | Clear recorded events |
+| `/_test/reset` | POST | Reset all mock data |
+| `/_test/trigger-webhook` | POST | Trigger a webhook to target URL |
+
+### Test Scenarios
+
+The GitHub Resolver tests cover:
+
+1. **Issue Labeled** - Adding the "openhands" label to an issue
+2. **Issue Comment** - Commenting "@openhands" on an issue
+3. **PR Review Comment** - Commenting "@openhands" on a PR review
+4. **Error Handling** - Invalid signatures, missing installation IDs
+
+### Customizing Test Data
+
+Edit `mocks/github-mock-server.ts` to modify the default test data:
+
+- Repository information
+- Issue content
+- Installation configurations
+
+## Best Practices
+
+1. **Use dedicated test accounts** - Don't use personal accounts
+2. **Keep credentials secure** - Never commit `.env` files
+3. **Run tests sequentially** - Smoke tests share state
+4. **Clean up after tests** - Stop agents, close conversations
+5. **Use meaningful assertions** - Check for specific elements
+6. **Add screenshots on failure** - Helps debug CI failures
+
+## Contributing
+
+When adding new tests:
+
+1. Add new Page Objects for new pages/features
+2. Follow existing naming conventions
+3. Use appropriate test tags
+4. Document any new environment variables
+5. Update this README if needed
@@ -0,0 +1,2 @@
+# This file ensures the fixtures directory is tracked by git
+# The auth.json file will be generated during test setup
@@ -0,0 +1,705 @@
+/**
+ * Mock GitHub Server for Integration Testing
+ *
+ * This server simulates GitHub API endpoints used by the OpenHands resolver:
+ * - GitHub App webhooks (issue labeled, issue comment, PR comment, etc.)
+ * - GitHub REST API endpoints (repos, issues, comments, pulls)
+ * - GitHub GraphQL API
+ *
+ * The mock server allows testing the resolver integration without connecting
+ * to the real GitHub service.
+ */
+
+import http from "http";
+import crypto from "crypto";
+
+// Types for mock data
+/**
+ * In-memory representation of an issue held by the mock data store.
+ */
+interface MockIssue {
+  number: number;
+  title: string;
+  body: string;
+  state: "open" | "closed";
+  labels: Array<{ name: string; id: number }>;
+  user: { login: string; id: number };
+  // ISO-8601 timestamp strings.
+  created_at: string;
+  updated_at: string;
+  // Comments are stored inline on the issue, in the order they were added.
+  comments: MockComment[];
+  // Reaction content strings, in the order they were recorded.
+  reactions: string[];
+}
+
+/** A comment attached to a mock issue. */
+interface MockComment {
+  // Assigned by the data store from an incrementing counter.
+  id: number;
+  body: string;
+  user: { login: string; id: number };
+  // ISO-8601 timestamp string.
+  created_at: string;
+}
+
+/** Repository record served by the mock REST endpoints. */
+interface MockRepository {
+  id: number;
+  name: string;
+  // "owner/name"; also the key the data store uses to look repositories up.
+  full_name: string;
+  private: boolean;
+  owner: { login: string; id: number };
+  default_branch: string;
+  node_id: string;
+}
+
+/** A GitHub App installation known to the mock server. */
+interface MockInstallation {
+  id: number;
+  account: { login: string; id: number };
+  repositories: MockRepository[];
+  // Token handed out by the installation access-token endpoint.
+  access_token: string;
+}
+
+/** A webhook delivery recorded by the data store. */
+interface WebhookEvent {
+  // Despite the name, the trigger-webhook endpoint stores the event type here.
+  action: string;
+  payload: Record<string, unknown>;
+  // ISO-8601 timestamp of when the event was recorded.
+  timestamp: string;
+}
+
+// Mock data store
+/**
+ * In-memory state for the mock GitHub server.
+ *
+ * Holds repositories (keyed by full name), issues (keyed by repository full
+ * name, then issue number), installations (keyed by id), and two logs that
+ * tests inspect: recorded webhook events and comments posted back by the
+ * resolver. The store is seeded with one repository, one open issue (#1) and
+ * one installation; `reset()` restores that seed.
+ */
+class MockGitHubDataStore {
+  private repositories: Map<string, MockRepository> = new Map();
+
+  private issues: Map<string, Map<number, MockIssue>> = new Map();
+
+  private installations: Map<number, MockInstallation> = new Map();
+
+  private webhookEvents: WebhookEvent[] = [];
+
+  private nextCommentId = 1000;
+
+  // Label ids come from a counter rather than the clock so that several labels
+  // added within the same millisecond still get distinct ids.
+  private nextLabelId = 5000;
+
+  private outgoingWebhookResponses: Array<{
+    body: string;
+    timestamp: string;
+  }> = [];
+
+  constructor() {
+    this.initializeDefaultData();
+  }
+
+  /** Seed the store with the default repository, issue and installation. */
+  private initializeDefaultData() {
+    // Create a default test repository
+    const testRepo: MockRepository = {
+      id: 123456789,
+      name: "test-repo",
+      full_name: "test-owner/test-repo",
+      private: false,
+      owner: { login: "test-owner", id: 1000 },
+      default_branch: "main",
+      node_id: "R_kgDOTest123",
+    };
+    this.repositories.set(testRepo.full_name, testRepo);
+
+    // Create a test issue
+    const testIssue: MockIssue = {
+      number: 1,
+      title: "Test Issue for OpenHands Resolver",
+      body: "This is a test issue to verify the resolver integration works correctly. Please add a README file.",
+      state: "open",
+      labels: [],
+      user: { login: "test-user", id: 2000 },
+      created_at: new Date().toISOString(),
+      updated_at: new Date().toISOString(),
+      comments: [],
+      reactions: [],
+    };
+    this.issues.set(testRepo.full_name, new Map([[1, testIssue]]));
+
+    // Create a default installation
+    const testInstallation: MockInstallation = {
+      id: 12345,
+      account: { login: "test-owner", id: 1000 },
+      repositories: [testRepo],
+      access_token: "ghs_mock_installation_token_12345",
+    };
+    this.installations.set(testInstallation.id, testInstallation);
+  }
+
+  /** Look up a repository by its "owner/name" full name. */
+  getRepository(fullName: string): MockRepository | undefined {
+    return this.repositories.get(fullName);
+  }
+
+  /** Look up a single issue; undefined when the repo or issue is unknown. */
+  getIssue(fullName: string, issueNumber: number): MockIssue | undefined {
+    return this.issues.get(fullName)?.get(issueNumber);
+  }
+
+  /** All issues of a repository, or an empty array for an unknown repository. */
+  getIssues(fullName: string): MockIssue[] {
+    const repoIssues = this.issues.get(fullName);
+    return repoIssues ? Array.from(repoIssues.values()) : [];
+  }
+
+  /**
+   * Append a comment to an issue and bump the issue's `updated_at`.
+   *
+   * @returns the stored comment, including its newly assigned id
+   * @throws Error when the issue does not exist
+   */
+  addComment(
+    fullName: string,
+    issueNumber: number,
+    body: string,
+    user: { login: string; id: number },
+  ): MockComment {
+    const issue = this.getIssue(fullName, issueNumber);
+    if (!issue) throw new Error(`Issue not found: ${fullName}#${issueNumber}`);
+
+    const comment: MockComment = {
+      id: this.nextCommentId++,
+      body,
+      user,
+      created_at: new Date().toISOString(),
+    };
+    issue.comments.push(comment);
+    issue.updated_at = new Date().toISOString();
+    return comment;
+  }
+
+  /** Record a reaction on an issue; silently ignored for unknown issues. */
+  addReaction(fullName: string, issueNumber: number, reaction: string): void {
+    const issue = this.getIssue(fullName, issueNumber);
+    if (issue) {
+      issue.reactions.push(reaction);
+    }
+  }
+
+  /**
+   * Add a label to an issue; silently ignored for unknown issues.
+   *
+   * Adding a label the issue already carries is a no-op, so repeated requests
+   * do not pile up duplicate entries.
+   */
+  addLabel(fullName: string, issueNumber: number, label: string): void {
+    const issue = this.getIssue(fullName, issueNumber);
+    if (!issue) return;
+    if (issue.labels.some((existing) => existing.name === label)) return;
+
+    issue.labels.push({ name: label, id: this.nextLabelId++ });
+    issue.updated_at = new Date().toISOString();
+  }
+
+  /** Look up an installation by id. */
+  getInstallation(id: number): MockInstallation | undefined {
+    return this.installations.get(id);
+  }
+
+  /** Every repository in the store, regardless of installation. */
+  getAllRepositories(): MockRepository[] {
+    return Array.from(this.repositories.values());
+  }
+
+  /** Append an entry to the webhook event log, stamped with the current time. */
+  recordWebhookEvent(action: string, payload: Record<string, unknown>): void {
+    this.webhookEvents.push({
+      action,
+      payload,
+      timestamp: new Date().toISOString(),
+    });
+  }
+
+  /** The webhook event log (the live array, not a copy). */
+  getWebhookEvents(): WebhookEvent[] {
+    return this.webhookEvents;
+  }
+
+  /** Append a resolver response body to the outgoing-response log. */
+  recordOutgoingWebhookResponse(body: string): void {
+    this.outgoingWebhookResponses.push({
+      body,
+      timestamp: new Date().toISOString(),
+    });
+  }
+
+  /** The outgoing-response log (the live array, not a copy). */
+  getOutgoingWebhookResponses(): Array<{ body: string; timestamp: string }> {
+    return this.outgoingWebhookResponses;
+  }
+
+  /** Empty both logs while leaving repositories, issues and installations. */
+  clearEvents(): void {
+    this.webhookEvents = [];
+    this.outgoingWebhookResponses = [];
+  }
+
+  /** Drop all state, restart the id counters, and re-seed the defaults. */
+  reset(): void {
+    this.repositories.clear();
+    this.issues.clear();
+    this.installations.clear();
+    this.webhookEvents = [];
+    this.outgoingWebhookResponses = [];
+    this.nextCommentId = 1000;
+    this.nextLabelId = 5000;
+    this.initializeDefaultData();
+  }
+}
+
+const dataStore = new MockGitHubDataStore();
+
+// Webhook secret for signature verification
+const WEBHOOK_SECRET =
+  process.env.MOCK_GITHUB_WEBHOOK_SECRET || "test-webhook-secret";
+
+/**
+ * Build the signature string for a webhook payload: the hex HMAC-SHA256 of
+ * the payload keyed with WEBHOOK_SECRET, prefixed with "sha256=".
+ */
+function generateWebhookSignature(payload: string): string {
+  const digest = crypto
+    .createHmac("sha256", WEBHOOK_SECRET)
+    .update(payload)
+    .digest("hex");
+  return `sha256=${digest}`;
+}
+
+/**
+ * Match a URL path against a route pattern.
+ *
+ * @returns the pattern's named capture groups (an empty object when it has
+ *   none), or null when the path does not match
+ */
+function parseRoute(
+  url: string,
+  pattern: RegExp,
+): Record<string, string> | null {
+  const matched = url.match(pattern);
+  return matched === null ? null : (matched.groups ?? {});
+}
+
+/**
+ * Send `data` as a JSON response and end it.
+ *
+ * @param res    response to write to
+ * @param data   value to serialize with JSON.stringify
+ * @param status HTTP status code (defaults to 200)
+ */
+function jsonResponse(
+  res: http.ServerResponse,
+  data: unknown,
+  status = 200,
+): void {
+  const headers = { "Content-Type": "application/json" };
+  // writeHead returns the response, so the body can be chained onto it.
+  res.writeHead(status, headers).end(JSON.stringify(data));
+}
+
+/**
+ * Read the full request body and return it as a UTF-8 string.
+ *
+ * Chunks are collected as Buffers and decoded once at the end, so a
+ * multi-byte character split across two chunks is decoded correctly.
+ *
+ * @returns a promise for the body text; rejects if the request stream errors
+ */
+async function parseBody(req: http.IncomingMessage): Promise<string> {
+  return new Promise((resolve, reject) => {
+    const chunks: Buffer[] = [];
+    req.on("data", (chunk: Buffer | string) => {
+      // Chunks are strings only if an encoding was set on the stream.
+      chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
+    });
+    req.on("end", () => resolve(Buffer.concat(chunks).toString("utf8")));
+    req.on("error", reject);
+  });
+}
+
+// Request handlers
+const handlers: Array<{
+  method: string;
+  pattern: RegExp;
+  handler: (
+    req: http.IncomingMessage,
+    res: http.ServerResponse,
+    params: Record<string, string>,
+    body?: unknown,
+  ) => Promise<void> | void;
+}> = [
+  // GitHub App root endpoint
+  {
+    method: "GET",
+    pattern: /^\/app$/,
+    handler: (_req, res) => {
+      jsonResponse(res, {
+        id: 123456,
+        slug: "openhands-test-app",
+        name: "OpenHands Test App",
+        owner: { login: "test-owner", id: 1000 },
+        permissions: {
+          issues: "write",
+          pull_requests: "write",
+          contents: "write",
+        },
+      });
+    },
+  },
+
+  // Get repository
+  {
+    method: "GET",
+    pattern: /^\/repos\/(?<owner>[^/]+)\/(?<repo>[^/]+)$/,
+    handler: (_req, res, params) => {
+      const fullName = `${params.owner}/${params.repo}`;
+      const repo = dataStore.getRepository(fullName);
+      if (repo) {
+        jsonResponse(res, repo);
+      } else {
+        jsonResponse(res, { message: "Not Found" }, 404);
+      }
+    },
+  },
+
+  // Get issue
+  {
+    method: "GET",
+    pattern:
+      /^\/repos\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/issues\/(?<number>\d+)$/,
+    handler: (_req, res, params) => {
+      const fullName = `${params.owner}/${params.repo}`;
+      const issue = dataStore.getIssue(fullName, parseInt(params.number, 10));
+      if (issue) {
+        const repo = dataStore.getRepository(fullName);
+        jsonResponse(res, {
+          ...issue,
+          url: `https://api.github.com/repos/${fullName}/issues/${issue.number}`,
+          html_url: `https://github.com/${fullName}/issues/${issue.number}`,
+          repository: repo,
+        });
+      } else {
+        jsonResponse(res, { message: "Not Found" }, 404);
+      }
+    },
+  },
+
+  // List issues
+  {
+    method: "GET",
+    pattern: /^\/repos\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/issues$/,
+    handler: (_req, res, params) => {
+      const fullName = `${params.owner}/${params.repo}`;
+      const issues = dataStore.getIssues(fullName);
+      jsonResponse(res, issues);
+    },
+  },
+
+  // Get issue comments
+  {
+    method: "GET",
+    pattern:
+      /^\/repos\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/issues\/(?<number>\d+)\/comments$/,
+    handler: (_req, res, params) => {
+      const fullName = `${params.owner}/${params.repo}`;
+      const issue = dataStore.getIssue(fullName, parseInt(params.number, 10));
+      if (issue) {
+        jsonResponse(res, issue.comments);
+      } else {
+        jsonResponse(res, { message: "Not Found" }, 404);
+      }
+    },
+  },
+
+  // Create issue comment
+  {
+    method: "POST",
+    pattern:
+      /^\/repos\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/issues\/(?<number>\d+)\/comments$/,
+    // Stores a comment authored by the bot user and logs it as a resolver
+    // response. Answers 404 for an unknown issue and 422 when the request has
+    // no string `body` field.
+    handler: async (_req, res, params, body) => {
+      const fullName = `${params.owner}/${params.repo}`;
+      const issueNumber = parseInt(params.number, 10);
+
+      // Checked explicitly so that only a missing issue maps to 404.
+      if (!dataStore.getIssue(fullName, issueNumber)) {
+        jsonResponse(res, { message: "Not Found" }, 404);
+        return;
+      }
+
+      // The server hands over the raw string when the body is not valid JSON,
+      // so the shape has to be checked before the field is used.
+      const commentBody = (body as { body?: unknown } | null | undefined)
+        ?.body;
+      if (typeof commentBody !== "string") {
+        jsonResponse(res, { message: "Validation Failed" }, 422);
+        return;
+      }
+
+      const comment = dataStore.addComment(fullName, issueNumber, commentBody, {
+        login: "openhands[bot]",
+        id: 99999,
+      });
+
+      // Record this as an outgoing response (the resolver posting back)
+      dataStore.recordOutgoingWebhookResponse(commentBody);
+
+      jsonResponse(res, comment, 201);
+    },
+  },
+
+  // Create issue reaction
+  {
+    method: "POST",
+    pattern:
+      /^\/repos\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/issues\/(?<number>\d+)\/reactions$/,
+    // Records a reaction on the issue. Answers 422 when the request has no
+    // string `content` field.
+    handler: async (_req, res, params, body) => {
+      const fullName = `${params.owner}/${params.repo}`;
+      const issueNumber = parseInt(params.number, 10);
+
+      // The body is a raw string when it was not valid JSON and may be null
+      // for a literal JSON null, so read the field defensively.
+      const content = (body as { content?: unknown } | null | undefined)
+        ?.content;
+      if (typeof content !== "string") {
+        jsonResponse(res, { message: "Validation Failed" }, 422);
+        return;
+      }
+
+      // addReaction ignores unknown issues, so those still get a 201 here.
+      dataStore.addReaction(fullName, issueNumber, content);
+      jsonResponse(res, { id: Date.now(), content }, 201);
+    },
+  },
+
+  // Add issue label
+  {
+    method: "POST",
+    pattern:
+      /^\/repos\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/issues\/(?<number>\d+)\/labels$/,
+    // Adds labels to an issue and returns the issue's label list. Accepts
+    // either `{ "labels": [...] }` or a bare array of label names. Answers
+    // 404 for an unknown issue and 422 for any other body shape.
+    handler: async (_req, res, params, body) => {
+      const fullName = `${params.owner}/${params.repo}`;
+      const issueNumber = parseInt(params.number, 10);
+
+      const issue = dataStore.getIssue(fullName, issueNumber);
+      if (!issue) {
+        jsonResponse(res, { message: "Not Found" }, 404);
+        return;
+      }
+
+      const requested: unknown = Array.isArray(body)
+        ? body
+        : (body as { labels?: unknown } | null | undefined)?.labels;
+      if (
+        !Array.isArray(requested) ||
+        !requested.every((label): label is string => typeof label === "string")
+      ) {
+        jsonResponse(res, { message: "Validation Failed" }, 422);
+        return;
+      }
+
+      requested.forEach((label) =>
+        dataStore.addLabel(fullName, issueNumber, label),
+      );
+      jsonResponse(res, issue.labels, 201);
+    },
+  },
+
+  // Get installation access token
+  {
+    method: "POST",
+    pattern: /^\/app\/installations\/(?<installation_id>\d+)\/access_tokens$/,
+    handler: (_req, res, params) => {
+      const installation = dataStore.getInstallation(
+        parseInt(params.installation_id, 10),
+      );
+      if (installation) {
+        jsonResponse(
+          res,
+          {
+            token: installation.access_token,
+            expires_at: new Date(Date.now() + 3600000).toISOString(),
+            permissions: {
+              issues: "write",
+              pull_requests: "write",
+              contents: "write",
+            },
+            repository_selection: "all",
+          },
+          201,
+        );
+      } else {
+        jsonResponse(res, { message: "Not Found" }, 404);
+      }
+    },
+  },
+
+  // Get installation repositories
+  {
+    method: "GET",
+    pattern: /^\/installation\/repositories$/,
+    handler: (_req, res) => {
+      // Return all repositories from all installations
+      const repos = dataStore.getAllRepositories();
+      jsonResponse(res, {
+        total_count: repos.length,
+        repositories: repos,
+      });
+    },
+  },
+
+  // Get user
+  {
+    method: "GET",
+    pattern: /^\/user$/,
+    handler: (_req, res) => {
+      jsonResponse(res, {
+        id: 2000,
+        login: "test-user",
+        avatar_url: "https://avatars.githubusercontent.com/u/2000",
+        name: "Test User",
+        email: "test-user@example.com",
+      });
+    },
+  },
+
+  // Get user by username
+  {
+    method: "GET",
+    pattern: /^\/users\/(?<username>[^/]+)$/,
+    handler: (_req, res, params) => {
+      jsonResponse(res, {
+        id: 2000,
+        login: params.username,
+        avatar_url: `https://avatars.githubusercontent.com/u/2000`,
+        name: params.username,
+      });
+    },
+  },
+
+  // Get repository collaborator permission
+  {
+    method: "GET",
+    pattern:
+      /^\/repos\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/collaborators\/(?<username>[^/]+)\/permission$/,
+    // Every user is reported as having write permission. The response echoes
+    // the requested username, matching the `/users/:username` handler.
+    handler: (_req, res, params) => {
+      jsonResponse(res, {
+        permission: "write",
+        user: { login: params.username, id: 2000 },
+      });
+    },
+  },
+
+  // GraphQL endpoint
+  {
+    method: "POST",
+    pattern: /^\/graphql$/,
+    handler: async (_req, res, _params, _body) => {
+      // Return a basic response for common queries
+      // The body would contain { query: string, variables?: Record<string, unknown> }
+      jsonResponse(res, {
+        data: {
+          repository: {
+            id: "R_kgDOTest123",
+            name: "test-repo",
+            owner: { login: "test-owner" },
+          },
+        },
+      });
+    },
+  },
+
+  // Test control endpoints - Get webhook events
+  {
+    method: "GET",
+    pattern: /^\/_test\/webhook-events$/,
+    handler: (_req, res) => {
+      jsonResponse(res, dataStore.getWebhookEvents());
+    },
+  },
+
+  // Test control endpoints - Get outgoing webhook responses
+  {
+    method: "GET",
+    pattern: /^\/_test\/outgoing-responses$/,
+    handler: (_req, res) => {
+      jsonResponse(res, dataStore.getOutgoingWebhookResponses());
+    },
+  },
+
+  // Test control endpoints - Clear events
+  {
+    method: "POST",
+    pattern: /^\/_test\/clear-events$/,
+    handler: (_req, res) => {
+      dataStore.clearEvents();
+      jsonResponse(res, { status: "cleared" });
+    },
+  },
+
+  // Test control endpoints - Reset data
+  {
+    method: "POST",
+    pattern: /^\/_test\/reset$/,
+    handler: (_req, res) => {
+      dataStore.reset();
+      jsonResponse(res, { status: "reset" });
+    },
+  },
+
+  // Test control endpoints - Trigger webhook
+  {
+    method: "POST",
+    pattern: /^\/_test\/trigger-webhook$/,
+    // Signs `payload` and POSTs it to `targetUrl` as a GitHub webhook of type
+    // `eventType`, then reports the target's status and body back to the
+    // caller. Answers 400 for an incomplete request and 500 when the delivery
+    // itself fails.
+    handler: async (_req, res, _params, body) => {
+      const { targetUrl, eventType, payload } = (body ?? {}) as {
+        targetUrl?: unknown;
+        eventType?: unknown;
+        payload?: Record<string, unknown>;
+      };
+
+      // Validate before recording so a malformed request leaves no bogus
+      // event in the log. A missing payload would otherwise serialize to
+      // `undefined` and crash the signature step.
+      if (
+        typeof targetUrl !== "string" ||
+        typeof eventType !== "string" ||
+        payload === undefined
+      ) {
+        jsonResponse(
+          res,
+          {
+            status: "error",
+            error: "targetUrl, eventType and payload are required",
+          },
+          400,
+        );
+        return;
+      }
+
+      // Record the webhook event
+      dataStore.recordWebhookEvent(eventType, payload);
+
+      // Sign the exact string that is sent, since the signature covers the
+      // raw request body.
+      const payloadString = JSON.stringify(payload);
+      const signature = generateWebhookSignature(payloadString);
+
+      try {
+        const response = await fetch(targetUrl, {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            "X-GitHub-Event": eventType,
+            "X-Hub-Signature-256": signature,
+            "X-GitHub-Delivery": crypto.randomUUID(),
+          },
+          body: payloadString,
+        });
+
+        const responseText = await response.text();
+        jsonResponse(res, {
+          status: "sent",
+          targetUrl,
+          eventType,
+          responseStatus: response.status,
+          responseBody: responseText,
+        });
+      } catch (error) {
+        jsonResponse(
+          res,
+          {
+            status: "error",
+            error: (error as Error).message,
+          },
+          500,
+        );
+      }
+    },
+  },
+
+  // Health check
+  {
+    method: "GET",
+    pattern: /^\/_health$/,
+    handler: (_req, res) => {
+      jsonResponse(res, { status: "healthy" });
+    },
+  },
+];
+
+// Create HTTP server
+//
+// Request flow: answer CORS preflights, buffer and JSON-decode the body of
+// POST/PUT requests, then dispatch to the first entry in `handlers` whose
+// method and path pattern both match. Unmatched requests get a JSON 404.
+const server = http.createServer(async (req, res) => {
+  const url = req.url || "/";
+  const method = req.method || "GET";
+
+  // Handle CORS preflight
+  if (method === "OPTIONS") {
+    res.writeHead(204, {
+      "Access-Control-Allow-Origin": "*",
+      "Access-Control-Allow-Methods": "GET, POST, PUT, DELETE, OPTIONS",
+      "Access-Control-Allow-Headers": "*",
+    });
+    res.end();
+    return;
+  }
+
+  // Add CORS headers to all responses
+  res.setHeader("Access-Control-Allow-Origin", "*");
+  res.setHeader("Access-Control-Allow-Headers", "*");
+
+  // Parse body for POST/PUT requests
+  let body: unknown;
+  if (method === "POST" || method === "PUT") {
+    let rawBody: string;
+    try {
+      rawBody = await parseBody(req);
+    } catch (error) {
+      // Without this, a failed read would reject this async listener with
+      // nothing to catch it.
+      console.error(`Error reading body for ${method} ${url}:`, error);
+      if (!res.headersSent) {
+        jsonResponse(res, { message: "Bad Request" }, 400);
+      }
+      return;
+    }
+    try {
+      body = JSON.parse(rawBody);
+    } catch {
+      // Not JSON: hand the raw text to the handler unchanged.
+      body = rawBody;
+    }
+  }
+
+  // Routes are matched on the path only, without the query string.
+  const path = url.split("?")[0];
+
+  // Try to match a handler
+  for (const handler of handlers) {
+    if (handler.method === method) {
+      const params = parseRoute(path, handler.pattern);
+      if (params !== null) {
+        try {
+          await handler.handler(req, res, params, body);
+          return;
+        } catch (error) {
+          console.error(`Error handling ${method} ${url}:`, error);
+          if (res.headersSent) {
+            // The handler failed after it started responding; writing a new
+            // status line now would throw, so just finish the response.
+            res.end();
+          } else {
+            jsonResponse(res, { error: "Internal Server Error" }, 500);
+          }
+          return;
+        }
+      }
+    }
+  }
+
+  // No handler found
+  console.log(`No handler for ${method} ${url}`);
+  jsonResponse(res, { message: "Not Found", path: url }, 404);
+});
+
+// Start server
+//
+// The port comes from MOCK_GITHUB_PORT (default 9999). Once listening, a
+// short usage banner is printed.
+const PORT = parseInt(process.env.MOCK_GITHUB_PORT || "9999", 10);
+
+server.listen(PORT, () => {
+  console.log(`Mock GitHub Server running on port ${PORT}`);
+  // Report only where the secret came from, never its value, so a custom
+  // secret does not end up in terminal or CI logs.
+  const secretSource = process.env.MOCK_GITHUB_WEBHOOK_SECRET
+    ? "(set via MOCK_GITHUB_WEBHOOK_SECRET)"
+    : "(default test secret)";
+  console.log(`Webhook secret: ${secretSource}`);
+  console.log("\nAvailable endpoints:");
+  console.log("  GET  /_health                    - Health check");
+  console.log(
+    "  GET  /_test/webhook-events       - Get recorded webhook events",
+  );
+  console.log(
+    "  GET  /_test/outgoing-responses   - Get responses posted by the resolver",
+  );
+  console.log("  POST /_test/clear-events         - Clear recorded events");
+  console.log("  POST /_test/reset                - Reset all mock data");
+  console.log(
+    "  POST /_test/trigger-webhook      - Trigger a webhook to target URL",
+  );
+  console.log("\nGitHub API endpoints:");
+  console.log("  GET  /repos/:owner/:repo");
+  console.log("  GET  /repos/:owner/:repo/issues/:number");
+  console.log("  POST /repos/:owner/:repo/issues/:number/comments");
+  console.log("  POST /repos/:owner/:repo/issues/:number/reactions");
+  console.log("  POST /app/installations/:id/access_tokens");
+  console.log("  POST /graphql");
+});
+
+export { server, dataStore, generateWebhookSignature, WEBHOOK_SECRET };
@@ -0,0 +1,281 @@
+/**
+ * GitHub Webhook Payload Templates
+ *
+ * These templates mirror the webhook payloads that GitHub sends for various events.
+ * They're used to test the OpenHands resolver integration.
+ */
+
+/**
+ * Fields shared by every webhook payload built in this module.
+ */
+export interface GitHubWebhookPayload {
+  action: string;
+  installation: { id: number };
+  repository: {
+    id: number;
+    name: string;
+    // "owner/name"
+    full_name: string;
+    private: boolean;
+    owner: { login: string; id: number };
+    default_branch: string;
+  };
+  sender: { login: string; id: number };
+  // Event-specific fields (issue, comment, pull_request, ...) live alongside
+  // the common ones.
+  [key: string]: unknown;
+}
+
+/**
+ * Payload carrying an issue, as built by `createIssueLabeledPayload`.
+ */
+export interface IssuePayload extends GitHubWebhookPayload {
+  issue: {
+    number: number;
+    title: string;
+    body: string;
+    state: string;
+    labels: Array<{ name: string; id: number }>;
+    user: { login: string; id: number };
+  };
+  // The label the action refers to; optional on the type.
+  label?: { name: string; id: number };
+}
+
+/**
+ * Payload carrying an issue together with a comment made on it, as built by
+ * `createIssueCommentPayload`.
+ */
+export interface IssueCommentPayload extends GitHubWebhookPayload {
+  issue: {
+    number: number;
+    title: string;
+    body: string;
+    state: string;
+    labels: Array<{ name: string; id: number }>;
+    user: { login: string; id: number };
+    // Set only when the commented "issue" is a pull request.
+    pull_request?: { url: string };
+  };
+  comment: {
+    id: number;
+    body: string;
+    user: { login: string; id: number };
+  };
+}
+
+/**
+ * Payload carrying a pull request together with a review comment on one of
+ * its files, as built by `createPullRequestReviewCommentPayload`.
+ */
+export interface PullRequestReviewCommentPayload extends GitHubWebhookPayload {
+  pull_request: {
+    number: number;
+    title: string;
+    body: string;
+    state: string;
+    head: { ref: string; sha: string };
+    base: { ref: string };
+    user: { login: string; id: number };
+  };
+  comment: {
+    id: number;
+    node_id: string;
+    body: string;
+    // File path and line number the comment is attached to.
+    path: string;
+    line: number;
+    user: { login: string; id: number };
+  };
+}
+
+/**
+ * Build the fields common to all webhook payloads.
+ *
+ * Every option is optional; omitted options fall back to the default test
+ * installation, repository and sender. `action` is left empty for the caller
+ * to fill in.
+ */
+function createBasePayload(params: {
+  installationId?: number;
+  repositoryId?: number;
+  repositoryName?: string;
+  repositoryOwner?: string;
+  senderLogin?: string;
+  senderId?: number;
+  isPrivate?: boolean;
+}): GitHubWebhookPayload {
+  const {
+    installationId: installation = 12345,
+    repositoryId: repoId = 123456789,
+    repositoryName: repoName = "test-repo",
+    repositoryOwner: ownerLogin = "test-owner",
+    senderLogin: login = "test-user",
+    senderId: id = 2000,
+    isPrivate: privateRepo = false,
+  } = params;
+
+  const repository = {
+    id: repoId,
+    name: repoName,
+    full_name: `${ownerLogin}/${repoName}`,
+    private: privateRepo,
+    owner: { login: ownerLogin, id: 1000 },
+    default_branch: "main",
+  };
+
+  return {
+    action: "",
+    installation: { id: installation },
+    repository,
+    sender: { login, id },
+  };
+}
+
+/**
+ * Create a payload for an issue being labeled with the OpenHands label.
+ *
+ * All options are optional and `params` itself may be omitted. The defaults
+ * describe issue #1 receiving the "openhands" label. The sender is also used
+ * as the issue author, and the applied label appears both in `issue.labels`
+ * and as the top-level `label`.
+ */
+export function createIssueLabeledPayload(
+  params: {
+    installationId?: number;
+    issueNumber?: number;
+    issueTitle?: string;
+    issueBody?: string;
+    labelName?: string;
+    repositoryName?: string;
+    repositoryOwner?: string;
+    senderLogin?: string;
+    senderId?: number;
+  } = {},
+): IssuePayload {
+  const {
+    issueNumber = 1,
+    issueTitle = "Test Issue for OpenHands Resolver",
+    issueBody = "This is a test issue. Please add a README file.",
+    labelName = "openhands",
+    senderLogin = "test-user",
+    senderId = 2000,
+    // installation / repository options are forwarded to the base payload
+    ...rest
+  } = params;
+
+  const base = createBasePayload({ senderLogin, senderId, ...rest });
+  const label = { name: labelName, id: Date.now() };
+
+  return {
+    ...base,
+    action: "labeled",
+    issue: {
+      number: issueNumber,
+      title: issueTitle,
+      body: issueBody,
+      state: "open",
+      labels: [label],
+      user: { login: senderLogin, id: senderId },
+    },
+    label,
+  };
+}
+
+/**
+ * Create a payload for an issue comment mentioning @openhands.
+ *
+ * All options are optional and `params` itself may be omitted. The sender is
+ * the comment author; the issue author is a fixed "issue-creator" user. With
+ * `isPullRequest: true` the issue carries a `pull_request.url` pointing at
+ * the matching pulls endpoint.
+ */
+export function createIssueCommentPayload(
+  params: {
+    installationId?: number;
+    issueNumber?: number;
+    issueTitle?: string;
+    issueBody?: string;
+    commentBody?: string;
+    commentId?: number;
+    repositoryName?: string;
+    repositoryOwner?: string;
+    senderLogin?: string;
+    senderId?: number;
+    isPullRequest?: boolean;
+  } = {},
+): IssueCommentPayload {
+  const {
+    issueNumber = 1,
+    issueTitle = "Test Issue for OpenHands Resolver",
+    issueBody = "This is a test issue.",
+    commentBody = "@openhands please add a README file",
+    commentId = 1001,
+    senderLogin = "test-user",
+    senderId = 2000,
+    isPullRequest = false,
+    // installation / repository options are forwarded to the base payload
+    ...rest
+  } = params;
+
+  const base = createBasePayload({ senderLogin, senderId, ...rest });
+
+  const payload: IssueCommentPayload = {
+    ...base,
+    action: "created",
+    issue: {
+      number: issueNumber,
+      title: issueTitle,
+      body: issueBody,
+      state: "open",
+      labels: [],
+      user: { login: "issue-creator", id: 3000 },
+    },
+    comment: {
+      id: commentId,
+      body: commentBody,
+      user: { login: senderLogin, id: senderId },
+    },
+  };
+
+  if (isPullRequest) {
+    payload.issue.pull_request = {
+      url: `https://api.github.com/repos/${base.repository.full_name}/pulls/${issueNumber}`,
+    };
+  }
+
+  return payload;
+}
+
+/**
+ * Create a payload for a PR review comment mentioning @openhands.
+ *
+ * All options are optional and `params` itself may be omitted. The sender is
+ * the comment author; the PR author is a fixed "pr-creator" user and the head
+ * commit SHA is a fixed placeholder. The comment's `node_id` is derived from
+ * `commentId`.
+ */
+export function createPullRequestReviewCommentPayload(
+  params: {
+    installationId?: number;
+    prNumber?: number;
+    prTitle?: string;
+    prBody?: string;
+    commentBody?: string;
+    commentId?: number;
+    filePath?: string;
+    lineNumber?: number;
+    headBranch?: string;
+    baseBranch?: string;
+    repositoryName?: string;
+    repositoryOwner?: string;
+    senderLogin?: string;
+    senderId?: number;
+  } = {},
+): PullRequestReviewCommentPayload {
+  const {
+    prNumber = 2,
+    prTitle = "Test PR for OpenHands Resolver",
+    prBody = "This is a test PR.",
+    commentBody = "@openhands please fix this code",
+    commentId = 2001,
+    filePath = "src/main.ts",
+    lineNumber = 10,
+    headBranch = "feature-branch",
+    baseBranch = "main",
+    senderLogin = "test-user",
+    senderId = 2000,
+    // installation / repository options are forwarded to the base payload
+    ...rest
+  } = params;
+
+  const base = createBasePayload({ senderLogin, senderId, ...rest });
+
+  return {
+    ...base,
+    action: "created",
+    pull_request: {
+      number: prNumber,
+      title: prTitle,
+      body: prBody,
+      state: "open",
+      head: { ref: headBranch, sha: "abc123def456" },
+      base: { ref: baseBranch },
+      user: { login: "pr-creator", id: 4000 },
+    },
+    comment: {
+      id: commentId,
+      node_id: `PRRC_${commentId}`,
+      body: commentBody,
+      path: filePath,
+      line: lineNumber,
+      user: { login: senderLogin, id: senderId },
+    },
+  };
+}
+
+/**
+ * Infer the GitHub event type from the top-level keys of a payload.
+ *
+ * - `comment` and `pull_request` present: "pull_request_review_comment"
+ * - `comment` only: "issue_comment"
+ * - `issue` without `comment`: "issues"
+ * - none of these: "unknown"
+ */
+export function getEventType(payload: GitHubWebhookPayload): string {
+  const hasComment = "comment" in payload;
+  if (hasComment) {
+    return "pull_request" in payload
+      ? "pull_request_review_comment"
+      : "issue_comment";
+  }
+  return "issue" in payload ? "issues" : "unknown";
+}
@@ -0,0 +1,6 @@
+/**
+ * Mock Server Exports
+ */
+
+export * from "./github-webhook-payloads";
+export * from "./mock-github-client";
@@ -0,0 +1,223 @@
+/**
+ * Mock GitHub Client
+ *
+ * Client utilities for interacting with the mock GitHub server during tests.
+ */
+
+import {
+  createIssueLabeledPayload,
+  createIssueCommentPayload,
+  createPullRequestReviewCommentPayload,
+  getEventType,
+  GitHubWebhookPayload,
+} from "./github-webhook-payloads";
+
+/**
+ * Connection settings for a MockGitHubClient.
+ */
+export interface MockGitHubClientConfig {
+  /** Base URL of the mock GitHub server (prefix for `/_health` and `/_test/*`). */
+  mockServerUrl: string;
+  /** URL sent to the mock server as `targetUrl` when triggering a webhook. */
+  webhookTargetUrl: string;
+}
+
+/**
+ * Type applied to the JSON body returned by the mock server's
+ * `/_test/trigger-webhook` endpoint.
+ * NOTE(review): field meanings are defined by the mock server, which is not
+ * visible here; `responseStatus`/`responseBody` presumably describe the
+ * webhook target's reply - confirm against github-mock-server.ts.
+ */
+export interface TriggerWebhookResult {
+  status: string;
+  targetUrl: string;
+  eventType: string;
+  responseStatus: number;
+  responseBody: string;
+}
+
+/**
+ * One entry of the list returned by `/_test/webhook-events`.
+ */
+export interface WebhookEvent {
+  action: string;
+  payload: Record<string, unknown>;
+  timestamp: string;
+}
+
+/**
+ * One entry of the list returned by `/_test/outgoing-responses`.
+ */
+export interface OutgoingResponse {
+  /** Text of the response; searched by `waitForResponseContaining`. */
+  body: string;
+  timestamp: string;
+}
+
+/**
+ * Client for interacting with the Mock GitHub Server.
+ *
+ * Wraps the server's `/_health` probe and its `/_test/*` control endpoints so
+ * tests can trigger webhooks, inspect what was recorded, and reset state.
+ */
+export class MockGitHubClient {
+  private mockServerUrl: string;
+
+  private webhookTargetUrl: string;
+
+  constructor(config: MockGitHubClientConfig) {
+    this.mockServerUrl = config.mockServerUrl;
+    this.webhookTargetUrl = config.webhookTargetUrl;
+  }
+
+  /**
+   * Call a mock-server endpoint and reject on a non-2xx status.
+   *
+   * Without this check a failed reset/clear would pass silently and a failed
+   * read would surface later as a confusing JSON or type error.
+   *
+   * @param path Path relative to the mock server URL (e.g. `/_test/reset`)
+   * @param method HTTP method to use
+   * @throws Error when the server answers with a non-2xx status
+   */
+  private async request(
+    path: string,
+    method: "GET" | "POST" = "GET",
+  ): Promise<Response> {
+    const response = await fetch(`${this.mockServerUrl}${path}`, { method });
+    if (!response.ok) {
+      throw new Error(
+        `Mock GitHub server request ${method} ${path} failed with status ${response.status}`,
+      );
+    }
+    return response;
+  }
+
+  /**
+   * Poll the outgoing responses until `select` yields a value or the timeout
+   * elapses.
+   *
+   * @param select Returns the value to resolve with, or undefined to keep polling
+   * @param timeoutMs Maximum time to keep polling
+   * @param checkIntervalMs Delay between two polls
+   * @param timeoutMessage Message of the error thrown on timeout
+   */
+  private async pollOutgoingResponses<T>(
+    select: (responses: OutgoingResponse[]) => T | undefined,
+    timeoutMs: number,
+    checkIntervalMs: number,
+    timeoutMessage: string,
+  ): Promise<T> {
+    const startTime = Date.now();
+    while (Date.now() - startTime < timeoutMs) {
+      const selected = select(await this.getOutgoingResponses());
+      if (selected !== undefined) {
+        return selected;
+      }
+      await new Promise((resolve) => setTimeout(resolve, checkIntervalMs));
+    }
+    throw new Error(timeoutMessage);
+  }
+
+  /**
+   * Check if the mock server is healthy.
+   *
+   * @returns true when `/_health` answers with a 2xx status; false on any
+   *   other status or when the request itself fails
+   */
+  async healthCheck(): Promise<boolean> {
+    try {
+      const response = await fetch(`${this.mockServerUrl}/_health`);
+      return response.ok;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Wait for the mock server to be ready, probing every 500ms.
+   *
+   * @param timeoutMs Maximum time to wait
+   * @throws Error if no health check succeeded within the timeout
+   */
+  async waitForReady(timeoutMs = 30000): Promise<void> {
+    const startTime = Date.now();
+    while (Date.now() - startTime < timeoutMs) {
+      if (await this.healthCheck()) {
+        return;
+      }
+      await new Promise((resolve) => setTimeout(resolve, 500));
+    }
+    throw new Error(`Mock GitHub server not ready after ${timeoutMs}ms`);
+  }
+
+  /**
+   * Trigger a webhook to the target URL.
+   *
+   * The event type is derived from the payload shape via getEventType. The
+   * mock server's JSON reply is returned as-is; its HTTP status is
+   * intentionally not checked here so callers can inspect the reported result.
+   */
+  async triggerWebhook(
+    payload: GitHubWebhookPayload,
+  ): Promise<TriggerWebhookResult> {
+    const eventType = getEventType(payload);
+
+    const response = await fetch(
+      `${this.mockServerUrl}/_test/trigger-webhook`,
+      {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          targetUrl: this.webhookTargetUrl,
+          eventType,
+          payload,
+        }),
+      },
+    );
+
+    return response.json();
+  }
+
+  /**
+   * Trigger an issue labeled event (simulates adding the openhands label)
+   */
+  async triggerIssueLabeledEvent(
+    params?: Parameters<typeof createIssueLabeledPayload>[0],
+  ): Promise<TriggerWebhookResult> {
+    const payload = createIssueLabeledPayload(params || {});
+    return this.triggerWebhook(payload);
+  }
+
+  /**
+   * Trigger an issue comment event (simulates @openhands mention in issue)
+   */
+  async triggerIssueCommentEvent(
+    params?: Parameters<typeof createIssueCommentPayload>[0],
+  ): Promise<TriggerWebhookResult> {
+    const payload = createIssueCommentPayload(params || {});
+    return this.triggerWebhook(payload);
+  }
+
+  /**
+   * Trigger a PR review comment event (simulates @openhands mention in PR)
+   */
+  async triggerPRReviewCommentEvent(
+    params?: Parameters<typeof createPullRequestReviewCommentPayload>[0],
+  ): Promise<TriggerWebhookResult> {
+    const payload = createPullRequestReviewCommentPayload(params || {});
+    return this.triggerWebhook(payload);
+  }
+
+  /**
+   * Get all recorded webhook events
+   *
+   * @throws Error when the mock server answers with a non-2xx status
+   */
+  async getWebhookEvents(): Promise<WebhookEvent[]> {
+    const response = await this.request("/_test/webhook-events");
+    return response.json();
+  }
+
+  /**
+   * Get all outgoing responses (comments posted by the resolver)
+   *
+   * @throws Error when the mock server answers with a non-2xx status
+   */
+  async getOutgoingResponses(): Promise<OutgoingResponse[]> {
+    const response = await this.request("/_test/outgoing-responses");
+    return response.json();
+  }
+
+  /**
+   * Clear all recorded events
+   *
+   * @throws Error when the mock server answers with a non-2xx status
+   */
+  async clearEvents(): Promise<void> {
+    await this.request("/_test/clear-events", "POST");
+  }
+
+  /**
+   * Reset all mock data to initial state
+   *
+   * @throws Error when the mock server answers with a non-2xx status
+   */
+  async reset(): Promise<void> {
+    await this.request("/_test/reset", "POST");
+  }
+
+  /**
+   * Wait for the resolver to post a response
+   * @param timeoutMs Maximum time to wait
+   * @param expectedCount Number of responses to wait for (default: 1)
+   * @param checkIntervalMs How often to check for responses
+   * @returns All outgoing responses once at least `expectedCount` exist
+   * @throws Error on timeout
+   */
+  async waitForResponse(
+    timeoutMs = 120000,
+    expectedCount = 1,
+    checkIntervalMs = 2000,
+  ): Promise<OutgoingResponse[]> {
+    return this.pollOutgoingResponses(
+      (responses) =>
+        responses.length >= expectedCount ? responses : undefined,
+      timeoutMs,
+      checkIntervalMs,
+      `Timed out waiting for ${expectedCount} response(s) after ${timeoutMs}ms`,
+    );
+  }
+
+  /**
+   * Wait for a response containing specific text
+   * @param expectedText Substring to look for in each response body
+   * @param timeoutMs Maximum time to wait
+   * @param checkIntervalMs How often to check for responses
+   * @returns The first response whose body contains `expectedText`
+   * @throws Error on timeout
+   */
+  async waitForResponseContaining(
+    expectedText: string,
+    timeoutMs = 120000,
+    checkIntervalMs = 2000,
+  ): Promise<OutgoingResponse> {
+    return this.pollOutgoingResponses(
+      (responses) =>
+        responses.find((response) => response.body.includes(expectedText)),
+      timeoutMs,
+      checkIntervalMs,
+      `Timed out waiting for response containing "${expectedText}" after ${timeoutMs}ms`,
+    );
+  }
+}
+
+/**
+ * Build a MockGitHubClient that talks to services on localhost.
+ *
+ * @param mockServerPort Port of the mock GitHub server (default: 9999)
+ * @param appPort Port of the app that receives the webhooks (default: 12000)
+ */
+export function createMockGitHubClient(
+  mockServerPort = 9999,
+  appPort = 12000,
+): MockGitHubClient {
+  const mockServerUrl = `http://localhost:${mockServerPort}`;
+  const webhookTargetUrl = `http://localhost:${appPort}/api/integration/github/events`;
+  return new MockGitHubClient({ mockServerUrl, webhookTargetUrl });
+}
@@ -0,0 +1,47 @@
+{
+  "name": "openhands-integration-tests",
+  "version": "1.0.0",
+  "description": "Integration and smoke tests for OpenHands using Playwright",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "test": "playwright test",
+    "test:smoke": "playwright test --grep @smoke",
+    "test:github-resolver": "playwright test --grep @github-resolver",
+    "test:staging": "cross-env BASE_URL=https://staging.all-hands.dev playwright test",
+    "test:production": "cross-env BASE_URL=https://app.all-hands.dev playwright test",
+    "test:feature": "playwright test",
+    "test:headed": "playwright test --headed",
+    "test:debug": "playwright test --debug",
+    "test:ui": "playwright test --ui",
+    "setup:auth": "playwright test --project=setup",
+    "report": "playwright show-report",
+    "codegen": "playwright codegen",
+    "typecheck": "tsc --noEmit",
+    "lint": "npm run typecheck && eslint . --ext .ts && prettier --check \"**/*.ts\"",
+    "lint:fix": "eslint . --ext .ts --fix && prettier --write \"**/*.ts\"",
+    "mock:github": "tsx mocks/github-mock-server.ts",
+    "mock:github:start": "tsx mocks/github-mock-server.ts &"
+  },
+  "devDependencies": {
+    "@playwright/test": "^1.57.0",
+    "@types/node": "^22.0.0",
+    "@typescript-eslint/eslint-plugin": "^7.18.0",
+    "@typescript-eslint/parser": "^7.18.0",
+    "cross-env": "^7.0.3",
+    "dotenv": "^16.4.5",
+    "eslint": "^8.57.0",
+    "eslint-config-airbnb-base": "^15.0.0",
+    "eslint-config-airbnb-typescript": "^18.0.0",
+    "eslint-config-prettier": "^9.1.0",
+    "eslint-plugin-import": "^2.31.0",
+    "eslint-plugin-prettier": "^5.2.1",
+    "eslint-plugin-unused-imports": "^3.2.0",
+    "prettier": "^3.4.2",
+    "tsx": "^4.19.0",
+    "typescript": "^5.6.0"
+  },
+  "engines": {
+    "node": ">=20.0.0"
+  }
+}
@@ -0,0 +1,89 @@
+import { Page, Locator, expect } from "@playwright/test";
+
+/**
+ * Shared foundation for the page objects in this suite: holds the Playwright
+ * page and offers common navigation, waiting, screenshot and error-banner
+ * helpers.
+ */
+export class BasePage {
+  readonly page: Page;
+
+  constructor(page: Page) {
+    this.page = page;
+  }
+
+  /**
+   * Open `path` (default "/") and wait for the page to load
+   */
+  async goto(path: string = "/"): Promise<void> {
+    await this.page.goto(path);
+    await this.waitForPageLoad();
+  }
+
+  /**
+   * Wait for the page to load: best-effort "networkidle" (up to 30s, failures
+   * ignored) followed by "domcontentloaded"
+   */
+  async waitForPageLoad(): Promise<void> {
+    try {
+      await this.page.waitForLoadState("networkidle", { timeout: 30_000 });
+    } catch {
+      // Not reaching network idle is tolerated.
+    }
+    await this.page.waitForLoadState("domcontentloaded");
+  }
+
+  /**
+   * Assert that `locator` becomes visible within `timeout` ms
+   */
+  async waitForElement(
+    locator: Locator,
+    timeout: number = 30_000,
+  ): Promise<void> {
+    const options = { timeout };
+    await expect(locator).toBeVisible(options);
+  }
+
+  /**
+   * Assert that `locator` becomes hidden within `timeout` ms
+   */
+  async waitForElementHidden(
+    locator: Locator,
+    timeout: number = 30_000,
+  ): Promise<void> {
+    const options = { timeout };
+    await expect(locator).toBeHidden(options);
+  }
+
+  /**
+   * Save a full-page screenshot under test-results/screenshots, named
+   * `<name>-<current timestamp>.png`
+   */
+  async screenshot(name: string): Promise<void> {
+    const path = `test-results/screenshots/${name}-${Date.now()}.png`;
+    await this.page.screenshot({ path, fullPage: true });
+  }
+
+  /**
+   * Report whether the error banner is currently visible (false if the
+   * visibility check itself fails)
+   */
+  async hasError(): Promise<boolean> {
+    const banner = this.page.getByTestId("error-message-banner");
+    try {
+      return await banner.isVisible();
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Return the error banner's text, or null when no banner is visible
+   */
+  async getErrorMessage(): Promise<string | null> {
+    const banner = this.page.getByTestId("error-message-banner");
+    let bannerShown: boolean;
+    try {
+      bannerShown = await banner.isVisible();
+    } catch {
+      bannerShown = false;
+    }
+    return bannerShown ? banner.textContent() : null;
+  }
+
+  /**
+   * Best-effort wait for "networkidle"; a timeout is silently ignored
+   */
+  async waitForNetworkIdle(timeout: number = 10_000): Promise<void> {
+    try {
+      await this.page.waitForLoadState("networkidle", { timeout });
+    } catch {
+      // Not reaching network idle is tolerated.
+    }
+  }
+}
@@ -0,0 +1,326 @@
+import { Page, Locator, expect } from "@playwright/test";
+import { BasePage } from "./BasePage";
+
+/**
+ * Agent states that can be observed during conversation.
+ *
+ * Each member's value is the lowercase string form of its name.
+ * NOTE(review): ConversationPage below does not reference this enum; it is
+ * only re-exported for callers - confirm the values match what the app
+ * actually reports.
+ */
+export enum AgentState {
+  LOADING = "loading",
+  RUNNING = "running",
+  AWAITING_USER_INPUT = "awaiting_user_input",
+  AWAITING_USER_CONFIRMATION = "awaiting_user_confirmation",
+  FINISHED = "finished",
+  ERROR = "error",
+  PAUSED = "paused",
+  STOPPED = "stopped",
+  INIT = "init",
+}
+
+/**
+ * Page object for the Conversation/Chat interface where users
+ * interact with the OpenHands agent.
+ */
+export class ConversationPage extends BasePage {
+  // Main containers
+  readonly appRoute: Locator;
+
+  readonly chatBox: Locator;
+
+  // Chat input elements
+  readonly chatInput: Locator;
+
+  readonly sendButton: Locator;
+
+  readonly stopButton: Locator;
+
+  // Message elements
+  readonly errorBanner: Locator;
+
+  readonly waitingMessage: Locator;
+
+  // Status indicators
+  readonly statusIndicator: Locator;
+
+  constructor(page: Page) {
+    super(page);
+
+    this.appRoute = page.getByTestId("app-route");
+    this.chatBox = page.getByTestId("interactive-chat-box");
+    this.chatInput = page.getByTestId("chat-input");
+    this.sendButton = page
+      .locator(
+        'button[type="submit"], button:has-text("Send"), [data-testid*="send"]',
+      )
+      .first();
+    this.stopButton = page
+      .locator('button:has-text("Stop"), [data-testid*="stop"]')
+      .first();
+    this.errorBanner = page.getByTestId("error-message-banner");
+    this.waitingMessage = page.locator('[data-testid*="waiting"]').first();
+    this.statusIndicator = page.getByTestId("status-icon");
+  }
+
+  /**
+   * Navigate to a specific conversation and wait until it is ready.
+   *
+   * Uses the plural `/conversations/<id>` path, matching the links and URL
+   * pattern HomePage.clickFirstConversation relies on.
+   */
+  async gotoConversation(conversationId: string): Promise<void> {
+    await super.goto(`/conversations/${conversationId}`);
+    await this.waitForConversationReady();
+  }
+
+  /**
+   * Wait for conversation interface to be ready for input
+   * @param timeout - Timeout in milliseconds applied to each individual check
+   */
+  async waitForConversationReady(timeout: number = 30_000): Promise<void> {
+    // Wait for the chat interface to appear
+    await expect(this.chatBox).toBeVisible({ timeout });
+
+    // Wait for the chat input to be visible
+    await expect(this.chatInput).toBeVisible({ timeout });
+
+    // Wait for agent to be ready by checking for "Waiting for task" text
+    // Note: Using text search since data-testid is not yet deployed to staging
+    const waitingForTaskText = this.page.getByText("Waiting for task");
+    await expect(waitingForTaskText).toBeVisible({ timeout });
+  }
+
+  /**
+   * Wait for the agent to be ready to receive input, polling once per second.
+   * @param timeout - Maximum time to wait in milliseconds
+   * @throws Error if the error banner appears or the timeout elapses
+   */
+  async waitForAgentReady(timeout: number = 90_000): Promise<void> {
+    const startTime = Date.now();
+
+    while (Date.now() - startTime < timeout) {
+      // Check if there's an error
+      if (await this.hasError()) {
+        const errorMsg = await this.getErrorMessage();
+        throw new Error(`Agent error: ${errorMsg}`);
+      }
+
+      // Check if input is enabled (agent is ready)
+      const isInputEnabled = await this.isChatInputEnabled();
+      if (isInputEnabled) {
+        return;
+      }
+
+      // Wait a bit before checking again
+      await this.page.waitForTimeout(1000);
+    }
+
+    throw new Error(`Agent not ready within ${timeout}ms timeout`);
+  }
+
+  /**
+   * Check if the chat input is enabled: it must be visible and its class
+   * attribute must not contain "disabled" or "loading". Any failure while
+   * checking counts as "not enabled".
+   */
+  async isChatInputEnabled(): Promise<boolean> {
+    try {
+      // contentEditable divs don't have a disabled state, check for pointer-events or class
+      const isVisible = await this.chatInput.isVisible();
+      if (!isVisible) return false;
+
+      // Check if there's a loading state or disabled class
+      const classes = await this.chatInput.getAttribute("class");
+      if (classes?.includes("disabled") || classes?.includes("loading")) {
+        return false;
+      }
+
+      return true;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Send a message to the agent by typing it into the chat input and
+   * pressing Enter
+   */
+  async sendMessage(message: string): Promise<void> {
+    // Wait for input to be ready
+    await expect(this.chatInput).toBeVisible({ timeout: 30_000 });
+
+    // Clear any existing content and type the message
+    await this.chatInput.click();
+    await this.chatInput.fill("");
+    await this.page.keyboard.type(message);
+
+    // Submit the message
+    await this.page.keyboard.press("Enter");
+
+    // Small delay to ensure message is sent
+    await this.page.waitForTimeout(500);
+  }
+
+  /**
+   * Wait for agent to respond (agent starts processing).
+   * Resolves without error when the timeout elapses.
+   */
+  async waitForAgentProcessing(timeout: number = 10_000): Promise<void> {
+    const startTime = Date.now();
+
+    while (Date.now() - startTime < timeout) {
+      // Check if agent is processing (input disabled or loading indicator visible)
+      const isProcessing = await this.isAgentProcessing();
+      if (isProcessing) {
+        return;
+      }
+
+      await this.page.waitForTimeout(500);
+    }
+
+    // It's okay if we don't see processing state - agent might have already finished
+  }
+
+  /**
+   * Check if agent is currently processing (instant snapshot, no waiting)
+   */
+  async isAgentProcessing(): Promise<boolean> {
+    // Check for loading indicators or disabled input
+    const loadingIndicator = this.page
+      .locator(
+        '[data-testid*="loading"], [class*="loading"], [class*="spinner"]',
+      )
+      .first();
+    // isVisible() returns immediately and ignores any timeout option, so none
+    // is passed; callers poll this method instead.
+    if (await loadingIndicator.isVisible().catch(() => false)) {
+      return true;
+    }
+
+    // Check if input is disabled (indicates processing)
+    const isInputEnabled = await this.isChatInputEnabled();
+    return !isInputEnabled;
+  }
+
+  /**
+   * Wait for agent to complete processing and return to ready state.
+   * @param timeout - Maximum total time in milliseconds, measured from the
+   *   call (the initial wait for processing to start counts towards it)
+   * @throws Error if the error banner appears or the timeout elapses
+   */
+  async waitForAgentComplete(timeout: number = 120_000): Promise<void> {
+    const startTime = Date.now();
+
+    // First, wait for processing to start
+    await this.waitForAgentProcessing(10_000).catch(() => {});
+
+    // Then wait for processing to complete
+    while (Date.now() - startTime < timeout) {
+      // Check for errors
+      if (await this.hasError()) {
+        const errorMsg = await this.getErrorMessage();
+        throw new Error(`Agent error during processing: ${errorMsg}`);
+      }
+
+      // Check if agent is back to ready state
+      const isInputEnabled = await this.isChatInputEnabled();
+      if (isInputEnabled) {
+        return;
+      }
+
+      await this.page.waitForTimeout(1000);
+    }
+
+    throw new Error(`Agent did not complete within ${timeout}ms timeout`);
+  }
+
+  /**
+   * Get the text of every element matching the message selectors
+   */
+  async getMessages(): Promise<string[]> {
+    const messageElements = this.page.locator(
+      '[data-testid*="message"], [class*="message"]',
+    );
+    return messageElements.allTextContents();
+  }
+
+  /**
+   * Get the last entry returned by getMessages, or null when there is none.
+   * No check is made that the entry actually originates from the agent.
+   */
+  async getLastAgentMessage(): Promise<string | null> {
+    const messages = await this.getMessages();
+    // Return the last message that's likely from the agent
+    return messages.length > 0 ? messages[messages.length - 1] : null;
+  }
+
+  /**
+   * Wait for a message containing specific text to appear
+   * @param expectedText - The text to search for in messages
+   * @param timeout - Maximum time to wait in milliseconds
+   * @returns The message containing the expected text
+   * @throws Error if the error banner appears or the timeout elapses
+   */
+  async waitForMessageContaining(
+    expectedText: string,
+    timeout: number = 120_000,
+  ): Promise<string> {
+    const startTime = Date.now();
+
+    while (Date.now() - startTime < timeout) {
+      // Check for errors first
+      if (await this.hasError()) {
+        const errorMsg = await this.getErrorMessage();
+        throw new Error(`Agent error while waiting for message: ${errorMsg}`);
+      }
+
+      // Get all messages and check if any contain the expected text
+      const messages = await this.getMessages();
+      for (const message of messages) {
+        if (message.includes(expectedText)) {
+          return message;
+        }
+      }
+
+      // Wait a bit before checking again
+      await this.page.waitForTimeout(1000);
+    }
+
+    // Get all messages for error reporting
+    const allMessages = await this.getMessages();
+    throw new Error(
+      `Timeout waiting for message containing "${expectedText}" after ${timeout}ms. ` +
+        `Messages found: ${JSON.stringify(allMessages.slice(-5))}`,
+    );
+  }
+
+  /**
+   * Stop the currently running agent.
+   *
+   * Waits up to 2 seconds for the stop button to become visible; when it
+   * never appears there is nothing to stop and the call is a no-op.
+   */
+  async stopAgent(): Promise<void> {
+    try {
+      // waitFor() honours the timeout, unlike isVisible(), which ignores it.
+      await this.stopButton.waitFor({ state: "visible", timeout: 2_000 });
+    } catch {
+      return;
+    }
+
+    await this.stopButton.click();
+    await this.page.waitForTimeout(1000);
+  }
+
+  /**
+   * Verify no error messages are displayed
+   * @throws Error carrying the banner text when the error banner is visible
+   */
+  async verifyNoErrors(): Promise<void> {
+    const hasError = await this.hasError();
+    if (hasError) {
+      const errorMsg = await this.getErrorMessage();
+      throw new Error(`Unexpected error message: ${errorMsg}`);
+    }
+  }
+
+  /**
+   * Execute a complete conversation flow:
+   * 1. Wait for agent to be ready (up to 30 seconds)
+   * 2. Send message
+   * 3. Wait for completion (up to `timeout` ms)
+   * 4. Verify no errors
+   */
+  async executePrompt(
+    message: string,
+    timeout: number = 120_000,
+  ): Promise<void> {
+    // Ensure agent is ready
+    await this.waitForAgentReady(30_000);
+
+    // Send the message
+    await this.sendMessage(message);
+
+    // Wait for completion
+    await this.waitForAgentComplete(timeout);
+
+    // Verify no errors
+    await this.verifyNoErrors();
+  }
+}
@@ -0,0 +1,186 @@
+import { Page, Locator, expect } from "@playwright/test";
+import { BasePage } from "./BasePage";
+
+/**
+ * Page object for the Home screen where users start new conversations
+ * and view recent conversations.
+ */
+export class HomePage extends BasePage {
+  // Main containers
+  readonly homeScreen: Locator;
+
+  readonly newConversationSection: Locator;
+
+  readonly recentConversationsSection: Locator;
+
+  // User avatar and menu
+  readonly userAvatar: Locator;
+
+  readonly accountSettingsMenu: Locator;
+
+  // Repository selection
+  readonly repoSelector: Locator;
+
+  readonly repoSearchInput: Locator;
+
+  constructor(page: Page) {
+    super(page);
+
+    this.homeScreen = page.getByTestId("home-screen");
+    this.newConversationSection = page.getByTestId(
+      "home-screen-new-conversation-section",
+    );
+    this.recentConversationsSection = page.getByTestId(
+      "home-screen-recent-conversations-section",
+    );
+    this.userAvatar = page.getByTestId("user-avatar");
+    this.accountSettingsMenu = page.getByTestId(
+      "account-settings-context-menu",
+    );
+    this.repoSelector = page.locator('[data-testid*="repo"]').first();
+    this.repoSearchInput = page
+      .locator('input[placeholder*="repository"], input[placeholder*="repo"]')
+      .first();
+  }
+
+  /**
+   * Wait up to `timeout` ms for `locator` to become visible.
+   *
+   * Locator.isVisible() returns immediately and ignores its timeout option,
+   * so waitFor() is used to get an actual bounded wait.
+   *
+   * @returns true if the element became visible in time, false otherwise
+   */
+  private static async becomesVisible(
+    locator: Locator,
+    timeout: number,
+  ): Promise<boolean> {
+    try {
+      await locator.waitFor({ state: "visible", timeout });
+      return true;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Navigate to the home page
+   */
+  async goto(): Promise<void> {
+    await super.goto("/");
+    await this.waitForHomeScreen();
+  }
+
+  /**
+   * Wait for the home screen to be fully loaded
+   */
+  async waitForHomeScreen(): Promise<void> {
+    await expect(this.homeScreen).toBeVisible({ timeout: 30_000 });
+    await this.waitForNetworkIdle();
+  }
+
+  /**
+   * Check if user is logged in by verifying home screen is visible
+   * (waits up to 10 seconds)
+   */
+  async isLoggedIn(): Promise<boolean> {
+    try {
+      await expect(this.homeScreen).toBeVisible({ timeout: 10_000 });
+      return true;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Select a repository by searching for it.
+   *
+   * Every step is best-effort: when neither the input, the selector nor the
+   * matching option shows up within 5 seconds, that step is skipped.
+   *
+   * @param repoUrl - Full repository URL (e.g., https://github.com/OpenHands/deploy)
+   */
+  async selectRepository(repoUrl: string): Promise<void> {
+    // Extract "owner/name" from the URL, tolerating trailing slashes and a
+    // ".git" suffix so those URL variants still match the listed repository
+    const repoName = repoUrl
+      .replace(/\/+$/, "")
+      .replace(/\.git$/, "")
+      .split("/")
+      .slice(-2)
+      .join("/");
+
+    // Look for repository selector/input
+    const repoInput = this.page
+      .locator('input[placeholder*="repository"], input[placeholder*="search"]')
+      .first();
+    const repoSelector = this.page
+      .locator('[class*="repo"], [data-testid*="repo"]')
+      .first();
+
+    // Try to find and interact with repo selection
+    if (await HomePage.becomesVisible(repoInput, 5_000)) {
+      await repoInput.fill(repoName);
+      await this.page.waitForTimeout(1000); // Wait for search results
+    } else if (await HomePage.becomesVisible(repoSelector, 5_000)) {
+      await repoSelector.click();
+      await this.page.waitForTimeout(500);
+    }
+
+    // Click on the repository in the dropdown/list
+    const repoOption = this.page.locator(`text=${repoName}`).first();
+    if (await HomePage.becomesVisible(repoOption, 5_000)) {
+      await repoOption.click();
+    }
+  }
+
+  /**
+   * Start a new conversation.
+   *
+   * Waits up to 5 seconds for the button; if it never appears the click is
+   * skipped. A failed URL wait afterwards is ignored as well.
+   *
+   * @param buttonId - Optional test ID of the button to click (default: 'launch-new-conversation-button')
+   */
+  async startNewConversation(
+    buttonId: string = "launch-new-conversation-button",
+  ): Promise<void> {
+    const startButton = this.page.getByTestId(buttonId);
+    if (await HomePage.becomesVisible(startButton, 5_000)) {
+      await startButton.click();
+    }
+
+    // Wait for conversation/chat interface to load
+    await this.page
+      .waitForURL(/conversation|chat|app/, { timeout: 30_000 })
+      .catch(() => {});
+  }
+
+  /**
+   * Open user settings menu
+   *
+   * Note: The menu is conditionally rendered based on async state (config loaded,
+   * user authenticated, etc.). We need to wait for the menu element to be attached
+   * to the DOM before we can interact with it. The menu appears on hover over the
+   * user-actions container, or when clicking the avatar toggles state.
+   */
+  async openUserMenu(): Promise<void> {
+    // First, wait for the user avatar to be visible
+    await expect(this.userAvatar).toBeVisible({ timeout: 10_000 });
+
+    // Wait for the menu to be attached to the DOM (may not be visible yet)
+    // This ensures the async config/auth state has loaded
+    await this.accountSettingsMenu.waitFor({
+      state: "attached",
+      timeout: 15_000,
+    });
+
+    // Now hover over the user-actions container to trigger the menu visibility
+    // The menu uses CSS group-hover to show, so we need to hover the parent
+    const userActionsContainer = this.page.getByTestId("user-actions");
+    await userActionsContainer.hover();
+
+    // Wait for the menu to become visible
+    await expect(this.accountSettingsMenu).toBeVisible({ timeout: 5_000 });
+  }
+
+  /**
+   * Get list of recent conversations
+   * @returns Non-blank text of every link/button in the recent conversations section
+   */
+  async getRecentConversations(): Promise<string[]> {
+    await this.waitForElement(this.recentConversationsSection);
+    const conversations = await this.recentConversationsSection
+      .locator("a, button, [role='button']")
+      .allTextContents();
+    return conversations.filter((text) => text.trim().length > 0);
+  }
+
+  /**
+   * Click on the first conversation in the recent conversations list
+   * The conversations are displayed as links in the recent-conversations section
+   */
+  async clickFirstConversation(): Promise<void> {
+    // Wait for recent conversations section to be visible
+    const recentConversations = this.page.getByTestId("recent-conversations");
+    await expect(recentConversations).toBeVisible({ timeout: 10_000 });
+
+    // Find the first conversation link (they link to /conversations/{id})
+    const firstConversationLink = recentConversations
+      .locator('a[href^="/conversations/"]')
+      .first();
+    await expect(firstConversationLink).toBeVisible({ timeout: 10_000 });
+
+    // Click the conversation
+    await firstConversationLink.click();
+
+    // Wait for navigation to conversation page
+    await this.page.waitForURL(/\/conversations\//, { timeout: 30_000 });
+  }
+}
@@ -0,0 +1,3 @@
+export { BasePage } from "./BasePage";
+export { HomePage } from "./HomePage";
+export { ConversationPage, AgentState } from "./ConversationPage";
@@ -0,0 +1,151 @@
+import { defineConfig, devices } from "@playwright/test";
+import dotenv from "dotenv";
+import path from "path";
+import fs from "fs";
+import { fileURLToPath } from "url";
+
+// Directory containing this config file. Derived from import.meta.url rather
+// than import.meta.dirname, which only exists from Node 20.11 while
+// package.json allows any Node >=20.0.0.
+const configDir = path.dirname(fileURLToPath(import.meta.url));
+
+// Load environment variables from .env file
+dotenv.config({ path: path.resolve(configDir, ".env") });
+
+// Check if auth file exists (will be created by setup project)
+const authFile = path.resolve(configDir, "./fixtures/auth.json");
+const hasAuthFile = fs.existsSync(authFile);
+
+/**
+ * Known deployment targets, keyed by the name accepted in TEST_ENV
+ */
+const environments = {
+  staging: "https://staging.all-hands.dev",
+  production: "https://app.all-hands.dev",
+  local: "http://localhost:3000",
+};
+
+/**
+ * Resolve the base URL for the test run.
+ *
+ * Precedence: a non-empty BASE_URL wins; otherwise TEST_ENV (default
+ * "staging") is looked up in `environments`, falling back to staging when
+ * the name has no entry.
+ * For feature branches, use: https://<feature_branch_name>.staging.all-hands.dev
+ */
+function getBaseURL(): string {
+  const explicitUrl = process.env.BASE_URL;
+  const targetName = (process.env.TEST_ENV ||
+    "staging") as keyof typeof environments;
+
+  return explicitUrl || environments[targetName] || environments.staging;
+}
+
+/**
+ * Playwright configuration for OpenHands integration tests
+ *
+ * Supports multiple environments:
+ * - staging: https://staging.all-hands.dev
+ * - production: https://app.all-hands.dev
+ * - feature branches: https://<branch>.staging.all-hands.dev
+ *
+ * Usage:
+ * - npm run test                    # Run all tests against staging
+ * - npm run test:staging            # Run all tests against staging
+ * - npm run test:production         # Run all tests against production
+ * - BASE_URL=https://my-branch.staging.all-hands.dev npm test  # Feature branch
+ */
+export default defineConfig({
+  testDir: "./tests",
+
+  // Do not run tests within a file in parallel: smoke tests rely on
+  // sequential execution
+  fullyParallel: false,
+
+  // Fail the build on CI if you accidentally left test.only in the source code
+  forbidOnly: !!process.env.CI,
+
+  // Retry failed tests (more retries in CI)
+  retries: process.env.CI ? 2 : 1,
+
+  // Single worker everywhere (smoke tests should run sequentially)
+  workers: 1,
+
+  // Reporter configuration (CI additionally emits GitHub annotations)
+  reporter: process.env.CI
+    ? [["html", { outputFolder: "playwright-report" }], ["list"], ["github"]]
+    : [["html", { outputFolder: "playwright-report" }], ["list"]],
+
+  // Timeout configuration
+  timeout: 120_000, // 2 minutes per test (agent operations can be slow)
+  expect: {
+    timeout: 30_000, // 30 seconds for assertions
+  },
+
+  // Shared settings for all projects
+  use: {
+    // Base URL for navigation
+    baseURL: getBaseURL(),
+
+    // Collect trace on the first retry of a failed test
+    trace: "on-first-retry",
+
+    // Screenshots on failure
+    screenshot: "only-on-failure",
+
+    // Video recording (useful for debugging CI failures)
+    video: process.env.CI ? "on-first-retry" : "off",
+
+    // Ignore SSL errors (for staging/development environments)
+    ignoreHTTPSErrors: true,
+
+    // Use persisted authentication state only if it exists
+    // NOTE(review): existence is checked when this config file is evaluated;
+    // confirm a first run, where the setup project creates the file later,
+    // still picks it up for the dependent projects.
+    storageState: hasAuthFile ? authFile : undefined,
+
+    // Browser viewport
+    viewport: { width: 1280, height: 720 },
+
+    // Action timeout
+    actionTimeout: 15_000,
+
+    // Navigation timeout
+    navigationTimeout: 30_000,
+  },
+
+  // Define test projects
+  projects: [
+    // Setup project - handles authentication
+    {
+      name: "setup",
+      testMatch: /global-setup\.ts/,
+      use: {
+        storageState: undefined, // Don't use existing auth for setup
+      },
+    },
+
+    // Chromium tests (primary browser)
+    {
+      name: "chromium",
+      use: {
+        ...devices["Desktop Chrome"],
+      },
+      dependencies: ["setup"],
+    },
+
+    // Firefox tests (optional - run with --project=firefox)
+    {
+      name: "firefox",
+      use: {
+        ...devices["Desktop Firefox"],
+      },
+      dependencies: ["setup"],
+    },
+
+    // WebKit tests (optional - run with --project=webkit)
+    {
+      name: "webkit",
+      use: {
+        ...devices["Desktop Safari"],
+      },
+      dependencies: ["setup"],
+    },
+  ],
+
+  // Output directory for test artifacts
+  outputDir: "./test-results",
+
+  // Global setup/teardown
+  globalSetup: undefined, // We use a setup project instead for better parallelization
+  globalTeardown: undefined,
+});
@@ -0,0 +1,403 @@
+import { test, expect } from "@playwright/test";
+import { ChildProcess, spawn } from "child_process";
+import path from "path";
+import crypto from "crypto";
+import {
+  MockGitHubClient,
+  createMockGitHubClient,
+  createIssueLabeledPayload,
+} from "../mocks";
+import { ConversationPage, HomePage } from "../pages";
+
+/**
+ * GitHub Resolver Integration Tests
+ *
+ * These tests verify the GitHub resolver integration in two modes:
+ *
+ * ## Mode 1: Mock Server Tests (for local development)
+ * Uses a local mock GitHub server to test the full webhook flow.
+ * Requires:
+ * - OpenHands running locally with GITHUB_APP_WEBHOOK_SECRET=test-webhook-secret
+ * - The app configured to use the mock server for GitHub API calls
+ *
+ * ## Mode 2: Live Environment Tests (for staging/production)
+ * Tests against real deployed environments using the real GitHub API.
+ * Requires:
+ * - GITHUB_TEST_USERNAME and GITHUB_TEST_PASSWORD for authentication
+ * - The webhook endpoint to be accessible
+ *
+ * Environment Variables:
+ * - USE_MOCK_GITHUB: Set to "true" to use mock server mode
+ * - MOCK_GITHUB_PORT: Port for the mock GitHub server (default: 9999)
+ * - APP_PORT: Port where the OpenHands app is running (default: 12000)
+ * - GITHUB_APP_WEBHOOK_SECRET: Webhook secret for local testing
+ *
+ * Tags:
+ * - @github-resolver: GitHub resolver integration tests
+ * - @enterprise: Tests requiring enterprise features
+ */
+
+// Configuration
+const USE_MOCK_GITHUB = process.env.USE_MOCK_GITHUB === "true";
+const MOCK_GITHUB_PORT = parseInt(process.env.MOCK_GITHUB_PORT || "9999", 10);
+const APP_PORT = parseInt(process.env.APP_PORT || "12000", 10);
+const MOCK_SERVER_STARTUP_TIMEOUT = 30_000;
+const RESOLVER_RESPONSE_TIMEOUT = 180_000;
+
+// Mock server process
+let mockServerProcess: ChildProcess | null = null;
+let mockClient: MockGitHubClient | null = null;
+
+/**
+ * Compute the signature header value for a webhook body.
+ *
+ * @param payload - Raw request body to sign.
+ * @param secret - Shared secret used as the HMAC key.
+ * @returns The hex-encoded HMAC-SHA256 of `payload`, prefixed with `sha256=`.
+ */
+function generateWebhookSignature(payload: string, secret: string): string {
+  const hexDigest = crypto
+    .createHmac("sha256", secret)
+    .update(payload)
+    .digest("hex");
+  return "sha256=" + hexDigest;
+}
+
+/**
+ * Launch the mock GitHub server in a child process and wait until it is ready.
+ *
+ * Does nothing unless USE_MOCK_GITHUB is enabled. The child is started with
+ * `npx tsx` and receives its port and webhook secret through the
+ * MOCK_GITHUB_PORT and MOCK_GITHUB_WEBHOOK_SECRET environment variables. Its
+ * stdout/stderr are mirrored to the test log with a "[Mock GitHub]" prefix.
+ * On return, `mockServerProcess` and `mockClient` are populated.
+ */
+async function startMockServer(): Promise<void> {
+  if (!USE_MOCK_GITHUB) {
+    return;
+  }
+
+  const serverScript = path.join(
+    import.meta.dirname,
+    "../mocks/github-mock-server.ts",
+  );
+  // Fall back to the documented local-testing secret when none is configured.
+  const webhookSecret =
+    process.env.GITHUB_APP_WEBHOOK_SECRET || "test-webhook-secret";
+
+  console.log(`Starting mock GitHub server on port ${MOCK_GITHUB_PORT}...`);
+
+  const child = spawn("npx", ["tsx", serverScript], {
+    env: {
+      ...process.env,
+      MOCK_GITHUB_PORT: String(MOCK_GITHUB_PORT),
+      MOCK_GITHUB_WEBHOOK_SECRET: webhookSecret,
+    },
+    // No stdin; capture stdout and stderr so they can be relayed below.
+    stdio: ["ignore", "pipe", "pipe"],
+  });
+  mockServerProcess = child;
+
+  child.stdout?.on("data", (chunk) => {
+    console.log(`[Mock GitHub] ${chunk.toString().trim()}`);
+  });
+  child.stderr?.on("data", (chunk) => {
+    console.error(`[Mock GitHub ERROR] ${chunk.toString().trim()}`);
+  });
+  child.on("error", (error) => {
+    console.error(`[Mock GitHub] Failed to start server: ${error.message}`);
+  });
+  child.on("exit", (code) => {
+    console.log(`[Mock GitHub] Server exited with code ${code}`);
+  });
+
+  const client = createMockGitHubClient(MOCK_GITHUB_PORT, APP_PORT);
+  mockClient = client;
+  await client.waitForReady(MOCK_SERVER_STARTUP_TIMEOUT);
+  console.log("Mock GitHub server is ready");
+}
+
+/**
+ * Stop the mock GitHub server, if one was started.
+ *
+ * Sends SIGTERM and waits for the child to exit; if it has not exited within
+ * 5 seconds it is sent SIGKILL and the wait ends. A child that has already
+ * exited (for example because it crashed during startup) is not signalled
+ * or waited for, so teardown does not stall. Always clears
+ * `mockServerProcess` afterwards.
+ */
+async function stopMockServer(): Promise<void> {
+  const child = mockServerProcess;
+  if (!child) {
+    return;
+  }
+
+  console.log("Stopping mock GitHub server...");
+
+  // exitCode and signalCode are both null only while the child is running.
+  // Once it has exited, "exit" will never fire again, so waiting for it
+  // would always run into the 5 second fallback.
+  const hasExited = child.exitCode !== null || child.signalCode !== null;
+
+  if (!hasExited) {
+    await new Promise<void>((resolve) => {
+      const forceKillTimer = setTimeout(() => {
+        child.kill("SIGKILL");
+        resolve();
+      }, 5000);
+
+      // Register the listener before signalling so the exit cannot be missed.
+      child.once("exit", () => {
+        clearTimeout(forceKillTimer);
+        resolve();
+      });
+
+      child.kill("SIGTERM");
+    });
+  }
+
+  mockServerProcess = null;
+  console.log("Mock GitHub server stopped");
+}
+
+// ============================================================================
+// MOCK SERVER TESTS (for local development with mock GitHub)
+// ============================================================================
+
+test.describe("GitHub Resolver - Mock Server @github-resolver @enterprise @mock", () => {
+  // All scenarios share one mock server, so they must not run concurrently.
+  test.describe.configure({ mode: "serial" });
+
+  // The whole suite is skipped unless mock mode was requested explicitly.
+  test.skip(!USE_MOCK_GITHUB, "Requires USE_MOCK_GITHUB=true");
+
+  test.beforeAll(() => startMockServer());
+
+  test.afterAll(() => stopMockServer());
+
+  // Clear the mock client's state before every scenario.
+  test.beforeEach(async () => {
+    await mockClient?.reset();
+  });
+
+  test("mock server should be healthy", async () => {
+    expect(mockClient).not.toBeNull();
+    expect(await mockClient!.healthCheck()).toBe(true);
+  });
+
+  test("should process issue labeled webhook and create conversation", async ({
+    page,
+    baseURL,
+  }) => {
+    expect(mockClient).not.toBeNull();
+
+    // 1. Deliver an "issue labeled" webhook through the mock client.
+    console.log("Triggering issue labeled webhook...");
+    const delivery = await mockClient!.triggerIssueLabeledEvent({
+      issueTitle: "Add README file",
+      issueBody: "Please add a README.md file with project documentation.",
+      labelName: "openhands",
+    });
+    console.log(`Webhook response: ${JSON.stringify(delivery)}`);
+    expect(delivery.responseStatus).toBe(200);
+
+    // 2. Wait for the resolver's acknowledgement to reach the mock server.
+    console.log("Waiting for resolver response...");
+    const reply = await mockClient!.waitForResponseContaining(
+      "I'm on it",
+      RESOLVER_RESPONSE_TIMEOUT,
+    );
+    console.log(`Resolver response: ${reply.body}`);
+    expect(reply.body).toContain("I'm on it");
+    expect(reply.body).toContain("track my progress");
+
+    // 3. The acknowledgement must link to the conversation that was created.
+    const linkMatch = reply.body.match(/conversations\/([a-f0-9]+)/);
+    expect(linkMatch).not.toBeNull();
+    const [, conversationId] = linkMatch!;
+    console.log(`Conversation ID: ${conversationId}`);
+
+    // 4. Open that conversation in the UI and check it is usable.
+    const conversation = new ConversationPage(page);
+    await page.goto(`${baseURL}/conversations/${conversationId}`);
+    await conversation.waitForConversationReady(30_000);
+    await expect(conversation.chatBox).toBeVisible();
+
+    await page.screenshot({
+      path: "test-results/screenshots/github-resolver-conversation.png",
+    });
+
+    console.log("Issue labeled webhook test passed");
+  });
+
+  test("should process issue comment webhook with @openhands mention", async ({
+    page,
+    baseURL,
+  }) => {
+    expect(mockClient).not.toBeNull();
+
+    // 1. Deliver an issue comment that mentions @openhands.
+    console.log("Triggering issue comment webhook...");
+    const delivery = await mockClient!.triggerIssueCommentEvent({
+      issueTitle: "Bug: Application crashes on startup",
+      issueBody: "The application crashes when I try to start it.",
+      commentBody: "@openhands please investigate this crash and fix it",
+    });
+    console.log(`Webhook response: ${JSON.stringify(delivery)}`);
+    expect(delivery.responseStatus).toBe(200);
+
+    // 2. Wait for the resolver's acknowledgement.
+    console.log("Waiting for resolver response...");
+    const reply = await mockClient!.waitForResponseContaining(
+      "I'm on it",
+      RESOLVER_RESPONSE_TIMEOUT,
+    );
+    console.log(`Resolver response: ${reply.body}`);
+    expect(reply.body).toContain("I'm on it");
+
+    // 3. Follow the conversation link from the acknowledgement.
+    const linkMatch = reply.body.match(/conversations\/([a-f0-9]+)/);
+    expect(linkMatch).not.toBeNull();
+    const [, conversationId] = linkMatch!;
+
+    const conversation = new ConversationPage(page);
+    await page.goto(`${baseURL}/conversations/${conversationId}`);
+    await conversation.waitForConversationReady(30_000);
+
+    await page.screenshot({
+      path: "test-results/screenshots/github-resolver-issue-comment.png",
+    });
+  });
+});
+
+// ============================================================================
+// LIVE ENVIRONMENT TESTS (for staging/production with real GitHub)
+// ============================================================================
+
+test.describe("GitHub Resolver - Live Environment @github-resolver @enterprise @live", () => {
+  test.describe.configure({ mode: "serial" });
+
+  let homePage: HomePage;
+
+  test.beforeEach(async ({ page }) => {
+    homePage = new HomePage(page);
+  });
+
+  test("should verify resolver conversations appear in conversation list", async ({
+    page,
+  }) => {
+    /**
+     * Infrastructure check: the recent-conversations list renders on the home
+     * page and, when it has at least one entry, that entry can be opened.
+     * The test passes without opening anything when the list is empty.
+     */
+
+    // The home page is only reachable with the session saved by global-setup.
+    await homePage.goto();
+    await expect(homePage.homeScreen).toBeVisible({ timeout: 30_000 });
+
+    const recentList = page.getByTestId("recent-conversations");
+    await expect(recentList).toBeVisible({ timeout: 10_000 });
+
+    const links = recentList.locator('a[href^="/conversations/"]');
+    const total = await links.count();
+    console.log(`Found ${total} recent conversations`);
+
+    await page.screenshot({
+      path: "test-results/screenshots/resolver-conversations-list.png",
+    });
+
+    if (total === 0) {
+      return;
+    }
+
+    // Open the first listed conversation and wait for it to become ready.
+    await links.first().click();
+    await new ConversationPage(page).waitForConversationReady(30_000);
+
+    await page.screenshot({
+      path: "test-results/screenshots/resolver-conversation-detail.png",
+    });
+
+    console.log("Successfully navigated to a conversation");
+  });
+
+  test("should be able to send webhook with valid signature format", async ({
+    baseURL,
+    request,
+  }) => {
+    /**
+     * Posts a well-formed webhook signed with the wrong secret. The response
+     * shows that the endpoint exists and that the signature is checked:
+     * 403 means the signature was rejected, 200 is accepted for environments
+     * where webhooks are disabled.
+     */
+
+    const payload = createIssueLabeledPayload({
+      issueTitle: "Test Issue",
+      issueBody: "Test body for integration test",
+      labelName: "openhands",
+    });
+
+    // Sign with a secret the server cannot know, so verification must fail.
+    const signature = generateWebhookSignature(
+      JSON.stringify(payload),
+      "wrong-secret",
+    );
+
+    const endpoint = `${baseURL}/api/integration/github/events`;
+    const response = await request.post(endpoint, {
+      headers: {
+        "Content-Type": "application/json",
+        "X-GitHub-Event": "issues",
+        "X-Hub-Signature-256": signature,
+        "X-GitHub-Delivery": crypto.randomUUID(),
+      },
+      data: payload,
+    });
+
+    const status = response.status();
+    console.log(`Webhook response status: ${status}`);
+
+    // Either 403 (signature invalid) or 200 (if webhooks disabled) is acceptable
+    expect([200, 403]).toContain(status);
+
+    const responseText = await response.text();
+    console.log(`Webhook response: ${responseText}`);
+
+    switch (status) {
+      case 403:
+        console.log(
+          "Webhook signature validation is working (403 = invalid signature)",
+        );
+        break;
+      case 200: {
+        const body = JSON.parse(responseText);
+        if (body.message?.includes("disabled")) {
+          console.log("GitHub webhooks are disabled on this environment");
+        }
+        break;
+      }
+      default:
+        break;
+    }
+  });
+});
+
+// ============================================================================
+// ERROR HANDLING TESTS
+// ============================================================================
+
+test.describe("GitHub Resolver - Error Handling @github-resolver @enterprise", () => {
+  // A webhook with no X-Hub-Signature-256 header must not be processed.
+  test("should reject webhook without signature header", async ({
+    baseURL,
+    request,
+  }) => {
+    const payload = { action: "labeled", installation: { id: 12345 } };
+
+    const response = await request.post(
+      `${baseURL}/api/integration/github/events`,
+      {
+        headers: {
+          "Content-Type": "application/json",
+          "X-GitHub-Event": "issues",
+        },
+        data: payload,
+      },
+    );
+
+    console.log(
+      `Response status: ${response.status()} (expected 403 or 200 if disabled)`,
+    );
+    expect([200, 403]).toContain(response.status());
+  });
+
+  // A body that is not JSON must produce an error status, not a success.
+  test("should handle malformed JSON gracefully", async ({
+    baseURL,
+    request,
+  }) => {
+    // Send the body as a Buffer so the bytes go out verbatim. Playwright
+    // JSON-encodes a string passed as `data` under a JSON content type when
+    // the string is not already valid JSON, which would turn this body into
+    // a valid JSON string literal and defeat the purpose of the test.
+    const malformedBody = Buffer.from("not valid json{{{", "utf8");
+
+    const response = await request.post(
+      `${baseURL}/api/integration/github/events`,
+      {
+        headers: {
+          "Content-Type": "application/json",
+          "X-GitHub-Event": "issues",
+          "X-Hub-Signature-256": "sha256=invalid",
+        },
+        data: malformedBody,
+      },
+    );
+
+    console.log(`Response status: ${response.status()}`);
+    expect([400, 403, 422, 500]).toContain(response.status());
+  });
+});
@@ -0,0 +1,325 @@
+import { test as setup, expect } from "@playwright/test";
+import path from "path";
+import fs from "fs";
+
+const authFile = path.join(import.meta.dirname, "../fixtures/auth.json");
+
+/**
+ * Setup test that logs in once and persists the browser session.
+ *
+ * The saved storage state is written to fixtures/auth.json so later tests
+ * can reuse it.
+ *
+ * Authentication methods (selected with AUTH_METHOD, default "github"):
+ * - "github": GitHub OAuth, using GITHUB_TEST_USERNAME / GITHUB_TEST_PASSWORD
+ *   and, optionally, GITHUB_TEST_TOTP_SECRET for 2FA
+ * - "keycloak": Keycloak login, using KEYCLOAK_USERNAME / KEYCLOAK_PASSWORD
+ * - "skip": reuse an existing fixtures/auth.json; fails if the file is missing
+ *
+ * Any other AUTH_METHOD value is rejected with an error.
+ */
+setup("authenticate", async ({ page, baseURL }) => {
+  const authMethod = process.env.AUTH_METHOD || "github";
+
+  // "skip" means: trust the state file that is already on disk.
+  if (authMethod === "skip") {
+    if (!fs.existsSync(authFile)) {
+      throw new Error(
+        "AUTH_METHOD=skip but no existing auth.json found. Please run authentication first.",
+      );
+    }
+    console.log("Using existing authentication state from fixtures/auth.json");
+    return;
+  }
+
+  await page.goto(baseURL || "/");
+
+  // A session may already be active; if so just persist it.
+  if (await checkIfAuthenticated(page)) {
+    console.log("Already authenticated, saving state...");
+    await page.context().storageState({ path: authFile });
+    return;
+  }
+
+  switch (authMethod) {
+    case "github":
+      await authenticateWithGitHub(page);
+      break;
+    case "keycloak":
+      await authenticateWithKeycloak(page);
+      break;
+    default:
+      throw new Error(`Unknown AUTH_METHOD: ${authMethod}`);
+  }
+
+  // Wait until the browser has left every identity-provider / login URL.
+  // The landing page may be the home page or the accept-tos page.
+  const providerMarkers = ["github.com", "login", "keycloak"];
+  await page.waitForURL(
+    (url) => {
+      const href = url.toString();
+      return providerMarkers.every((marker) => !href.includes(marker));
+    },
+    { timeout: 60_000 },
+  );
+
+  const landedOnTos = page.url().includes("/accept-tos");
+  if (landedOnTos) {
+    console.log(
+      "Redirected to accept-tos page after authentication, handling TOS acceptance...",
+    );
+    await handleTOSAcceptance(page);
+  }
+
+  // The home screen being visible is the proof that login worked.
+  await expect(page.getByTestId("home-screen")).toBeVisible({
+    timeout: 30_000,
+  });
+
+  await page.context().storageState({ path: authFile });
+  console.log("Authentication successful, state saved to fixtures/auth.json");
+});
+
+/**
+ * Report whether the current page shows an authenticated session.
+ *
+ * @param page - Page that has already navigated to the application.
+ * @returns `true` only when the home screen is visible and the login page is
+ *   not; `false` otherwise, including when any check throws.
+ */
+async function checkIfAuthenticated(
+  page: import("@playwright/test").Page,
+): Promise<boolean> {
+  try {
+    // Let the page settle for up to 10s; not reaching network idle is fine.
+    await page
+      .waitForLoadState("networkidle", { timeout: 10_000 })
+      .catch(() => {});
+
+    const homeVisible = await page
+      .getByTestId("home-screen")
+      .isVisible()
+      .catch(() => false);
+    const loginVisible = await page
+      .getByTestId("login-page")
+      .isVisible()
+      .catch(() => false);
+
+    if (!homeVisible) {
+      return false;
+    }
+    return !loginVisible;
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Log in through the application's "Log in with GitHub" button.
+ *
+ * Requires GITHUB_TEST_USERNAME and GITHUB_TEST_PASSWORD; throws when either
+ * is missing. After the button is clicked the browser may land on GitHub,
+ * on the accept-tos page, or straight back in the app. Only the GitHub case
+ * needs credentials; the 2FA step runs only when GITHUB_TEST_TOTP_SECRET is
+ * set.
+ *
+ * @param page - Page currently showing the application's login screen.
+ */
+async function authenticateWithGitHub(
+  page: import("@playwright/test").Page,
+): Promise<void> {
+  const {
+    GITHUB_TEST_USERNAME: username,
+    GITHUB_TEST_PASSWORD: password,
+  } = process.env;
+
+  if (!username || !password) {
+    throw new Error(
+      "GitHub authentication requires GITHUB_TEST_USERNAME and GITHUB_TEST_PASSWORD environment variables",
+    );
+  }
+
+  console.log("Starting GitHub authentication...");
+
+  const loginButton = page.getByRole("button", { name: "Log in with GitHub" });
+  await expect(loginButton).toBeVisible({ timeout: 10_000 });
+  await loginButton.click();
+
+  // Three destinations count as "the redirect happened": GitHub itself, the
+  // accept-tos page, or any URL without keycloak / /login in it (the app,
+  // reached directly when a Keycloak session already exists).
+  await page.waitForURL(
+    (url) => {
+      const href = url.toString();
+      if (href.includes("github.com") || href.includes("/accept-tos")) {
+        return true;
+      }
+      return !href.includes("keycloak") && !href.includes("/login");
+    },
+    { timeout: 30_000 },
+  );
+
+  const landingUrl = page.url();
+
+  if (landingUrl.includes("/accept-tos")) {
+    console.log("Redirected to accept-tos page, handling TOS acceptance...");
+    await handleTOSAcceptance(page);
+    console.log("TOS acceptance completed");
+    return;
+  }
+
+  // Not on GitHub and not on accept-tos: the app let us in without a login.
+  if (!landingUrl.includes("github.com")) {
+    console.log("Already authenticated via Keycloak session");
+    return;
+  }
+
+  // GitHub sign-in form: wait for it, fill it in and submit.
+  const loginInput = page.locator('input[name="login"]');
+  await loginInput.waitFor({ state: "visible", timeout: 10_000 });
+  await loginInput.fill(username);
+  await page.locator('input[name="password"]').fill(password);
+  await page.locator('input[type="submit"][value="Sign in"]').click();
+
+  // The 2FA step is only attempted when a TOTP secret was provided.
+  const totpSecret = process.env.GITHUB_TEST_TOTP_SECRET;
+  if (totpSecret) {
+    await handle2FA(page, totpSecret);
+  }
+
+  // Handles the OAuth authorization prompt if one is shown.
+  await handleOAuthAuthorization(page);
+
+  console.log("GitHub authentication flow completed");
+}
+
+/**
+ * Accept the Terms of Service and wait to be redirected away.
+ *
+ * Ticks the checkbox on the page, presses "Continue" once it is enabled,
+ * and returns when the URL no longer contains `/accept-tos`.
+ *
+ * @param page - Page currently showing the accept-tos screen.
+ */
+async function handleTOSAcceptance(
+  page: import("@playwright/test").Page,
+): Promise<void> {
+  // Best effort: give the page up to 10s to go network-idle, then carry on.
+  await page
+    .waitForLoadState("networkidle", { timeout: 10_000 })
+    .catch(() => {});
+
+  const agreeCheckbox = page.locator('input[type="checkbox"]');
+  await agreeCheckbox.waitFor({ state: "visible", timeout: 10_000 });
+  await agreeCheckbox.click();
+
+  const proceedButton = page.getByRole("button", { name: "Continue" });
+  await expect(proceedButton).toBeEnabled({ timeout: 5_000 });
+  await proceedButton.click();
+
+  // Acceptance is complete once the browser has left the accept-tos URL.
+  await page.waitForURL((url) => !url.toString().includes("/accept-tos"), {
+    timeout: 30_000,
+  });
+}
+
+/**
+ * Complete the GitHub 2FA prompt when it appears.
+ *
+ * Waits up to 5 seconds for the one-time-password field. If it never shows,
+ * 2FA is treated as not required and the function returns. If it does show,
+ * a TOTP code is generated, entered and submitted. Failures in that second
+ * phase are propagated, because login cannot succeed once the prompt is on
+ * screen and the code cannot be entered.
+ *
+ * @param page - Page that has just submitted the GitHub sign-in form.
+ * @param totpSecret - Secret passed on to `generateTOTP`.
+ */
+async function handle2FA(
+  page: import("@playwright/test").Page,
+  totpSecret: string,
+): Promise<void> {
+  const otpField = page.locator('input[name="otp"]');
+
+  // locator.isVisible() checks once and ignores its `timeout` option, so
+  // straight after the sign-in click it would nearly always report false.
+  // waitFor() really waits for the field to appear.
+  const isOtpVisible = await otpField.waitFor({ state: "visible", timeout: 5_000 }).then(
+    () => true,
+    () => false,
+  );
+
+  if (!isOtpVisible) {
+    // 2FA not required, continue
+    return;
+  }
+
+  console.log("2FA required, generating TOTP code...");
+
+  const totpCode = await generateTOTP(totpSecret);
+  await otpField.fill(totpCode);
+
+  // Submit 2FA
+  await page.locator('button[type="submit"]').click();
+}
+
+/**
+ * Generate a 6-digit time-based one-time password (RFC 6238).
+ *
+ * Uses HMAC-SHA1 with a 30-second time step and relies only on Node's
+ * built-in `crypto` module.
+ *
+ * @param secret - Base32-encoded shared secret (RFC 4648 alphabet). Spaces,
+ *   dashes, `=` padding and lower-case letters are tolerated.
+ *   NOTE(review): assumes the configured secret is the base32 "setup key"
+ *   shown when enrolling an authenticator app; confirm for the test account.
+ * @param timestampMs - Time to generate the code for, in milliseconds since
+ *   the Unix epoch. Defaults to the current time; exposed for testing.
+ * @returns The code as a zero-padded 6-character string.
+ * @throws Error when the secret is empty or contains non-base32 characters.
+ */
+async function generateTOTP(
+  secret: string,
+  timestampMs: number = Date.now(),
+): Promise<string> {
+  // Imported here so the function does not depend on the file's import block.
+  const { createHmac } = await import("crypto");
+
+  const BASE32_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
+  const normalized = secret.replace(/[\s=-]/g, "").toUpperCase();
+  if (normalized.length === 0) {
+    throw new Error("TOTP secret is empty");
+  }
+
+  // Base32 decode: collect 5 bits per character, emit a byte per 8 bits.
+  const keyBytes: number[] = [];
+  let bitBuffer = 0;
+  let bitCount = 0;
+  for (const char of normalized) {
+    const value = BASE32_ALPHABET.indexOf(char);
+    if (value === -1) {
+      throw new Error("TOTP secret is not valid base32");
+    }
+    bitBuffer = (bitBuffer << 5) | value;
+    bitCount += 5;
+    if (bitCount >= 8) {
+      bitCount -= 8;
+      keyBytes.push((bitBuffer >>> bitCount) & 0xff);
+      // Drop the bits just emitted so the accumulator never overflows.
+      bitBuffer &= (1 << bitCount) - 1;
+    }
+  }
+
+  // The moving factor is the number of 30-second steps since the epoch,
+  // encoded as an 8-byte big-endian integer.
+  const timeStep = Math.floor(timestampMs / 1000 / 30);
+  const counter = Buffer.alloc(8);
+  counter.writeUInt32BE(Math.floor(timeStep / 2 ** 32), 0);
+  counter.writeUInt32BE(timeStep >>> 0, 4);
+
+  const digest = createHmac("sha1", Buffer.from(keyBytes))
+    .update(counter)
+    .digest();
+
+  // Dynamic truncation (RFC 4226): the low nibble of the last byte selects a
+  // 4-byte window, whose top bit is cleared before reducing to 6 digits.
+  const offset = digest[digest.length - 1] & 0x0f;
+  const truncated = digest.readUInt32BE(offset) & 0x7fffffff;
+
+  return String(truncated % 1_000_000).padStart(6, "0");
+}
+
+/**
+ * Click through the OAuth authorization prompt if it appears.
+ *
+ * Waits up to 5 seconds for the authorize button. When it does not appear,
+ * no authorization is needed and the function returns without acting.
+ *
+ * @param page - Page that is part-way through the GitHub login flow.
+ */
+async function handleOAuthAuthorization(
+  page: import("@playwright/test").Page,
+): Promise<void> {
+  const authorizeButton = page.locator('button[name="authorize"]');
+
+  // locator.isVisible() returns immediately and ignores its `timeout`
+  // option, so it would miss a prompt that is still loading. waitFor()
+  // really waits for the button to appear.
+  const isAuthVisible = await authorizeButton.waitFor({ state: "visible", timeout: 5_000 }).then(
+    () => true,
+    () => false,
+  );
+
+  if (!isAuthVisible) {
+    // No authorization needed, continue
+    return;
+  }
+
+  console.log("OAuth authorization required, clicking authorize...");
+  await authorizeButton.click();
+}
+
+/**
+ * Log in through a Keycloak login form.
+ *
+ * Requires KEYCLOAK_USERNAME and KEYCLOAK_PASSWORD; throws when either is
+ * missing. Opens `/login`, waits for a URL matching `keycloak` or `auth`,
+ * fills the `#username` / `#password` fields and clicks `#kc-login`. It does
+ * not wait for the redirect back to the application.
+ *
+ * @param page - Page to drive through the login flow.
+ */
+async function authenticateWithKeycloak(
+  page: import("@playwright/test").Page,
+): Promise<void> {
+  const { KEYCLOAK_USERNAME: username, KEYCLOAK_PASSWORD: password } =
+    process.env;
+
+  if (!username || !password) {
+    throw new Error(
+      "Keycloak authentication requires KEYCLOAK_USERNAME and KEYCLOAK_PASSWORD environment variables",
+    );
+  }
+
+  console.log("Starting Keycloak authentication...");
+
+  // The exact entry point depends on the Keycloak configuration in use.
+  await page.goto("/login");
+  await page.waitForURL(/keycloak|auth/, { timeout: 30_000 });
+
+  // Credentials first, then the login button.
+  const credentials: Array<[string, string]> = [
+    ["#username", username],
+    ["#password", password],
+  ];
+  for (const [selector, value] of credentials) {
+    await page.locator(selector).fill(value);
+  }
+  await page.locator("#kc-login").click();
+
+  console.log("Keycloak authentication flow completed");
+}
@@ -0,0 +1,484 @@
+import { test, expect } from "@playwright/test";
+import { HomePage, ConversationPage } from "../pages";
+
+/**
+ * Smoke Tests for OpenHands Application
+ *
+ * These tests verify the critical path of the application:
+ * 1. User can log in (handled by global-setup)
+ * 2. User can access the home screen
+ * 3. User can select a repository
+ * 4. User can start a conversation
+ * 5. Agent can process a simple prompt without errors
+ *
+ * Tags:
+ * - @smoke: Core smoke tests that must pass
+ * - @critical: Critical functionality tests
+ *
+ * Environment Variables:
+ * - TEST_REPO_URL: Repository to select before starting a conversation
+ *   (optional; when unset, the conversation is started without a repository)
+ */
+
+// Test configuration
+const { TEST_REPO_URL } = process.env;
+
+test.describe("Smoke Tests @smoke", () => {
+  test.describe.configure({ mode: "serial" }); // Run tests in sequence
+
+  let homePage: HomePage;
+  let conversationPage: ConversationPage;
+
+  test.beforeEach(async ({ page }) => {
+    homePage = new HomePage(page);
+    conversationPage = new ConversationPage(page);
+  });
+
+  test("should display home screen after authentication @critical", async ({
+    page,
+  }) => {
+    await homePage.goto();
+
+    // The home screen can take a while to render, hence the long timeout.
+    await expect(homePage.homeScreen).toBeVisible({ timeout: 30_000 });
+
+    // The "new conversation" section must be visible as well.
+    await expect(homePage.newConversationSection).toBeVisible();
+
+    // Keep a screenshot as evidence.
+    const screenshotPath = "test-results/screenshots/home-screen.png";
+    await page.screenshot({ path: screenshotPath });
+  });
+
+  test("should have user avatar visible indicating logged in state @critical", async () => {
+    await homePage.goto();
+
+    // The page object must report a logged-in session...
+    expect(await homePage.isLoggedIn()).toBe(true);
+
+    // ...and the avatar must be visible too.
+    await expect(homePage.userAvatar).toBeVisible();
+  });
+
+  test("should be able to open user menu", async () => {
+    // Open the user menu from the home page.
+    await homePage.goto();
+    await homePage.openUserMenu();
+
+    // The account settings menu must now be showing.
+    await expect(homePage.accountSettingsMenu).toBeVisible();
+  });
+
+  test("should be able to purchase $10 credits via Stripe @critical", async ({
+    page,
+  }) => {
+    // Parse a displayed balance such as "$12.50" or "$1,234.56". Currency
+    // symbols and thousands separators are stripped; missing text counts as 0.
+    const parseBalance = (text: string | null): number =>
+      parseFloat(text?.replace(/[^0-9.-]/g, "") || "0");
+
+    // Navigate to home and open user menu
+    await homePage.goto();
+    await homePage.openUserMenu();
+
+    // Click on Billing link in the user menu
+    const billingLink = page.getByRole("link", { name: /billing/i });
+    await billingLink.click();
+
+    // Wait for billing page to load
+    await page.waitForURL(/\/settings\/billing/, { timeout: 30_000 });
+    await expect(page.getByTestId("billing-settings")).toBeVisible({
+      timeout: 10_000,
+    });
+
+    // Capture initial balance
+    const balanceElement = page.getByTestId("user-balance");
+    await expect(balanceElement).toBeVisible({ timeout: 10_000 });
+    const initialBalance = parseBalance(await balanceElement.textContent());
+    console.log(`Initial balance: $${initialBalance.toFixed(2)}`);
+
+    // Enter $10 in the Add Funds input
+    const topUpInput = page.getByTestId("top-up-input");
+    await topUpInput.fill("10");
+
+    // Click Add Credit button
+    const addCreditButton = page.getByRole("button", { name: /add credit/i });
+    await expect(addCreditButton).toBeEnabled({ timeout: 5_000 });
+    await addCreditButton.click();
+
+    // Wait for redirect to Stripe checkout
+    await page.waitForURL(/checkout\.stripe\.com/, { timeout: 30_000 });
+    console.log("Redirected to Stripe checkout");
+
+    // Wait for the Pay button to be present (indicates form is ready)
+    const payButton = page.locator(".SubmitButton");
+    await payButton.waitFor({ state: "attached", timeout: 30_000 });
+    console.log("Stripe checkout form loaded");
+
+    // Fill in the card details, cardholder name and ZIP code
+    await page.locator("#cardNumber").fill("5105105105105100");
+    await page.locator("#cardExpiry").fill("12/35");
+    await page.locator("#cardCvc").fill("123");
+    await page.locator("#billingName").fill("Testy Tester");
+    await page.locator("#billingPostalCode").fill("12345");
+
+    // Take screenshot of filled Stripe form
+    await page.screenshot({
+      path: "test-results/screenshots/stripe-checkout-filled.png",
+    });
+
+    // Click Pay button
+    await payButton.click();
+
+    // Wait for redirect back to billing page
+    await page.waitForURL(/\/settings\/billing/, { timeout: 60_000 });
+    console.log("Returned to billing page after payment");
+
+    // Verify balance increased by $10. The balance is refreshed
+    // asynchronously, so poll until it reaches the expected value. A fixed
+    // sleep could read it too early.
+    const expectedBalance = initialBalance + 10;
+    await expect(balanceElement).toBeVisible({ timeout: 10_000 });
+    let newBalance = Number.NaN;
+    await expect
+      .poll(
+        async () => {
+          newBalance = parseBalance(await balanceElement.textContent());
+          return newBalance;
+        },
+        { timeout: 30_000 },
+      )
+      .toBeCloseTo(expectedBalance, 2);
+
+    console.log(`New balance: $${newBalance.toFixed(2)}`);
+    console.log(
+      `Balance increased by $10: $${initialBalance.toFixed(2)} -> $${newBalance.toFixed(2)}`,
+    );
+
+    // Take screenshot of updated balance
+    await page.screenshot({
+      path: "test-results/screenshots/billing-after-payment.png",
+    });
+  });
+
+  test("should be able to start a conversation, send a prompt, and receive response @critical", async ({
+    page,
+  }) => {
+    await homePage.goto();
+
+    // With a test repository configured, try to select it and launch from
+    // the repository button; otherwise launch a plain conversation.
+    let launchButtonId = "launch-new-conversation-button";
+    if (TEST_REPO_URL) {
+      try {
+        await homePage.selectRepository(TEST_REPO_URL);
+        console.log(`Selected repository: ${TEST_REPO_URL}`);
+      } catch {
+        // A failed selection is tolerated; the launch is still attempted.
+        console.log(
+          "Repository selection not available or failed, continuing...",
+        );
+      }
+      launchButtonId = "repo-launch-button";
+    }
+    await homePage.startNewConversation(launchButtonId);
+
+    // Allow navigation to complete
+    await page.waitForTimeout(2000);
+
+    conversationPage = new ConversationPage(page);
+
+    // The agent must be ready before the chat UI is checked.
+    await conversationPage.waitForConversationReady();
+    await expect(conversationPage.chatBox).toBeVisible();
+    await expect(conversationPage.chatInput).toBeVisible();
+
+    await page.screenshot({
+      path: "test-results/screenshots/conversation-ready.png",
+    });
+
+    // Send a prompt whose correct answer is easy to recognise.
+    const prompt = "Reverse the word 'hello'";
+    console.log(`Sending prompt: "${prompt}"`);
+    await conversationPage.executePrompt(prompt, 120_000);
+
+    // The reply has to contain the reversed word.
+    const reply = await conversationPage.waitForMessageContaining(
+      "olleh",
+      120_000,
+    );
+    console.log(
+      `Found expected response containing 'olleh': "${reply.substring(0, 100)}..."`,
+    );
+
+    await page.screenshot({
+      path: "test-results/screenshots/agent-response.png",
+    });
+
+    console.log("Smoke test passed: Agent correctly reversed the word");
+  });
+
+  test("should be able to navigate to a running conversation @critical", async ({
+    page,
+  }) => {
+    // Open the first entry of the recent conversations list.
+    await homePage.goto();
+    await homePage.clickFirstConversation();
+
+    conversationPage = new ConversationPage(page);
+
+    // Wait until the opened conversation reports that it is ready.
+    await conversationPage.waitForConversationReady();
+
+    const screenshotPath = "test-results/screenshots/navigated-conversation.png";
+    await page.screenshot({ path: screenshotPath });
+
+    console.log("Successfully navigated to running conversation");
+  });
+
+  test("should be able to use Tavily search and get accurate response", async ({
+    page,
+  }) => {
+    // Search-backed answers are slow, so both waits share a longer budget.
+    const searchTimeoutMs = 180_000;
+    const prompt =
+      "Using Tavily search, please tell me who is the prime minister of Ireland.";
+
+    // Open the first conversation from the home page's recent list.
+    await homePage.goto();
+    await homePage.clickFirstConversation();
+
+    // Wrap the page and block until the conversation reports ready.
+    conversationPage = new ConversationPage(page);
+    await conversationPage.waitForConversationReady();
+
+    console.log(`Sending prompt: "${prompt}"`);
+    await conversationPage.executePrompt(prompt, searchTimeoutMs);
+
+    // NOTE(review): the expected answer is hard-coded and time-sensitive;
+    // it must be updated whenever the office holder changes.
+    const reply = await conversationPage.waitForMessageContaining(
+      "Micheál Martin",
+      searchTimeoutMs,
+    );
+    const preview = reply.substring(0, 100);
+    console.log(
+      `Found expected response containing 'Micheál Martin': "${preview}..."`,
+    );
+
+    // Keep visual evidence of the successful response.
+    await page.screenshot({
+      path: "test-results/screenshots/tavily-search-response.png",
+    });
+
+    console.log(
+      "Tavily search test passed: Agent correctly identified the Prime Minister of Ireland",
+    );
+  });
+
+  test("should be able to create API key and use it to access the API @critical", async ({
+    page,
+    request,
+    baseURL,
+  }) => {
+    // End-to-end check of the API key flow: remove any leftover key with the
+    // test name, create a fresh key through the UI, then use it to call the
+    // sandboxes search endpoint.
+    const API_KEY_NAME = "Integration Test Key";
+
+    // Navigate to home and open user menu
+    await homePage.goto();
+    await homePage.openUserMenu();
+
+    // Click on API Keys link in the user menu
+    const apiKeysLink = page.getByRole("link", { name: /api keys/i });
+    await apiKeysLink.click();
+
+    // Wait for API Keys page to load
+    await page.waitForURL(/\/settings\/api-keys/, { timeout: 30_000 });
+    console.log("Navigated to API Keys page");
+
+    // Verify "Refresh API Key" button is visible (indicates user has credits)
+    const refreshApiKeyButton = page.getByRole("button", { name: /refresh/i });
+    await expect(refreshApiKeyButton).toBeVisible({ timeout: 10_000 });
+    console.log("Refresh API Key button is visible - user has credits");
+
+    // Delete any existing "Integration Test Key" if it exists.
+    // locator.isVisible() returns immediately and ignores its timeout option,
+    // so waitFor() is used to really give the table up to 2s to render a row.
+    const existingKeyRow = page.locator("tr", { hasText: API_KEY_NAME });
+    const hasExistingKey = await existingKeyRow
+      .waitFor({ state: "visible", timeout: 2_000 })
+      .then(() => true)
+      .catch(() => false);
+    if (hasExistingKey) {
+      console.log(`Found existing "${API_KEY_NAME}", deleting it...`);
+      const deleteButton = existingKeyRow.locator(
+        'button[aria-label^="Delete"]',
+      );
+      await deleteButton.click();
+
+      // Confirm deletion in modal
+      const deleteModal = page.getByTestId("delete-api-key-modal");
+      await expect(deleteModal).toBeVisible({ timeout: 5_000 });
+      // The confirm button is located via the modal's parent element and
+      // taken as the first button found there.
+      const confirmDeleteButton = deleteModal
+        .locator("xpath=..")
+        .getByRole("button")
+        .first();
+      await confirmDeleteButton.click();
+
+      // Wait for modal to close
+      await expect(deleteModal).not.toBeVisible({ timeout: 5_000 });
+      console.log(`Deleted existing "${API_KEY_NAME}"`);
+
+      // Wait for the deleted row to leave the table instead of sleeping for
+      // a fixed amount of time.
+      await expect(existingKeyRow).toBeHidden({ timeout: 5_000 });
+    }
+
+    // Click "Create API Key" button
+    const createApiKeyButton = page.getByRole("button", {
+      name: /create api key/i,
+    });
+    await expect(createApiKeyButton).toBeVisible({ timeout: 10_000 });
+    await createApiKeyButton.click();
+
+    // Wait for create modal to appear
+    const createModal = page.getByTestId("create-api-key-modal");
+    await expect(createModal).toBeVisible({ timeout: 5_000 });
+
+    // Enter the key name
+    const nameInput = page.getByTestId("api-key-name-input");
+    await nameInput.fill(API_KEY_NAME);
+
+    // Click Create button
+    const createButton = page.getByRole("button", { name: /^create$/i });
+    await createButton.click();
+
+    // Wait for the new key modal to appear with the generated key
+    const newKeyModal = page.getByTestId("new-api-key-modal");
+    await expect(newKeyModal).toBeVisible({ timeout: 10_000 });
+
+    // Capture the API key from the modal. textContent() returns the raw DOM
+    // text, so surrounding whitespace is trimmed before it is used as a
+    // header value.
+    const keyDisplay = newKeyModal.locator(".font-mono");
+    const apiKey = (await keyDisplay.textContent())?.trim() ?? "";
+    expect(apiKey).toBeTruthy();
+    // Only the length is logged so the secret never ends up in CI output.
+    console.log(`Created API key (${apiKey.length} characters)`);
+
+    // Close the modal
+    const closeButton = page.getByRole("button", { name: /close/i });
+    await closeButton.click();
+    await expect(newKeyModal).not.toBeVisible({ timeout: 5_000 });
+
+    // Take screenshot of API keys page
+    await page.screenshot({
+      path: "test-results/screenshots/api-keys-created.png",
+    });
+
+    // Test the API key by making a request to /api/v1/sandboxes/search
+    console.log("Testing API key with sandboxes search endpoint...");
+    const response = await request.get(`${baseURL}/api/v1/sandboxes/search`, {
+      headers: {
+        "X-Access-Token": apiKey,
+      },
+    });
+
+    // Verify the response
+    expect(response.ok()).toBe(true);
+    const responseBody = await response.json();
+    console.log(
+      `Sandboxes search response: ${JSON.stringify(responseBody).substring(0, 200)}...`,
+    );
+
+    // Verify we got at least 1 sandbox (the currently running one)
+    // Response format: { items: [], next_page_id: string | null }
+    expect(responseBody).toHaveProperty("items");
+    expect(Array.isArray(responseBody.items)).toBe(true);
+    expect(responseBody.items.length).toBeGreaterThanOrEqual(1);
+    console.log(
+      `Found ${responseBody.items.length} sandbox(es) - API key works!`,
+    );
+
+    // Take screenshot after API test
+    await page.screenshot({
+      path: "test-results/screenshots/api-key-test-complete.png",
+    });
+  });
+});
+
+test.describe("Health Check Tests @smoke", () => {
+  // Console errors containing any of these substrings are ignored.
+  const IGNORED_ERROR_MARKERS = ["favicon", "sourcemap", "DevTools"];
+  // Console errors containing any of these substrings fail the test.
+  const CRITICAL_ERROR_MARKERS = ["TypeError", "ReferenceError", "SyntaxError"];
+
+  test("application should be accessible", async ({ page, baseURL }) => {
+    // Any HTTP status below 400 counts as a reachable application.
+    const response = await page.goto(baseURL || "/");
+    expect(response?.status()).toBeLessThan(400);
+  });
+
+  test("application should not have console errors on load", async ({
+    page,
+  }) => {
+    const errors: string[] = [];
+
+    // Collect every console error that is not on the ignore list.
+    page.on("console", (msg) => {
+      if (msg.type() !== "error") {
+        return;
+      }
+      const text = msg.text();
+      const ignorable = IGNORED_ERROR_MARKERS.some((marker) =>
+        text.includes(marker),
+      );
+      if (!ignorable) {
+        errors.push(text);
+      }
+    });
+
+    await page.goto("/");
+    try {
+      await page.waitForLoadState("networkidle", { timeout: 30_000 });
+    } catch {
+      // Best effort: a page that never goes idle must not fail this test.
+    }
+
+    // Report everything that was collected, critical or not.
+    if (errors.length > 0) {
+      console.log("Console errors found:", errors);
+    }
+
+    // Only errors that look like script failures fail the test.
+    const criticalErrors = errors.filter((entry) =>
+      CRITICAL_ERROR_MARKERS.some((marker) => entry.includes(marker)),
+    );
+    expect(criticalErrors).toHaveLength(0);
+  });
+});
+
+test.describe("Environment Validation @smoke", () => {
+  test("should be connected to correct environment", async ({
+    page,
+    baseURL,
+  }) => {
+    await page.goto("/");
+
+    // Print the configured target so the run log shows what was tested.
+    console.log(`Testing against: ${baseURL}`);
+
+    // The URL we ended up on must mention the configured host name.
+    const expectedHost = new URL(baseURL || "").hostname;
+    expect(page.url()).toContain(expectedHost);
+  });
+
+  test("should have valid SSL certificate", async ({ page, baseURL }) => {
+    // Loads the base URL and requires an OK response.
+    // NOTE(review): the previous comment said ignoreHTTPSErrors is true; if
+    // that is the case, certificate errors are ignored and this test only
+    // proves the connection works - confirm against the Playwright config.
+    const response = await page.goto(baseURL || "/");
+    const loadedOk = response?.ok();
+    expect(loadedOk).toBe(true);
+  });
+});
@@ -0,0 +1,24 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ESNext",
+    "moduleResolution": "bundler",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "resolveJsonModule": true,
+    "declaration": false,
+    "declarationMap": false,
+    "noEmit": true,
+    "types": ["node"],
+    "baseUrl": ".",
+    "paths": {
+      "@pages/*": ["pages/*"],
+      "@utils/*": ["utils/*"],
+      "@fixtures/*": ["fixtures/*"]
+    }
+  },
+  "include": ["**/*.ts"],
+  "exclude": ["node_modules", "playwright-report", "test-results"]
+}
@@ -0,0 +1 @@
+export * from "./test-helpers";
@@ -0,0 +1,179 @@
+import { Page } from "@playwright/test";
+
+/**
+ * Utility functions for integration tests
+ */
+
+/**
+ * Poll an async condition until it returns true or the timeout elapses.
+ *
+ * The condition is always evaluated at least once (even with a zero or
+ * negative timeout) and one final time when the deadline is reached, so a
+ * condition that turns true during the last sleep is not missed.
+ *
+ * @param condition - Async predicate to poll.
+ * @param options - `timeout` (ms, default 30000), `interval` between polls
+ *   (ms, default 500) and `message` used as the error prefix.
+ * @throws Error `"<message> within <timeout>ms"` when the condition never
+ *   became true before the deadline.
+ */
+export async function waitForCondition(
+  condition: () => Promise<boolean>,
+  options: {
+    timeout?: number;
+    interval?: number;
+    message?: string;
+  } = {},
+): Promise<void> {
+  const {
+    timeout = 30_000,
+    interval = 500,
+    message = "Condition not met",
+  } = options;
+  const deadline = Date.now() + timeout;
+
+  for (;;) {
+    if (await condition()) {
+      return;
+    }
+
+    const remaining = deadline - Date.now();
+    if (remaining <= 0) {
+      break;
+    }
+
+    // Never sleep past the deadline; the loop re-checks right after waking.
+    const pause = Math.min(interval, remaining);
+    await new Promise((resolve) => setTimeout(resolve, pause));
+  }
+
+  throw new Error(`${message} within ${timeout}ms`);
+}
+
+/**
+ * Run an async function, retrying with exponential backoff when it throws.
+ *
+ * `maxRetries` is the total number of attempts. Values below 1 are treated
+ * as a single attempt so `fn` is always called and the caller never receives
+ * a meaningless `throw undefined`.
+ *
+ * @param fn - Async operation to run.
+ * @param options - `maxRetries` (total attempts, default 3), `baseDelay`
+ *   (ms before the first retry, default 1000) and `maxDelay` (upper bound
+ *   for a single wait in ms, default 10000).
+ * @returns The value of the first successful call.
+ * @throws Whatever the last attempt threw once all attempts are used up.
+ */
+export async function retry<T>(
+  fn: () => Promise<T>,
+  options: {
+    maxRetries?: number;
+    baseDelay?: number;
+    maxDelay?: number;
+  } = {},
+): Promise<T> {
+  const { maxRetries = 3, baseDelay = 1000, maxDelay = 10000 } = options;
+
+  // Guard against 0, negative or NaN so that at least one attempt is made.
+  const attempts = maxRetries >= 1 ? maxRetries : 1;
+
+  let lastError: Error | undefined;
+
+  for (let attempt = 0; attempt < attempts; attempt++) {
+    try {
+      return await fn();
+    } catch (error) {
+      lastError = error as Error;
+      // No point in waiting after the final attempt.
+      if (attempt < attempts - 1) {
+        // Delay doubles per attempt and is capped at maxDelay.
+        const delay = Math.min(baseDelay * 2 ** attempt, maxDelay);
+        console.log(
+          `Retry attempt ${attempt + 1}/${attempts} after ${delay}ms`,
+        );
+        await new Promise((resolve) => setTimeout(resolve, delay));
+      }
+    }
+  }
+
+  throw lastError;
+}
+
+/**
+ * Generate a unique test identifier.
+ *
+ * @returns A string shaped like `test-<epoch ms>-<9 base-36 characters>`.
+ */
+export function generateTestId(): string {
+  // slice() replaces the deprecated substr(); padEnd() keeps the suffix at
+  // nine characters even when the random value has a short base-36 form.
+  const suffix = Math.random().toString(36).slice(2, 11).padEnd(9, "0");
+  return `test-${Date.now()}-${suffix}`;
+}
+
+/**
+ * Print a test step to the console, prefixed with the current ISO timestamp.
+ *
+ * @param step - Text describing the step.
+ */
+export function logStep(step: string): void {
+  const now = new Date();
+  console.log(`[${now.toISOString()}] ${step}`);
+}
+
+/**
+ * Save a screenshot under test-results/screenshots with a file name built
+ * from the given name and the current timestamp.
+ *
+ * @param page - Page to capture.
+ * @param name - Label for the file; every character other than letters,
+ *   digits, `-` and `_` is replaced with `-`.
+ * @param options - `fullPage` is forwarded to the screenshot call and
+ *   defaults to true.
+ */
+export async function takeScreenshot(
+  page: Page,
+  name: string,
+  options: { fullPage?: boolean } = {},
+): Promise<void> {
+  const stamp = Date.now();
+  const safeName = name.replace(/[^a-zA-Z0-9-_]/g, "-");
+  const path = `test-results/screenshots/${safeName}-${stamp}.png`;
+  const fullPage = options.fullPage ?? true;
+
+  await page.screenshot({ path, fullPage });
+}
+
+/**
+ * Run an action and fail if the page logged console errors while it ran.
+ *
+ * Errors whose text contains "favicon" or "sourcemap" are ignored. The
+ * console listener is removed again even when the action throws.
+ *
+ * @param page - Page whose console is observed.
+ * @param action - Async action to run while listening.
+ * @throws Error listing every collected console error, one per line.
+ */
+export async function expectNoConsoleErrors(
+  page: Page,
+  action: () => Promise<void>,
+): Promise<void> {
+  const collected: string[] = [];
+
+  const onConsole = (msg: import("@playwright/test").ConsoleMessage) => {
+    if (msg.type() !== "error") {
+      return;
+    }
+    const text = msg.text();
+    // Known acceptable errors are dropped.
+    const ignorable = text.includes("favicon") || text.includes("sourcemap");
+    if (!ignorable) {
+      collected.push(text);
+    }
+  };
+
+  page.on("console", onConsole);
+  try {
+    await action();
+  } finally {
+    page.off("console", onConsole);
+  }
+
+  if (collected.length === 0) {
+    return;
+  }
+  throw new Error(`Console errors detected:\n${collected.join("\n")}`);
+}
+
+/**
+ * Environment-derived settings for the integration tests.
+ *
+ * Each value is read from process.env once, when this module is loaded; an
+ * unset or empty variable falls back to the default shown below.
+ */
+export const env = (() => {
+  // Read a variable, using the fallback when it is unset or empty.
+  const fromEnv = (name: string, fallback: string): string =>
+    process.env[name] || fallback;
+
+  return {
+    baseUrl: fromEnv("BASE_URL", "https://staging.all-hands.dev"),
+    testEnv: fromEnv("TEST_ENV", "staging"),
+    testRepoUrl: fromEnv("TEST_REPO_URL", "https://github.com/OpenHands/deploy"),
+    testPrompt: fromEnv("TEST_PROMPT", "Flip a coin!"),
+    isCI: process.env.CI === "true",
+
+    getFeatureBranchUrl(branchName: string): string {
+      // Anything other than letters, digits and "-" becomes "-", then the
+      // result is lower-cased and used as a sub-domain label.
+      const label = branchName.replace(/[^a-zA-Z0-9-]/g, "-").toLowerCase();
+      return `https://${label}.staging.all-hands.dev`;
+    },
+  };
+})();
+
+/**
+ * Report whether BASE_URL points at the given environment.
+ *
+ * The check is a substring match against process.env.BASE_URL, read at call
+ * time; an unset BASE_URL is treated as an empty string and matches nothing.
+ * NOTE(review): `env.baseUrl` in this file defaults to the staging URL when
+ * BASE_URL is unset, while this function does not - confirm that is intended.
+ *
+ * @param env - Environment name to test for.
+ * @returns true when BASE_URL contains that environment's host marker.
+ */
+export function isEnvironment(
+  env: "staging" | "production" | "local",
+): boolean {
+  // Host fragment that identifies each known environment.
+  const hostMarkers = new Map<string, string>([
+    ["staging", "staging.all-hands.dev"],
+    ["production", "app.all-hands.dev"],
+    ["local", "localhost"],
+  ]);
+
+  const currentUrl = process.env.BASE_URL || "";
+  const marker = hostMarkers.get(env);
+  if (marker === undefined) {
+    return false;
+  }
+  return currentUrl.includes(marker);
+}
+
+/**
+ * Skip a test when the run targets any of the listed environments.
+ *
+ * @param test - Object exposing `skip(condition, message)`.
+ * @param envs - Environments in which the test must not run.
+ * @param reason - Explanation appended to the skip message.
+ */
+export function skipInEnvironment(
+  test: { skip: (condition: boolean, message: string) => void },
+  envs: ("staging" | "production" | "local")[],
+  reason: string,
+): void {
+  const matchesAny = envs.some((name) => isEnvironment(name));
+  const message = `Skipped in ${envs.join(", ")}: ${reason}`;
+  test.skip(matchesAny, message);
+}
@@ -106,15 +106,14 @@ class EventServiceBase(EventService, ABC):
                reverse=(sort_order == EventSortOrder.TIMESTAMP_DESC),
            )

-        # Apply pagination to items (not paths)
        start_offset = 0
        next_page_id = None
        if page_id:
            start_offset = int(page_id)
-            items = items[start_offset:]
-        if len(items) > limit:
+            paths = paths[start_offset:]
+        if len(paths) > limit:
+            paths = paths[:limit]
            next_page_id = str(start_offset + limit)
-            items = items[:limit]

        return EventPage(items=items, next_page_id=next_page_id)

@@ -6,7 +6,7 @@ import logging
 import pkgutil
 from uuid import UUID

-from fastapi import APIRouter, Depends, HTTPException, Request, Response, status
+from fastapi import APIRouter, Depends, HTTPException, Response, status
 from fastapi.security import APIKeyHeader
 from jwt import InvalidTokenError
 from pydantic import SecretStr
@@ -23,87 +23,61 @@ from openhands.app_server.config import (
    depends_app_conversation_info_service,
    depends_event_service,
    depends_jwt_service,
+    depends_sandbox_service,
    get_event_callback_service,
-    get_global_config,
-    get_sandbox_service,
 )
 from openhands.app_server.errors import AuthError
 from openhands.app_server.event.event_service import EventService
 from openhands.app_server.sandbox.sandbox_models import SandboxInfo
+from openhands.app_server.sandbox.sandbox_service import SandboxService
 from openhands.app_server.services.injector import InjectorState
 from openhands.app_server.services.jwt_service import JwtService
 from openhands.app_server.user.auth_user_context import AuthUserContext
 from openhands.app_server.user.specifiy_user_context import (
-    ADMIN,
    USER_CONTEXT_ATTR,
    SpecifyUserContext,
+    as_admin,
 )
+from openhands.app_server.user.user_context import UserContext
 from openhands.integrations.provider import ProviderType
 from openhands.sdk import ConversationExecutionStatus, Event
 from openhands.sdk.event import ConversationStateUpdateEvent
-from openhands.server.types import AppMode
 from openhands.server.user_auth.default_user_auth import DefaultUserAuth
 from openhands.server.user_auth.user_auth import (
    get_for_user as get_user_auth_for_user,
 )

 router = APIRouter(prefix='/webhooks', tags=['Webhooks'])
+sandbox_service_dependency = depends_sandbox_service()
 event_service_dependency = depends_event_service()
 app_conversation_info_service_dependency = depends_app_conversation_info_service()
 jwt_dependency = depends_jwt_service()
-app_mode = get_global_config().app_mode
 _logger = logging.getLogger(__name__)


 async def valid_sandbox(
-    request: Request,
+    user_context: UserContext = Depends(as_admin),
    session_api_key: str = Depends(
        APIKeyHeader(name='X-Session-API-Key', auto_error=False)
    ),
+    sandbox_service: SandboxService = sandbox_service_dependency,
 ) -> SandboxInfo:
-    """Use a session api key for validation, and get a sandbox. Subsequent actions
-    are executed in the context of the owner of the sandbox"""
    if not session_api_key:
        raise HTTPException(
            status.HTTP_401_UNAUTHORIZED, detail='X-Session-API-Key header is required'
        )

-    # Create a state which will be used internally only for this operation
-    state = InjectorState()
-
-    # Since we need access to all sandboxes, this is executed in the context of the admin.
-    setattr(state, USER_CONTEXT_ATTR, ADMIN)
-    async with get_sandbox_service(state) as sandbox_service:
-        sandbox_info = await sandbox_service.get_sandbox_by_session_api_key(
-            session_api_key
+    sandbox_info = await sandbox_service.get_sandbox_by_session_api_key(session_api_key)
+    if sandbox_info is None:
+        raise HTTPException(
+            status.HTTP_401_UNAUTHORIZED, detail='Invalid session API key'
        )
-        if sandbox_info is None:
-            raise HTTPException(
-                status.HTTP_401_UNAUTHORIZED, detail='Invalid session API key'
-            )
-
-        # In SAAS Mode there is always a user, so we set the owner of the sandbox
-        # as the current user (Validated by the session_api_key they provided)
-        if sandbox_info.created_by_user_id:
-            setattr(
-                request.state,
-                USER_CONTEXT_ATTR,
-                SpecifyUserContext(sandbox_info.created_by_user_id),
-            )
-        elif app_mode == AppMode.SAAS:
-            _logger.error(
-                'Sandbox had no user specified', extra={'sandbox_id': sandbox_info.id}
-            )
-            raise HTTPException(
-                status.HTTP_401_UNAUTHORIZED, detail='Sandbox had no user specified'
-            )
-
-        return sandbox_info
+    return sandbox_info


 async def valid_conversation(
    conversation_id: UUID,
-    sandbox_info: SandboxInfo = Depends(valid_sandbox),
+    sandbox_info: SandboxInfo,
    app_conversation_info_service: AppConversationInfoService = app_conversation_info_service_dependency,
 ) -> AppConversationInfo:
    app_conversation_info = (
@@ -116,11 +90,9 @@ async def valid_conversation(
            sandbox_id=sandbox_info.id,
            created_by_user_id=sandbox_info.created_by_user_id,
        )
-
-    # Sanity check - Make sure that the conversation and sandbox were created by the same user
    if app_conversation_info.created_by_user_id != sandbox_info.created_by_user_id:
+        # Make sure that the conversation and sandbox were created by the same user
        raise AuthError()
-
    return app_conversation_info


@@ -167,11 +139,15 @@ async def on_conversation_update(
 async def on_event(
    events: list[Event],
    conversation_id: UUID,
-    app_conversation_info: AppConversationInfo = Depends(valid_conversation),
+    sandbox_info: SandboxInfo = Depends(valid_sandbox),
    app_conversation_info_service: AppConversationInfoService = app_conversation_info_service_dependency,
    event_service: EventService = event_service_dependency,
 ) -> Success:
    """Webhook callback for when event stream events occur."""
+    app_conversation_info = await valid_conversation(
+        conversation_id, sandbox_info, app_conversation_info_service
+    )
+
    try:
        # Save events...
        await asyncio.gather(
@@ -13,7 +13,7 @@ from openhands.sdk.utils.models import DiscriminatedUnionMixin

 # The version of the agent server to use for deployments.
 # Typically this will be the same as the values from the pyproject.toml
-AGENT_SERVER_IMAGE = 'ghcr.io/openhands/agent-server:1.13.0-python'
+AGENT_SERVER_IMAGE = 'ghcr.io/openhands/agent-server:1.12.0-python'


 class SandboxSpecService(ABC):
@@ -6367,14 +6367,14 @@ llama = ["llama-index (>=0.12.29,<0.13.0)", "llama-index-core (>=0.12.29,<0.13.0

 [[package]]
 name = "openhands-agent-server"
-version = "1.13.0"
+version = "1.12.0"
 description = "OpenHands Agent Server - REST/WebSocket interface for OpenHands AI Agent"
 optional = false
 python-versions = ">=3.12"
 groups = ["main"]
 files = [
-    {file = "openhands_agent_server-1.13.0-py3-none-any.whl", hash = "sha256:88bb8bfb03ff0cc7a7d32ffabd108d0a284f4333f33a9de27ce158b6d828bc29"},
-    {file = "openhands_agent_server-1.13.0.tar.gz", hash = "sha256:6f8b296c0f26a478d4eb49668a353e2b6997c39022c2bbcc36325f5f08887a7a"},
+    {file = "openhands_agent_server-1.12.0-py3-none-any.whl", hash = "sha256:3bd62fef10092f1155af116a8a7417041d574eff9d4e4b6f7a24bfc432de2fad"},
+    {file = "openhands_agent_server-1.12.0.tar.gz", hash = "sha256:7ea7ce579175f713ed68b68cde5d685ef694627ac7bbff40d2e22913f065c46d"},
 ]

 [package.dependencies]
@@ -6391,14 +6391,14 @@ wsproto = ">=1.2.0"

 [[package]]
 name = "openhands-sdk"
-version = "1.13.0"
+version = "1.12.0"
 description = "OpenHands SDK - Core functionality for building AI agents"
 optional = false
 python-versions = ">=3.12"
 groups = ["main"]
 files = [
-    {file = "openhands_sdk-1.13.0-py3-none-any.whl", hash = "sha256:ec83f9fa2934aae9c4ce1c0365a7037f7e17869affa44a40e71ba49d2bef7185"},
-    {file = "openhands_sdk-1.13.0.tar.gz", hash = "sha256:fbb2a2dc4852ea23cc697a36fb3f95ca47cfef432b0d195c496de6f374caad9c"},
+    {file = "openhands_sdk-1.12.0-py3-none-any.whl", hash = "sha256:857793f5c27fd63c0d4d37762550e6c504a03dd06116475c23adcc14bb5c4c02"},
+    {file = "openhands_sdk-1.12.0.tar.gz", hash = "sha256:ac348e7134ea21e1ab453978962504aff8eb47e62df1fb7a503d769d55658ea9"},
 ]

 [package.dependencies]
@@ -6421,14 +6421,14 @@ boto3 = ["boto3 (>=1.35.0)"]

 [[package]]
 name = "openhands-tools"
-version = "1.13.0"
+version = "1.12.0"
 description = "OpenHands Tools - Runtime tools for AI agents"
 optional = false
 python-versions = ">=3.12"
 groups = ["main"]
 files = [
-    {file = "openhands_tools-1.13.0-py3-none-any.whl", hash = "sha256:87073b868e20f9c769497f480e0d15b14ca41314c3d1cb5076029f37408a1d68"},
-    {file = "openhands_tools-1.13.0.tar.gz", hash = "sha256:e1181701efab5bc3133566e3b1640027824147438959cd8ce7430c941896704d"},
+    {file = "openhands_tools-1.12.0-py3-none-any.whl", hash = "sha256:57207e9e30f9d7fe9121cd21b072580cfdc2a00831edeaf8e8d685d721bb9e33"},
+    {file = "openhands_tools-1.12.0.tar.gz", hash = "sha256:f2b4d81d0b6771f5416f8b702db09a14999fa8e553073bcf38f344e29aae770c"},
 ]

 [package.dependencies]
@@ -13579,22 +13579,24 @@ files = [

 [[package]]
 name = "tornado"
-version = "6.5.5"
+version = "6.5.4"
 description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed."
 optional = false
 python-versions = ">=3.9"
 groups = ["main", "runtime"]
 files = [
-    {file = "tornado-6.5.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:487dc9cc380e29f58c7ab88f9e27cdeef04b2140862e5076a66fb6bb68bb1bfa"},
-    {file = "tornado-6.5.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:65a7f1d46d4bb41df1ac99f5fcb685fb25c7e61613742d5108b010975a9a6521"},
-    {file = "tornado-6.5.5-cp39-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e74c92e8e65086b338fd56333fb9a68b9f6f2fe7ad532645a290a464bcf46be5"},
-    {file = "tornado-6.5.5-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:435319e9e340276428bbdb4e7fa732c2d399386d1de5686cb331ec8eee754f07"},
-    {file = "tornado-6.5.5-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3f54aa540bdbfee7b9eb268ead60e7d199de5021facd276819c193c0fb28ea4e"},
-    {file = "tornado-6.5.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:36abed1754faeb80fbd6e64db2758091e1320f6bba74a4cf8c09cd18ccce8aca"},
-    {file = "tornado-6.5.5-cp39-abi3-win32.whl", hash = "sha256:dd3eafaaeec1c7f2f8fdcd5f964e8907ad788fe8a5a32c4426fbbdda621223b7"},
-    {file = "tornado-6.5.5-cp39-abi3-win_amd64.whl", hash = "sha256:6443a794ba961a9f619b1ae926a2e900ac20c34483eea67be4ed8f1e58d3ef7b"},
-    {file = "tornado-6.5.5-cp39-abi3-win_arm64.whl", hash = "sha256:2c9a876e094109333f888539ddb2de4361743e5d21eece20688e3e351e4990a6"},
-    {file = "tornado-6.5.5.tar.gz", hash = "sha256:192b8f3ea91bd7f1f50c06955416ed76c6b72f96779b962f07f911b91e8d30e9"},
+    {file = "tornado-6.5.4-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d6241c1a16b1c9e4cc28148b1cda97dd1c6cb4fb7068ac1bedc610768dff0ba9"},
+    {file = "tornado-6.5.4-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2d50f63dda1d2cac3ae1fa23d254e16b5e38153758470e9956cbc3d813d40843"},
+    {file = "tornado-6.5.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1cf66105dc6acb5af613c054955b8137e34a03698aa53272dbda4afe252be17"},
+    {file = "tornado-6.5.4-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50ff0a58b0dc97939d29da29cd624da010e7f804746621c78d14b80238669335"},
+    {file = "tornado-6.5.4-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5fb5e04efa54cf0baabdd10061eb4148e0be137166146fff835745f59ab9f7f"},
+    {file = "tornado-6.5.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9c86b1643b33a4cd415f8d0fe53045f913bf07b4a3ef646b735a6a86047dda84"},
+    {file = "tornado-6.5.4-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:6eb82872335a53dd063a4f10917b3efd28270b56a33db69009606a0312660a6f"},
+    {file = "tornado-6.5.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6076d5dda368c9328ff41ab5d9dd3608e695e8225d1cd0fd1e006f05da3635a8"},
+    {file = "tornado-6.5.4-cp39-abi3-win32.whl", hash = "sha256:1768110f2411d5cd281bac0a090f707223ce77fd110424361092859e089b38d1"},
+    {file = "tornado-6.5.4-cp39-abi3-win_amd64.whl", hash = "sha256:fa07d31e0cd85c60713f2b995da613588aa03e1303d75705dca6af8babc18ddc"},
+    {file = "tornado-6.5.4-cp39-abi3-win_arm64.whl", hash = "sha256:053e6e16701eb6cbe641f308f4c1a9541f91b6261991160391bfc342e8a551a1"},
+    {file = "tornado-6.5.4.tar.gz", hash = "sha256:a22fa9047405d03260b483980635f0b041989d8bcc9a313f8fe18b411d84b1d7"},
 ]

 [[package]]
@@ -14846,4 +14848,4 @@ third-party-runtimes = ["daytona", "e2b-code-interpreter", "modal", "runloop-api
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12,<3.14"
-content-hash = "8988a1da93e30d92a44ff7690ad39ce34a164c3a7b249e0d63a270a505bd52a9"
+content-hash = "7319bfec87aed5ed2803ad7cb947f875e83fa62216b1662a87b9b84078dc03e4"
@@ -57,9 +57,9 @@ dependencies = [
  "numpy",
  "openai==2.8",
  "openhands-aci==0.3.3",
-  "openhands-agent-server==1.13",
-  "openhands-sdk==1.13",
-  "openhands-tools==1.13",
+  "openhands-agent-server==1.12",
+  "openhands-sdk==1.12",
+  "openhands-tools==1.12",
  "opentelemetry-api>=1.33.1",
  "opentelemetry-exporter-otlp-proto-grpc>=1.33.1",
  "pathspec>=0.12.1",
@@ -249,9 +249,9 @@ e2b-code-interpreter = { version = "^2.0.0", optional = true }
 pybase62 = "^1.0.0"

 # V1 dependencies
-openhands-sdk = "1.13"
-openhands-agent-server = "1.13"
-openhands-tools = "1.13"
+openhands-sdk = "1.12"
+openhands-agent-server = "1.12"
+openhands-tools = "1.12"
 jwcrypto = ">=1.5.6"
 sqlalchemy = { extras = [ "asyncio" ], version = "^2.0.40" }
 pg8000 = "^1.31.5"
@@ -161,113 +161,6 @@ class TestFilesystemEventServiceSearchEvents:
        assert hasattr(result, 'next_page_id')
        assert len(result.items) == 3

-    @pytest.mark.asyncio
-    async def test_search_events_pagination_limits_results(
-        self, service: FilesystemEventService
-    ):
-        """Test that search_events respects the limit parameter for pagination."""
-        conversation_id = uuid4()
-        total_events = 10
-        page_limit = 3
-
-        # Create more events than the limit
-        for _ in range(total_events):
-            await service.save_event(conversation_id, create_token_event())
-
-        # First page should return only 'limit' events
-        result = await service.search_events(conversation_id, limit=page_limit)
-
-        assert len(result.items) == page_limit
-        assert result.next_page_id is not None
-
-    @pytest.mark.asyncio
-    async def test_search_events_pagination_iterates_all_events(
-        self, service: FilesystemEventService
-    ):
-        """Test that pagination correctly iterates through all events without duplicates.
-
-        This test verifies the fix for a bug where pagination was applied to 'paths'
-        instead of 'items', causing all events to be returned on every page.
-        """
-        conversation_id = uuid4()
-        total_events = 10
-        page_limit = 3
-
-        # Create events and track their IDs
-        created_event_ids = set()
-        for _ in range(total_events):
-            event = create_token_event()
-            created_event_ids.add(event.id)
-            await service.save_event(conversation_id, event)
-
-        # Iterate through all pages and collect event IDs
-        collected_event_ids = set()
-        page_id = None
-        page_count = 0
-
-        while True:
-            result = await service.search_events(
-                conversation_id, page_id=page_id, limit=page_limit
-            )
-            page_count += 1
-
-            for item in result.items:
-                # Verify no duplicates - this would fail with the old buggy code
-                assert item.id not in collected_event_ids, (
-                    f'Duplicate event {item.id} found on page {page_count}'
-                )
-                collected_event_ids.add(item.id)
-
-            if result.next_page_id is None:
-                break
-            page_id = result.next_page_id
-
-        # Verify we got all events exactly once
-        assert collected_event_ids == created_event_ids
-        assert len(collected_event_ids) == total_events
-
-        # With 10 events and limit of 3, we should have 4 pages (3+3+3+1)
-        expected_pages = (total_events + page_limit - 1) // page_limit
-        assert page_count == expected_pages
-
-    @pytest.mark.asyncio
-    async def test_search_events_pagination_with_filters(
-        self, service: FilesystemEventService
-    ):
-        """Test that pagination works correctly when combined with filters."""
-        conversation_id = uuid4()
-
-        # Create a mix of events
-        token_events = [create_token_event() for _ in range(5)]
-        pause_events = [create_pause_event() for _ in range(3)]
-
-        for event in token_events + pause_events:
-            await service.save_event(conversation_id, event)
-
-        # Search only for token events with pagination
-        page_limit = 2
-        collected_ids = set()
-        page_id = None
-
-        while True:
-            result = await service.search_events(
-                conversation_id,
-                kind__eq='TokenEvent',
-                page_id=page_id,
-                limit=page_limit,
-            )
-
-            for item in result.items:
-                assert item.kind == 'TokenEvent'
-                collected_ids.add(item.id)
-
-            if result.next_page_id is None:
-                break
-            page_id = result.next_page_id
-
-        # Should have found all 5 token events
-        assert len(collected_ids) == 5
-

 class TestFilesystemEventServiceIntegration:
    """Integration tests for FilesystemEventService."""
@@ -3,65 +3,18 @@
 This module tests the webhook authentication and authorization logic.
 """

-import contextlib
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock
 from uuid import uuid4

 import pytest
-from fastapi import FastAPI, HTTPException, status
-from fastapi.testclient import TestClient
+from fastapi import HTTPException, status

-from openhands.app_server.event_callback.webhook_router import (
-    router as webhook_router,
-)
 from openhands.app_server.event_callback.webhook_router import (
    valid_conversation,
    valid_sandbox,
 )
 from openhands.app_server.sandbox.sandbox_models import SandboxInfo, SandboxStatus
-from openhands.app_server.user.specifiy_user_context import (
-    USER_CONTEXT_ATTR,
-    SpecifyUserContext,
-)
-from openhands.server.types import AppMode
-
-
-class MockRequestState:
-    """A mock request state that tracks attribute assignments."""
-
-    def __init__(self):
-        self._state = {}
-        self._attributes = {}
-
-    def __setattr__(self, name, value):
-        if name.startswith('_'):
-            super().__setattr__(name, value)
-        else:
-            self._attributes[name] = value
-
-    def __getattr__(self, name):
-        if name in self._attributes:
-            return self._attributes[name]
-        raise AttributeError(
-            f"'{type(self).__name__}' object has no attribute '{name}'"
-        )
-
-
-def create_mock_request():
-    """Create a mock FastAPI Request object with proper state."""
-    request = MagicMock()
-    request.state = MockRequestState()
-    return request
-
-
-def create_sandbox_service_context_manager(sandbox_service):
-    """Create an async context manager that yields the given sandbox service."""
-
-    @contextlib.asynccontextmanager
-    async def _context_manager(state, request=None):
-        yield sandbox_service
-
-    return _context_manager
+from openhands.app_server.user.specifiy_user_context import ADMIN


 class TestValidSandbox:
@@ -69,15 +22,14 @@ class TestValidSandbox:

    @pytest.mark.asyncio
    async def test_valid_sandbox_with_valid_api_key(self):
-        """Test that valid API key returns sandbox info and sets user_context."""
+        """Test that valid API key returns sandbox info."""
        # Arrange
        session_api_key = 'valid-api-key-123'
-        user_id = 'user-123'
        expected_sandbox = SandboxInfo(
            id='sandbox-123',
            status=SandboxStatus.RUNNING,
            session_api_key=session_api_key,
-            created_by_user_id=user_id,
+            created_by_user_id='user-123',
            sandbox_spec_id='spec-123',
        )

@@ -86,17 +38,12 @@ class TestValidSandbox:
            return_value=expected_sandbox
        )

-        mock_request = create_mock_request()
-
        # Act
-        with patch(
-            'openhands.app_server.event_callback.webhook_router.get_sandbox_service',
-            create_sandbox_service_context_manager(mock_sandbox_service),
-        ):
-            result = await valid_sandbox(
-                request=mock_request,
-                session_api_key=session_api_key,
-            )
+        result = await valid_sandbox(
+            user_context=ADMIN,
+            session_api_key=session_api_key,
+            sandbox_service=mock_sandbox_service,
+        )

        # Assert
        assert result == expected_sandbox
@@ -104,136 +51,18 @@ class TestValidSandbox:
            session_api_key
        )

-        # Verify user_context is set correctly on request.state
-        assert USER_CONTEXT_ATTR in mock_request.state._attributes
-        user_context = mock_request.state._attributes[USER_CONTEXT_ATTR]
-        assert isinstance(user_context, SpecifyUserContext)
-        assert user_context.user_id == user_id
-
-    @pytest.mark.asyncio
-    async def test_valid_sandbox_sets_user_context_to_sandbox_owner(self):
-        """Test that user_context is set to the sandbox owner's user ID."""
-        # Arrange
-        session_api_key = 'valid-api-key'
-        sandbox_owner_id = 'sandbox-owner-user-id'
-        expected_sandbox = SandboxInfo(
-            id='sandbox-456',
-            status=SandboxStatus.RUNNING,
-            session_api_key=session_api_key,
-            created_by_user_id=sandbox_owner_id,
-            sandbox_spec_id='spec-456',
-        )
-
-        mock_sandbox_service = AsyncMock()
-        mock_sandbox_service.get_sandbox_by_session_api_key = AsyncMock(
-            return_value=expected_sandbox
-        )
-
-        mock_request = create_mock_request()
-
-        # Act
-        with patch(
-            'openhands.app_server.event_callback.webhook_router.get_sandbox_service',
-            create_sandbox_service_context_manager(mock_sandbox_service),
-        ):
-            await valid_sandbox(
-                request=mock_request,
-                session_api_key=session_api_key,
-            )
-
-        # Assert - user_context should be set to the sandbox owner
-        assert USER_CONTEXT_ATTR in mock_request.state._attributes
-        user_context = mock_request.state._attributes[USER_CONTEXT_ATTR]
-        assert isinstance(user_context, SpecifyUserContext)
-        assert user_context.user_id == sandbox_owner_id
-
-    @pytest.mark.asyncio
-    async def test_valid_sandbox_no_user_context_when_no_user_id(self):
-        """Test that user_context is not set when sandbox has no created_by_user_id."""
-        # Arrange
-        session_api_key = 'valid-api-key'
-        expected_sandbox = SandboxInfo(
-            id='sandbox-789',
-            status=SandboxStatus.RUNNING,
-            session_api_key=session_api_key,
-            created_by_user_id=None,  # No user ID
-            sandbox_spec_id='spec-789',
-        )
-
-        mock_sandbox_service = AsyncMock()
-        mock_sandbox_service.get_sandbox_by_session_api_key = AsyncMock(
-            return_value=expected_sandbox
-        )
-
-        mock_request = create_mock_request()
-
-        # Act
-        with patch(
-            'openhands.app_server.event_callback.webhook_router.get_sandbox_service',
-            create_sandbox_service_context_manager(mock_sandbox_service),
-        ):
-            result = await valid_sandbox(
-                request=mock_request,
-                session_api_key=session_api_key,
-            )
-
-        # Assert - sandbox is returned but user_context should NOT be set
-        assert result == expected_sandbox
-
-        # Verify user_context is NOT set on request.state
-        assert USER_CONTEXT_ATTR not in mock_request.state._attributes
-
-    @pytest.mark.asyncio
-    async def test_valid_sandbox_no_user_context_when_no_user_id_raises_401_in_saas_mode(
-        self,
-    ):
-        """Test that user_context is not set when sandbox has no created_by_user_id."""
-        # Arrange
-        session_api_key = 'valid-api-key'
-        expected_sandbox = SandboxInfo(
-            id='sandbox-789',
-            status=SandboxStatus.RUNNING,
-            session_api_key=session_api_key,
-            created_by_user_id=None,  # No user ID
-            sandbox_spec_id='spec-789',
-        )
-
-        mock_sandbox_service = AsyncMock()
-        mock_sandbox_service.get_sandbox_by_session_api_key = AsyncMock(
-            return_value=expected_sandbox
-        )
-
-        mock_request = create_mock_request()
-
-        # Act
-        with (
-            patch(
-                'openhands.app_server.event_callback.webhook_router.get_sandbox_service',
-                create_sandbox_service_context_manager(mock_sandbox_service),
-            ),
-            patch(
-                'openhands.app_server.event_callback.webhook_router.app_mode',
-                AppMode.SAAS,
-            ),
-        ):
-            with pytest.raises(HTTPException) as excinfo:
-                await valid_sandbox(
-                    request=mock_request,
-                    session_api_key=session_api_key,
-                )
-            assert excinfo.value.status_code == 401
-
    @pytest.mark.asyncio
    async def test_valid_sandbox_without_api_key_raises_401(self):
        """Test that missing API key raises 401 error."""
        # Arrange
-        mock_request = create_mock_request()
+        mock_sandbox_service = AsyncMock()

        # Act & Assert
        with pytest.raises(HTTPException) as exc_info:
            await valid_sandbox(
-                request=mock_request,
+                user_context=ADMIN,
                session_api_key=None,
+                sandbox_service=mock_sandbox_service,
            )

        assert exc_info.value.status_code == status.HTTP_401_UNAUTHORIZED
@@ -249,18 +78,13 @@ class TestValidSandbox:
            return_value=None
        )

-        mock_request = create_mock_request()
-
        # Act & Assert
-        with patch(
-            'openhands.app_server.event_callback.webhook_router.get_sandbox_service',
-            create_sandbox_service_context_manager(mock_sandbox_service),
-        ):
-            with pytest.raises(HTTPException) as exc_info:
-                await valid_sandbox(
-                    request=mock_request,
-                    session_api_key=session_api_key,
-                )
+        with pytest.raises(HTTPException) as exc_info:
+            await valid_sandbox(
+                user_context=ADMIN,
+                session_api_key=session_api_key,
+                sandbox_service=mock_sandbox_service,
+            )

        assert exc_info.value.status_code == status.HTTP_401_UNAUTHORIZED
        assert 'Invalid session API key' in exc_info.value.detail
@@ -271,13 +95,13 @@ class TestValidSandbox:
        # Arrange - empty string is falsy, so it gets rejected at the check
        session_api_key = ''
        mock_sandbox_service = AsyncMock()
-        mock_request = create_mock_request()

        # Act & Assert - should raise 401 because empty string fails the truth check
        with pytest.raises(HTTPException) as exc_info:
            await valid_sandbox(
-                request=mock_request,
+                user_context=ADMIN,
                session_api_key=session_api_key,
+                sandbox_service=mock_sandbox_service,
            )

        assert exc_info.value.status_code == status.HTTP_401_UNAUTHORIZED
@@ -439,17 +263,12 @@ class TestWebhookAuthenticationIntegration:
            return_value=conversation_info
        )

-        mock_request = create_mock_request()
-
        # Act - Call valid_sandbox first
-        with patch(
-            'openhands.app_server.event_callback.webhook_router.get_sandbox_service',
-            create_sandbox_service_context_manager(mock_sandbox_service),
-        ):
-            sandbox_result = await valid_sandbox(
-                request=mock_request,
-                session_api_key=session_api_key,
-            )
+        sandbox_result = await valid_sandbox(
+            user_context=ADMIN,
+            session_api_key=session_api_key,
+            sandbox_service=mock_sandbox_service,
+        )

        # Then call valid_conversation
        conversation_result = await valid_conversation(
@@ -472,18 +291,13 @@ class TestWebhookAuthenticationIntegration:
            return_value=None
        )

-        mock_request = create_mock_request()
-
        # Act & Assert - Should fail at valid_sandbox
-        with patch(
-            'openhands.app_server.event_callback.webhook_router.get_sandbox_service',
-            create_sandbox_service_context_manager(mock_sandbox_service),
-        ):
-            with pytest.raises(HTTPException) as exc_info:
-                await valid_sandbox(
-                    request=mock_request,
-                    session_api_key=session_api_key,
-                )
+        with pytest.raises(HTTPException) as exc_info:
+            await valid_sandbox(
+                user_context=ADMIN,
+                session_api_key=session_api_key,
+                sandbox_service=mock_sandbox_service,
+            )

        assert exc_info.value.status_code == status.HTTP_401_UNAUTHORIZED

@@ -514,17 +328,12 @@ class TestWebhookAuthenticationIntegration:
            return_value=different_user_info
        )

-        mock_request = create_mock_request()
-
        # Act - valid_sandbox succeeds
-        with patch(
-            'openhands.app_server.event_callback.webhook_router.get_sandbox_service',
-            create_sandbox_service_context_manager(mock_sandbox_service),
-        ):
-            sandbox_result = await valid_sandbox(
-                request=mock_request,
-                session_api_key=session_api_key,
-            )
+        sandbox_result = await valid_sandbox(
+            user_context=ADMIN,
+            session_api_key=session_api_key,
+            sandbox_service=mock_sandbox_service,
+        )

        # But valid_conversation fails
        from openhands.app_server.errors import AuthError
@@ -535,88 +344,3 @@ class TestWebhookAuthenticationIntegration:
                sandbox_info=sandbox_result,
                app_conversation_info_service=mock_conversation_service,
            )
-
-
-class TestWebhookRouterHTTPIntegration:
-    """Integration tests for webhook router HTTP layer.
-
-    These tests validate that FastAPI routing correctly extracts conversation_id
-    from the request body rather than requiring it as a query parameter.
-    """
-
-    def test_conversation_update_endpoint_does_not_require_query_param(self):
-        """Test that /webhooks/conversations endpoint accepts conversation_id in body only.
-
-        This test validates the fix for the regression where the endpoint incorrectly
-        required conversation_id as a query parameter due to using Depends(valid_conversation).
-
-        The endpoint should:
-        1. Accept POST requests without any query parameters
-        2. Extract conversation_id from the request body (conversation_info.id)
-        3. Return 401 (not 422) when auth fails, proving the request was parsed correctly
-        """
-        # Create a minimal FastAPI app with just the webhook router
-        app = FastAPI()
-        app.include_router(webhook_router, prefix='/api/v1')
-
-        client = TestClient(app, raise_server_exceptions=False)
-
-        # Create a valid request body with conversation_id in it
-        conversation_id = str(uuid4())
-        request_body = {
-            'id': conversation_id,
-            'execution_status': 'running',
-            'agent': {
-                'llm': {
-                    'model': 'gpt-4',
-                },
-            },
-            'stats': {
-                'usage_to_metrics': {},
-            },
-        }
-
-        # POST to /webhooks/conversations WITHOUT any query parameters
-        # If the old bug existed (conversation_id required as query param),
-        # FastAPI would return 422 Unprocessable Entity
-        response = client.post(
-            '/api/v1/webhooks/conversations',
-            json=request_body,
-            # No X-Session-API-Key header - should fail auth but NOT validation
-        )
-
-        # We expect 401 Unauthorized (missing session API key)
-        # NOT 422 Unprocessable Entity (which would indicate conversation_id
-        # was incorrectly required as a query parameter)
-        assert response.status_code == status.HTTP_401_UNAUTHORIZED, (
-            f'Expected 401 (auth failure), got {response.status_code}. '
-            f'If 422, the endpoint incorrectly requires conversation_id as query param. '
-            f'Response: {response.json()}'
-        )
-        assert response.json()['detail'] == 'X-Session-API-Key header is required'
-
-    def test_events_endpoint_still_requires_conversation_id_in_path(self):
-        """Test that /webhooks/events/{conversation_id} correctly requires path param.
-
-        This ensures we didn't accidentally break the events endpoint which legitimately
-        requires conversation_id as a path parameter.
-        """
-        # Create a minimal FastAPI app with just the webhook router
-        app = FastAPI()
-        app.include_router(webhook_router, prefix='/api/v1')
-
-        client = TestClient(app, raise_server_exceptions=False)
-
-        conversation_id = str(uuid4())
-        request_body = []  # Empty events list
-
-        # POST to /webhooks/events/{conversation_id} with path parameter
-        response = client.post(
-            f'/api/v1/webhooks/events/{conversation_id}',
-            json=request_body,
-            # No X-Session-API-Key header - should fail auth but NOT validation
-        )
-
-        # We expect 401 Unauthorized (missing session API key)
-        assert response.status_code == status.HTTP_401_UNAUTHORIZED
-        assert response.json()['detail'] == 'X-Session-API-Key header is required'
@@ -19,7 +19,6 @@ from openhands.app_server.app_conversation.app_conversation_models import (
 from openhands.app_server.app_conversation.sql_app_conversation_info_service import (
    SQLAppConversationInfoService,
 )
-from openhands.app_server.event_callback.webhook_router import on_conversation_update
 from openhands.app_server.sandbox.sandbox_models import SandboxInfo, SandboxStatus
 from openhands.app_server.user.specifiy_user_context import SpecifyUserContext
 from openhands.app_server.utils.sql_utils import Base
@@ -119,6 +118,9 @@ class TestOnConversationUpdateParentConversationId:
        Assert:
            - Saved conversation retains the parent_conversation_id
        """
+        from openhands.app_server.event_callback.webhook_router import (
+            on_conversation_update,
+        )

        # Arrange
        parent_id = uuid4()
@@ -135,11 +137,12 @@ class TestOnConversationUpdateParentConversationId:
            parent_conversation_id=parent_id,
        )

-        # Act - call on_conversation_update directly with mocked valid_conversation
+        # Mock valid_conversation to return existing conversation
        with patch(
            'openhands.app_server.event_callback.webhook_router.valid_conversation',
            return_value=existing_conv,
        ):
+            # Act
            result = await on_conversation_update(
                conversation_info=mock_conversation_info,
                sandbox_info=sandbox_info,
@@ -172,6 +175,9 @@ class TestOnConversationUpdateParentConversationId:
        Assert:
            - Saved conversation has parent_conversation_id as None
        """
+        from openhands.app_server.event_callback.webhook_router import (
+            on_conversation_update,
+        )

        # Arrange
        conversation_id = mock_conversation_info.id
@@ -185,11 +191,12 @@ class TestOnConversationUpdateParentConversationId:
            parent_conversation_id=None,
        )

-        # Act - call on_conversation_update directly with mocked valid_conversation
+        # Mock valid_conversation to return existing conversation
        with patch(
            'openhands.app_server.event_callback.webhook_router.valid_conversation',
            return_value=existing_conv,
        ):
+            # Act
            result = await on_conversation_update(
                conversation_info=mock_conversation_info,
                sandbox_info=sandbox_info,
@@ -221,6 +228,9 @@ class TestOnConversationUpdateParentConversationId:
        Assert:
            - New conversation has parent_conversation_id as None
        """
+        from openhands.app_server.event_callback.webhook_router import (
+            on_conversation_update,
+        )

        # Arrange
        conversation_id = mock_conversation_info.id
@@ -232,11 +242,12 @@ class TestOnConversationUpdateParentConversationId:
            created_by_user_id=sandbox_info.created_by_user_id,
        )

-        # Act - call on_conversation_update directly with mocked valid_conversation
+        # Mock valid_conversation to return a stub (as it would for a new conversation)
        with patch(
            'openhands.app_server.event_callback.webhook_router.valid_conversation',
            return_value=stub_conv,
        ):
+            # Act
            result = await on_conversation_update(
                conversation_info=mock_conversation_info,
                sandbox_info=sandbox_info,
@@ -269,6 +280,9 @@ class TestOnConversationUpdateParentConversationId:
        Assert:
            - All metadata including parent_conversation_id is preserved
        """
+        from openhands.app_server.event_callback.webhook_router import (
+            on_conversation_update,
+        )

        # Arrange
        parent_id = uuid4()
@@ -288,11 +302,12 @@ class TestOnConversationUpdateParentConversationId:
            parent_conversation_id=parent_id,
        )

-        # Act - call on_conversation_update directly with mocked valid_conversation
+        # Mock valid_conversation to return existing conversation
        with patch(
            'openhands.app_server.event_callback.webhook_router.valid_conversation',
            return_value=existing_conv,
        ):
+            # Act
            result = await on_conversation_update(
                conversation_info=mock_conversation_info,
                sandbox_info=sandbox_info,
@@ -334,6 +349,9 @@ class TestOnConversationUpdateParentConversationId:
        Assert:
            - Parent_conversation_id remains unchanged after all updates
        """
+        from openhands.app_server.event_callback.webhook_router import (
+            on_conversation_update,
+        )

        # Arrange
        parent_id = uuid4()
@@ -348,8 +366,9 @@ class TestOnConversationUpdateParentConversationId:
            parent_conversation_id=parent_id,
        )

-        # Act - Update multiple times, simulating what valid_conversation would return
-        for _ in range(3):
+        # Mock valid_conversation to return the conversation with its parent.
+        # In a real scenario, this would come from the DB after the first save.
+        async def mock_valid_conv(*args, **kwargs):
            # After first save, get from DB with parent preserved
            saved = await app_conversation_info_service.get_app_conversation_info(
                conversation_id
@@ -357,20 +376,21 @@ class TestOnConversationUpdateParentConversationId:
            if saved:
                # Override created_by_user_id for auth check
                saved.created_by_user_id = 'user_123'
-                existing = saved
-            else:
-                existing = initial_conv
+                return saved
+            return initial_conv

-            with patch(
-                'openhands.app_server.event_callback.webhook_router.valid_conversation',
-                return_value=existing,
-            ):
+        with patch(
+            'openhands.app_server.event_callback.webhook_router.valid_conversation',
+            side_effect=mock_valid_conv,
+        ):
+            # Act - Update multiple times
+            for _ in range(3):
                result = await on_conversation_update(
                    conversation_info=mock_conversation_info,
                    sandbox_info=sandbox_info,
                    app_conversation_info_service=app_conversation_info_service,
                )
-            assert isinstance(result, Success)
+                assert isinstance(result, Success)

        # Assert
        saved_conv = await app_conversation_info_service.get_app_conversation_info(
@@ -397,6 +417,9 @@ class TestOnConversationUpdateParentConversationId:
        Assert:
            - Function returns early, no updates are made
        """
+        from openhands.app_server.event_callback.webhook_router import (
+            on_conversation_update,
+        )

        # Arrange
        parent_id = uuid4()
@@ -418,11 +441,12 @@ class TestOnConversationUpdateParentConversationId:
        # Set conversation to DELETING status
        mock_conversation_info.execution_status = ConversationExecutionStatus.DELETING

-        # Act - call on_conversation_update directly with mocked valid_conversation
+        # Mock valid_conversation (though it won't be called for DELETING status)
        with patch(
            'openhands.app_server.event_callback.webhook_router.valid_conversation',
            return_value=existing_conv,
        ):
+            # Act
            result = await on_conversation_update(
                conversation_info=mock_conversation_info,
                sandbox_info=sandbox_info,
@@ -457,6 +481,9 @@ class TestOnConversationUpdateParentConversationId:
        Assert:
            - Parent_conversation_id is preserved and title is generated
        """
+        from openhands.app_server.event_callback.webhook_router import (
+            on_conversation_update,
+        )

        # Arrange
        parent_id = uuid4()
@@ -471,11 +498,12 @@ class TestOnConversationUpdateParentConversationId:
            parent_conversation_id=parent_id,
        )

-        # Act - call on_conversation_update directly with mocked valid_conversation
+        # Mock valid_conversation to return existing conversation
        with patch(
            'openhands.app_server.event_callback.webhook_router.valid_conversation',
            return_value=existing_conv,
        ):
+            # Act
            result = await on_conversation_update(
                conversation_info=mock_conversation_info,
                sandbox_info=sandbox_info,
@@ -451,9 +451,11 @@ class TestOnEventStatsProcessing:
    @pytest.mark.asyncio
    async def test_on_event_processes_stats_events(self):
        """Test that on_event processes stats events."""
-        from unittest.mock import patch
-
        from openhands.app_server.event_callback.webhook_router import on_event
+        from openhands.app_server.sandbox.sandbox_models import (
+            SandboxInfo,
+            SandboxStatus,
+        )

        conversation_id = uuid4()
        sandbox_id = 'sandbox_123'
@@ -480,6 +482,15 @@ class TestOnEventStatsProcessing:

        events = [stats_event, other_event]

+        # Mock dependencies
+        mock_sandbox = SandboxInfo(
+            id=sandbox_id,
+            status=SandboxStatus.RUNNING,
+            session_api_key='test_key',
+            created_by_user_id='user_123',
+            sandbox_spec_id='spec_123',
+        )
+
        mock_app_conversation_info = AppConversationInfo(
            id=conversation_id,
            sandbox_id=sandbox_id,
@@ -488,6 +499,9 @@ class TestOnEventStatsProcessing:

        mock_event_service = AsyncMock()
        mock_app_conversation_info_service = AsyncMock()
+        mock_app_conversation_info_service.get_app_conversation_info.return_value = (
+            mock_app_conversation_info
+        )

        # Set up process_stats_event to call update_conversation_statistics
        async def process_stats_event_side_effect(event, conversation_id):
@@ -505,33 +519,44 @@ class TestOnEventStatsProcessing:
            process_stats_event_side_effect
        )

-        with patch(
-            'openhands.app_server.event_callback.webhook_router._run_callbacks_in_bg_and_close'
-        ) as mock_callbacks:
-            # Call on_event directly with dependencies
+        with (
+            patch(
+                'openhands.app_server.event_callback.webhook_router.valid_sandbox',
+                return_value=mock_sandbox,
+            ),
+            patch(
+                'openhands.app_server.event_callback.webhook_router.valid_conversation',
+                return_value=mock_app_conversation_info,
+            ),
+            patch(
+                'openhands.app_server.event_callback.webhook_router._run_callbacks_in_bg_and_close'
+            ) as mock_callbacks,
+        ):
            await on_event(
                events=events,
                conversation_id=conversation_id,
-                app_conversation_info=mock_app_conversation_info,
+                sandbox_info=mock_sandbox,
                app_conversation_info_service=mock_app_conversation_info_service,
                event_service=mock_event_service,
            )

-        # Verify events were saved
-        assert mock_event_service.save_event.call_count == 2
+            # Verify events were saved
+            assert mock_event_service.save_event.call_count == 2

-        # Verify stats event was processed
-        mock_app_conversation_info_service.update_conversation_statistics.assert_called_once()
+            # Verify stats event was processed
+            mock_app_conversation_info_service.update_conversation_statistics.assert_called_once()

-        # Verify callbacks were scheduled
-        mock_callbacks.assert_called_once()
+            # Verify callbacks were scheduled
+            mock_callbacks.assert_called_once()

    @pytest.mark.asyncio
    async def test_on_event_skips_non_stats_events(self):
        """Test that on_event skips non-stats events."""
-        from unittest.mock import patch
-
        from openhands.app_server.event_callback.webhook_router import on_event
+        from openhands.app_server.sandbox.sandbox_models import (
+            SandboxInfo,
+            SandboxStatus,
+        )
        from openhands.events.action.message import MessageAction

        conversation_id = uuid4()
@@ -543,6 +568,14 @@ class TestOnEventStatsProcessing:
            MessageAction(content='test'),
        ]

+        mock_sandbox = SandboxInfo(
+            id=sandbox_id,
+            status=SandboxStatus.RUNNING,
+            session_api_key='test_key',
+            created_by_user_id='user_123',
+            sandbox_spec_id='spec_123',
+        )
+
        mock_app_conversation_info = AppConversationInfo(
            id=conversation_id,
            sandbox_id=sandbox_id,
@@ -551,18 +584,30 @@ class TestOnEventStatsProcessing:

        mock_event_service = AsyncMock()
        mock_app_conversation_info_service = AsyncMock()
+        mock_app_conversation_info_service.get_app_conversation_info.return_value = (
+            mock_app_conversation_info
+        )

-        with patch(
-            'openhands.app_server.event_callback.webhook_router._run_callbacks_in_bg_and_close'
+        with (
+            patch(
+                'openhands.app_server.event_callback.webhook_router.valid_sandbox',
+                return_value=mock_sandbox,
+            ),
+            patch(
+                'openhands.app_server.event_callback.webhook_router.valid_conversation',
+                return_value=mock_app_conversation_info,
+            ),
+            patch(
+                'openhands.app_server.event_callback.webhook_router._run_callbacks_in_bg_and_close'
+            ),
        ):
-            # Call on_event directly with dependencies
            await on_event(
                events=events,
                conversation_id=conversation_id,
-                app_conversation_info=mock_app_conversation_info,
+                sandbox_info=mock_sandbox,
                app_conversation_info_service=mock_app_conversation_info_service,
                event_service=mock_event_service,
            )

-        # Verify stats update was NOT called
-        mock_app_conversation_info_service.update_conversation_statistics.assert_not_called()
+            # Verify stats update was NOT called
+            mock_app_conversation_info_service.update_conversation_statistics.assert_not_called()
@@ -3642,7 +3642,7 @@ wheels = [

 [[package]]
 name = "openhands-agent-server"
-version = "1.13.0"
+version = "1.12.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "aiosqlite" },
@@ -3656,9 +3656,9 @@ dependencies = [
    { name = "websockets" },
    { name = "wsproto" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/c2/d0/419756ad3368e7ab47c07111dfb4bf40073c110817914e09553b8e056fe8/openhands_agent_server-1.13.0.tar.gz", hash = "sha256:6f8b296c0f26a478d4eb49668a353e2b6997c39022c2bbcc36325f5f08887a7a", size = 73594, upload-time = "2026-03-10T18:41:25.52Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/5f/18/d76d977201ec93faf22d6cc979b5c9953a0b554bf3294cdb3186d48a5d5a/openhands_agent_server-1.12.0.tar.gz", hash = "sha256:7ea7ce579175f713ed68b68cde5d685ef694627ac7bbff40d2e22913f065c46d", size = 72715, upload-time = "2026-03-05T19:22:23.027Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/fc/e1/77b9b3181e6cba89c601533757d148f911416ff968a4ea5fe0882d479ccf/openhands_agent_server-1.13.0-py3-none-any.whl", hash = "sha256:88bb8bfb03ff0cc7a7d32ffabd108d0a284f4333f33a9de27ce158b6d828bc29", size = 88607, upload-time = "2026-03-10T18:41:18.321Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/47/dc31d7ffd6f6687ce4cc0114e01cf1f7f13f9ba841cd47dac5a983e57fb9/openhands_agent_server-1.12.0-py3-none-any.whl", hash = "sha256:3bd62fef10092f1155af116a8a7417041d574eff9d4e4b6f7a24bfc432de2fad", size = 87800, upload-time = "2026-03-05T19:22:27.857Z" },
 ]

 [[package]]
@@ -3826,9 +3826,9 @@ requires-dist = [
    { name = "numpy" },
    { name = "openai", specifier = "==2.8" },
    { name = "openhands-aci", specifier = "==0.3.3" },
-    { name = "openhands-agent-server", specifier = "==1.13" },
-    { name = "openhands-sdk", specifier = "==1.13" },
-    { name = "openhands-tools", specifier = "==1.13" },
+    { name = "openhands-agent-server", specifier = "==1.12" },
+    { name = "openhands-sdk", specifier = "==1.12" },
+    { name = "openhands-tools", specifier = "==1.12" },
    { name = "opentelemetry-api", specifier = ">=1.33.1" },
    { name = "opentelemetry-exporter-otlp-proto-grpc", specifier = ">=1.33.1" },
    { name = "pathspec", specifier = ">=0.12.1" },
@@ -3906,7 +3906,7 @@ test = [

 [[package]]
 name = "openhands-sdk"
-version = "1.13.0"
+version = "1.12.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "agent-client-protocol" },
@@ -3923,14 +3923,14 @@ dependencies = [
    { name = "tenacity" },
    { name = "websockets" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/76/d0/5e35e99252f16c3e9b8eec843b7054ed7d3ad9fadcc0b40064ab3de55469/openhands_sdk-1.13.0.tar.gz", hash = "sha256:fbb2a2dc4852ea23cc697a36fb3f95ca47cfef432b0d195c496de6f374caad9c", size = 330526, upload-time = "2026-03-10T18:41:19.513Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/46/44/715dd4c43e1a4ba2c47ebd251240dd6aca0dd604cc1354932f0344f93b40/openhands_sdk-1.12.0.tar.gz", hash = "sha256:ac348e7134ea21e1ab453978962504aff8eb47e62df1fb7a503d769d55658ea9", size = 323133, upload-time = "2026-03-05T19:22:26.623Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/12/b1/31737964179a8e5a0ed1d0485082a703e2d4cd346701ab4a383ddf33eebb/openhands_sdk-1.13.0-py3-none-any.whl", hash = "sha256:ec83f9fa2934aae9c4ce1c0365a7037f7e17869affa44a40e71ba49d2bef7185", size = 420504, upload-time = "2026-03-10T18:41:24.224Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/2f/b7ba4f261d806aaab46f372d2049503ccedde373bb0648b88ebce58ebfe7/openhands_sdk-1.12.0-py3-none-any.whl", hash = "sha256:857793f5c27fd63c0d4d37762550e6c504a03dd06116475c23adcc14bb5c4c02", size = 411337, upload-time = "2026-03-05T19:22:29.369Z" },
 ]

 [[package]]
 name = "openhands-tools"
-version = "1.13.0"
+version = "1.12.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "bashlex" },
@@ -3943,9 +3943,9 @@ dependencies = [
    { name = "pydantic" },
    { name = "tom-swe" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/8c/91/0af0f29dc0da57e7df13bd1653eff80d5c47b8311c6825568837d6ba2af7/openhands_tools-1.13.0.tar.gz", hash = "sha256:e1181701efab5bc3133566e3b1640027824147438959cd8ce7430c941896704d", size = 111922, upload-time = "2026-03-10T18:41:26.872Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/2b/84/9552e75326c341707d36f7a86ba9a55a8fcb48bfd97e4d1ebe989260fdd8/openhands_tools-1.12.0.tar.gz", hash = "sha256:f2b4d81d0b6771f5416f8b702db09a14999fa8e553073bcf38f344e29aae770c", size = 110293, upload-time = "2026-03-05T19:22:23.906Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a2/e7/44d677fdd73f249c9bc8a76d2a32848ed96f54324b7d4b0589bb70f7d4e8/openhands_tools-1.13.0-py3-none-any.whl", hash = "sha256:87073b868e20f9c769497f480e0d15b14ca41314c3d1cb5076029f37408a1d68", size = 152193, upload-time = "2026-03-10T18:41:20.563Z" },
+    { url = "https://files.pythonhosted.org/packages/81/26/70031063c81bb1215f5a5d85c33c4e62e6a3d318dd8e3609e5ce68040faa/openhands_tools-1.12.0-py3-none-any.whl", hash = "sha256:57207e9e30f9d7fe9121cd21b072580cfdc2a00831edeaf8e8d685d721bb9e33", size = 150468, upload-time = "2026-03-05T19:22:24.974Z" },
 ]

 [[package]]
@@ -8528,19 +8528,21 @@ wheels = [

 [[package]]
 name = "tornado"
-version = "6.5.5"
+version = "6.5.4"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/f8/f1/3173dfa4a18db4a9b03e5d55325559dab51ee653763bb8745a75af491286/tornado-6.5.5.tar.gz", hash = "sha256:192b8f3ea91bd7f1f50c06955416ed76c6b72f96779b962f07f911b91e8d30e9", size = 516006, upload-time = "2026-03-10T21:31:02.067Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/37/1d/0a336abf618272d53f62ebe274f712e213f5a03c0b2339575430b8362ef2/tornado-6.5.4.tar.gz", hash = "sha256:a22fa9047405d03260b483980635f0b041989d8bcc9a313f8fe18b411d84b1d7", size = 513632, upload-time = "2025-12-15T19:21:03.836Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/59/8c/77f5097695f4dd8255ecbd08b2a1ed8ba8b953d337804dd7080f199e12bf/tornado-6.5.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:487dc9cc380e29f58c7ab88f9e27cdeef04b2140862e5076a66fb6bb68bb1bfa", size = 445983, upload-time = "2026-03-10T21:30:44.28Z" },
-    { url = "https://files.pythonhosted.org/packages/ab/5e/7625b76cd10f98f1516c36ce0346de62061156352353ef2da44e5c21523c/tornado-6.5.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:65a7f1d46d4bb41df1ac99f5fcb685fb25c7e61613742d5108b010975a9a6521", size = 444246, upload-time = "2026-03-10T21:30:46.571Z" },
-    { url = "https://files.pythonhosted.org/packages/b2/04/7b5705d5b3c0fab088f434f9c83edac1573830ca49ccf29fb83bf7178eec/tornado-6.5.5-cp39-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e74c92e8e65086b338fd56333fb9a68b9f6f2fe7ad532645a290a464bcf46be5", size = 447229, upload-time = "2026-03-10T21:30:48.273Z" },
-    { url = "https://files.pythonhosted.org/packages/34/01/74e034a30ef59afb4097ef8659515e96a39d910b712a89af76f5e4e1f93c/tornado-6.5.5-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:435319e9e340276428bbdb4e7fa732c2d399386d1de5686cb331ec8eee754f07", size = 448192, upload-time = "2026-03-10T21:30:51.22Z" },
-    { url = "https://files.pythonhosted.org/packages/be/00/fe9e02c5a96429fce1a1d15a517f5d8444f9c412e0bb9eadfbe3b0fc55bf/tornado-6.5.5-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3f54aa540bdbfee7b9eb268ead60e7d199de5021facd276819c193c0fb28ea4e", size = 448039, upload-time = "2026-03-10T21:30:53.52Z" },
-    { url = "https://files.pythonhosted.org/packages/82/9e/656ee4cec0398b1d18d0f1eb6372c41c6b889722641d84948351ae19556d/tornado-6.5.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:36abed1754faeb80fbd6e64db2758091e1320f6bba74a4cf8c09cd18ccce8aca", size = 447445, upload-time = "2026-03-10T21:30:55.541Z" },
-    { url = "https://files.pythonhosted.org/packages/5a/76/4921c00511f88af86a33de770d64141170f1cfd9c00311aea689949e274e/tornado-6.5.5-cp39-abi3-win32.whl", hash = "sha256:dd3eafaaeec1c7f2f8fdcd5f964e8907ad788fe8a5a32c4426fbbdda621223b7", size = 448582, upload-time = "2026-03-10T21:30:57.142Z" },
-    { url = "https://files.pythonhosted.org/packages/2c/23/f6c6112a04d28eed765e374435fb1a9198f73e1ec4b4024184f21faeb1ad/tornado-6.5.5-cp39-abi3-win_amd64.whl", hash = "sha256:6443a794ba961a9f619b1ae926a2e900ac20c34483eea67be4ed8f1e58d3ef7b", size = 448990, upload-time = "2026-03-10T21:30:58.857Z" },
-    { url = "https://files.pythonhosted.org/packages/b7/c8/876602cbc96469911f0939f703453c1157b0c826ecb05bdd32e023397d4e/tornado-6.5.5-cp39-abi3-win_arm64.whl", hash = "sha256:2c9a876e094109333f888539ddb2de4361743e5d21eece20688e3e351e4990a6", size = 448016, upload-time = "2026-03-10T21:31:00.43Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/a9/e94a9d5224107d7ce3cc1fab8d5dc97f5ea351ccc6322ee4fb661da94e35/tornado-6.5.4-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d6241c1a16b1c9e4cc28148b1cda97dd1c6cb4fb7068ac1bedc610768dff0ba9", size = 443909, upload-time = "2025-12-15T19:20:48.382Z" },
+    { url = "https://files.pythonhosted.org/packages/db/7e/f7b8d8c4453f305a51f80dbb49014257bb7d28ccb4bbb8dd328ea995ecad/tornado-6.5.4-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2d50f63dda1d2cac3ae1fa23d254e16b5e38153758470e9956cbc3d813d40843", size = 442163, upload-time = "2025-12-15T19:20:49.791Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/b5/206f82d51e1bfa940ba366a8d2f83904b15942c45a78dd978b599870ab44/tornado-6.5.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1cf66105dc6acb5af613c054955b8137e34a03698aa53272dbda4afe252be17", size = 445746, upload-time = "2025-12-15T19:20:51.491Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/9d/1a3338e0bd30ada6ad4356c13a0a6c35fbc859063fa7eddb309183364ac1/tornado-6.5.4-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50ff0a58b0dc97939d29da29cd624da010e7f804746621c78d14b80238669335", size = 445083, upload-time = "2025-12-15T19:20:52.778Z" },
+    { url = "https://files.pythonhosted.org/packages/50/d4/e51d52047e7eb9a582da59f32125d17c0482d065afd5d3bc435ff2120dc5/tornado-6.5.4-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5fb5e04efa54cf0baabdd10061eb4148e0be137166146fff835745f59ab9f7f", size = 445315, upload-time = "2025-12-15T19:20:53.996Z" },
+    { url = "https://files.pythonhosted.org/packages/27/07/2273972f69ca63dbc139694a3fc4684edec3ea3f9efabf77ed32483b875c/tornado-6.5.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9c86b1643b33a4cd415f8d0fe53045f913bf07b4a3ef646b735a6a86047dda84", size = 446003, upload-time = "2025-12-15T19:20:56.101Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/83/41c52e47502bf7260044413b6770d1a48dda2f0246f95ee1384a3cd9c44a/tornado-6.5.4-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:6eb82872335a53dd063a4f10917b3efd28270b56a33db69009606a0312660a6f", size = 445412, upload-time = "2025-12-15T19:20:57.398Z" },
+    { url = "https://files.pythonhosted.org/packages/10/c7/bc96917f06cbee182d44735d4ecde9c432e25b84f4c2086143013e7b9e52/tornado-6.5.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6076d5dda368c9328ff41ab5d9dd3608e695e8225d1cd0fd1e006f05da3635a8", size = 445392, upload-time = "2025-12-15T19:20:58.692Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/1a/d7592328d037d36f2d2462f4bc1fbb383eec9278bc786c1b111cbbd44cfa/tornado-6.5.4-cp39-abi3-win32.whl", hash = "sha256:1768110f2411d5cd281bac0a090f707223ce77fd110424361092859e089b38d1", size = 446481, upload-time = "2025-12-15T19:21:00.008Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/6d/c69be695a0a64fd37a97db12355a035a6d90f79067a3cf936ec2b1dc38cd/tornado-6.5.4-cp39-abi3-win_amd64.whl", hash = "sha256:fa07d31e0cd85c60713f2b995da613588aa03e1303d75705dca6af8babc18ddc", size = 446886, upload-time = "2025-12-15T19:21:01.287Z" },
+    { url = "https://files.pythonhosted.org/packages/50/49/8dc3fd90902f70084bd2cd059d576ddb4f8bb44c2c7c0e33a11422acb17e/tornado-6.5.4-cp39-abi3-win_arm64.whl", hash = "sha256:053e6e16701eb6cbe641f308f4c1a9541f91b6261991160391bfc342e8a551a1", size = 445910, upload-time = "2025-12-15T19:21:02.571Z" },
 ]

 [[package]]
Author	SHA1	Message	Date
openhands	05270dfe2a	Add GitHub resolver integration tests with mock server This adds integration tests for the GitHub resolver feature: - Mock GitHub Server (mocks/github-mock-server.ts): - Simulates GitHub REST API endpoints - Handles webhook signature verification - Records webhook events and outgoing responses - Provides test control endpoints for assertions - Webhook Payload Templates (mocks/github-webhook-payloads.ts): - Issue labeled events - Issue comment events - PR review comment events - Mock GitHub Client (mocks/mock-github-client.ts): - Client utilities for triggering webhooks - Helpers for waiting on resolver responses - GitHub Resolver Test Spec (tests/github-resolver.spec.ts): - Mock Server Mode: Tests full webhook flow with mock server - Live Environment Mode: Tests against staging/production - Error handling tests for invalid signatures and malformed data - Tests run against the existing authenticated session - Updated package.json with new scripts: - npm run test:github-resolver - npm run mock:github - Updated README with comprehensive documentation Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-12 14:46:06 +00:00
openhands	b9bd04e1cb	Add ESLint and Prettier lint checks for integration tests - Add .eslintrc with airbnb-base + TypeScript + Prettier config - Add .prettierrc.json matching frontend configuration - Add lint dependencies to package.json - Add typecheck, lint, and lint:fix scripts - Auto-format all TypeScript files with Prettier - Fix unused imports and parameters Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 21:31:18 +00:00
tofarr	afc499933b	Lint fix	2026-03-11 15:26:32 -06:00
tofarr	8f0e372133	Marked tests critical	2026-03-11 15:23:13 -06:00
tofarr	25540c6b4e	Fixed delete test	2026-03-11 15:19:52 -06:00
openhands	49627d44ca	Fix API key test: add wait after deletion and visibility check - Add 1 second wait after key deletion for page to settle - Add explicit visibility check before clicking Create API Key button Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 21:08:37 +00:00
tofarr	c18c9e2118	Moved API test to end	2026-03-11 15:04:13 -06:00
openhands	fb45bb2c92	Add API key creation and validation test Test flow: - Navigate to API Keys page via user menu - Verify Refresh API Key button is visible (indicates credits available) - Delete existing 'Integration Test Key' if present - Create new API key named 'Integration Test Key' - Capture the key from the modal - Test the key by calling GET /api/v1/sandboxes/search with X-Access-Token header - Verify response contains at least 1 sandbox Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 20:16:48 +00:00
tofarr	e65e9103d8	Better button detection	2026-03-11 13:57:35 -06:00
openhands	29f5bfdf0c	Fix Stripe checkout: wait for Pay button to be attached (not visible) Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 19:42:49 +00:00
openhands	8c90ddcea2	Fix Stripe checkout: wait for Pay button instead of networkidle The networkidle wait was timing out because the 'Pay with Link' feature loads slowly. Since we don't use that feature, we now wait for the Pay button to be visible instead, which indicates the form is ready to fill. Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 19:39:04 +00:00
openhands	ba26ad5147	Add Stripe billing test for purchasing $10 credits Test flow: - Navigate to billing page via user menu - Capture initial balance - Enter $10 and click Add Credit - Fill Stripe checkout form (test card 5105105105105100) - Submit payment and return to billing page - Verify balance increased by exactly $10 Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 19:23:10 +00:00
openhands	33c6f63589	Update Tavily test to navigate to existing conversation Instead of starting a new conversation, the Tavily search test now navigates to the first running conversation from the recent conversations list. This is faster and tests the navigation flow. Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 18:59:33 +00:00
Tim O'Farrell	63365e2793	Merge branch 'main' into add-playwright-smoke-tests	2026-03-11 12:56:00 -06:00
openhands	b45cf6d23f	Refactor smoke tests to use waitForMessageContaining - Remove TEST_PROMPT environment variable (use specific prompts per test) - Add waitForMessageContaining() method to ConversationPage that polls for a message containing expected text instead of just getting last message - Update 'start conversation' test to use 'Reverse the word hello' prompt and wait for 'olleh' in response - Update Tavily test to wait for 'Micheál Martin' in response - This fixes flaky tests where LLM outputs additional messages after the expected response Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 18:50:31 +00:00
Tim O'Farrell	80dc8b0fbf	Merge branch 'main' into add-playwright-smoke-tests	2026-03-11 12:28:23 -06:00
openhands	ab3b5dc947	Add Tavily search test for Irish Prime Minister Add test that: - Starts a new conversation - Prompts agent to use Tavily search to find the PM of Ireland - Verifies response contains 'Micheál Martin' - Uses 180s timeout to allow for search operation Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 18:03:47 +00:00
openhands	43bc9a8a1a	Replace error banner test with navigate to conversation test - Remove 'should not display error banner on successful interaction' test - Add 'should be able to navigate to a running conversation' test that: - Goes to home page - Clicks first conversation in recent conversations list - Waits for conversation status to show 'Waiting for task' - Add clickFirstConversation() method to HomePage Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 17:37:39 +00:00
openhands	7902806a92	Add verification that coin flip response contains heads or tails Verify the agent's response to 'Flip a coin!' contains either 'heads' or 'tails' (case insensitive) to ensure the agent actually processed the request correctly. Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 17:18:25 +00:00
openhands	610856cdd4	Merge conversation start and prompt tests into single test Combined 'should be able to start a conversation and interact with agent' and 'should be able to send a prompt and receive response without errors' into one test since sending a prompt depends on having started a conversation. This avoids duplicate setup (navigating to home, starting conversation, waiting for ready) and tests the complete user flow in one go. Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 15:09:17 +00:00
openhands	500005ec4d	Fix openUserMenu to wait for async state before interacting The account settings menu is conditionally rendered based on async state (config loaded, user authenticated, providers loaded). The previous implementation would fail because: 1. The menu DOM element doesn't exist until async conditions are met 2. Even if avatar is visible, the menu might not be rendered yet Changes: - Wait for user avatar to be visible first - Wait for menu to be 'attached' to DOM (ensures async state loaded) - Hover over user-actions container (parent with group class) to trigger the CSS group-hover visibility - Then verify menu is visible Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 13:11:16 +00:00
tofarr	ac0674287c	Enable user menu by click rather than hover	2026-03-11 06:50:56 -06:00
openhands	df6c5f4de3	Fix TypeScript errors in smoke tests - Add default value for buttonId parameter in startNewConversation() - Fix TEST_REPO_URL undefined handling with proper conditional checks - Restore openUserMenu to use hover instead of click Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 12:46:28 +00:00
openhands	6f1df78b78	Update waitForConversationReady to check for 'Waiting for task' text - Change default timeout from 90s to 30s - Replace input enabled check with text-based search for 'Waiting for task' - Using text search since data-testid is not yet deployed to staging Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 12:44:49 +00:00
tofarr	f8b456962c	WIP	2026-03-11 06:39:37 -06:00
openhands	213fc9d752	Add agent status wait for 'Waiting for task' in smoke tests - Add data-testid='agent-status-text' to agent-status.tsx span element - Add wait for agent status to show 'Waiting for task' before proceeding in smoke.spec.ts conversation test Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 02:02:09 +00:00
Tim O'Farrell	8b78397ec4	Merge branch 'main' into add-playwright-smoke-tests	2026-03-10 19:53:54 -06:00
tofarr	adc6ec77ae	Revert back to click rather than hover	2026-03-10 19:53:10 -06:00
tofarr	c1cd21e94d	Merge branch 'add-playwright-smoke-tests' of https://github.com/OpenHands/OpenHands into add-playwright-smoke-tests	2026-03-10 19:51:41 -06:00
tofarr	36bf86db36	General updates - starting a conversation	2026-03-10 19:51:03 -06:00
openhands	72ab6aed88	Fix user menu to use hover instead of click in Playwright tests The user settings menu appears on hover in non-mobile mode due to CSS group-hover classes, not on click. Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-10 19:42:56 +00:00
openhands	7426b913a1	Handle Keycloak session redirect and TOS acceptance in Playwright tests - Update authenticateWithGitHub to detect redirects to home page or /accept-tos when user is already logged in to Keycloak - Add handleTOSAcceptance function to check TOS checkbox and submit - Add post-authentication TOS handling for users completing GitHub OAuth Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-10 18:36:10 +00:00
tofarr	4b32786588	Updated test	2026-03-10 09:26:12 -06:00
tofarr	c4929925fb	Added prompt	2026-03-09 20:50:22 -06:00
openhands	158799b45e	fix: make storageState conditional on auth file existence The Playwright config was failing with ENOENT when auth.json didn't exist because storageState was unconditionally set to the auth file path. Now the config checks if fixtures/auth.json exists before setting storageState, allowing the setup project to run first and create the auth file.	2026-03-10 02:49:04 +00:00
Tim O'Farrell	4f0f0d0b98	Merge branch 'main' into add-playwright-smoke-tests	2026-03-09 17:44:20 -06:00
openhands	07c9c3016c	chore: Disable automatic workflow triggers, manual only for now Keep only workflow_dispatch trigger active. Automatic triggers (push, PR, schedule) and dependent jobs are commented out and can be re-enabled later. Co-authored-by: openhands <openhands@all-hands.dev>	2026-02-10 09:50:17 +00:00
openhands	0f912e09cc	feat: Add Playwright-based smoke tests for integration testing Add a comprehensive integration test framework using Playwright to enable automated smoke testing across different environments (staging, production, feature branches). Features: - Multi-environment support with configurable BASE_URL - GitHub OAuth and Keycloak authentication handling - Page Object Model architecture for maintainable tests - Authentication state persistence for faster test runs - GitHub Actions workflow for CI/CD integration Tests cover: - User authentication flow - Home screen accessibility - Repository selection - Conversation creation and agent interaction - Error-free prompt/response cycle - Health checks (SSL, console errors) Co-authored-by: openhands <openhands@all-hands.dev>	2026-02-10 09:47:52 +00:00