Add GitHub resolver integration tests with mock server

This adds integration tests for the GitHub resolver feature:

- Mock GitHub Server (mocks/github-mock-server.ts):
  - Simulates GitHub REST API endpoints
  - Handles webhook signature verification
  - Records webhook events and outgoing responses
  - Provides test control endpoints for assertions

- Webhook Payload Templates (mocks/github-webhook-payloads.ts):
  - Issue labeled events
  - Issue comment events
  - PR review comment events

- Mock GitHub Client (mocks/mock-github-client.ts):
  - Client utilities for triggering webhooks
  - Helpers for waiting on resolver responses

- GitHub Resolver Test Spec (tests/github-resolver.spec.ts):
  - Mock Server Mode: Tests full webhook flow with mock server
  - Live Environment Mode: Tests against staging/production
  - Error handling tests for invalid signatures and malformed data
  - Tests run against the existing authenticated session

- Updated package.json with new scripts:
  - npm run test:github-resolver
  - npm run mock:github

- Updated README with comprehensive documentation

Co-authored-by: openhands <openhands@all-hands.dev>
Add ESLint and Prettier lint checks for integration tests
2026-04-29 03:00:45 -04:00 · 2026-03-12 14:46:06 +00:00 · 2026-03-11 21:31:18 +00:00 · 2026-03-11 15:26:32 -06:00 · 2026-03-11 15:23:13 -06:00 · 2026-03-11 15:19:52 -06:00
86 changed files with 11038 additions and 1101 deletions
@@ -55,7 +55,7 @@ jobs:
      - name: Build Environment
        run: make build
      - name: Run Unit Tests
-        run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest -n auto -s ./tests/unit --cov=openhands --cov-branch
+        run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest --forked -n auto -s ./tests/unit --cov=openhands --cov-branch
        env:
          COVERAGE_FILE: ".coverage.${{ matrix.python_version }}"
      - name: Run Runtime Tests with CLIRuntime
@@ -91,7 +91,7 @@ jobs:
        run: poetry install --with dev,test
      - name: Run Unit Tests
        # Use base working directory for coverage paths to line up.
-        run: PYTHONPATH=".:$PYTHONPATH" poetry run --project=enterprise pytest -n auto -s -p no:ddtrace -p no:ddtrace.pytest_bdd -p no:ddtrace.pytest_benchmark ./enterprise/tests/unit --cov=enterprise --cov-branch
+        run: PYTHONPATH=".:$PYTHONPATH" poetry run --project=enterprise pytest --forked -n auto -s -p no:ddtrace -p no:ddtrace.pytest_bdd -p no:ddtrace.pytest_benchmark ./enterprise/tests/unit --cov=enterprise --cov-branch
        env:
          COVERAGE_FILE: ".coverage.enterprise.${{ matrix.python_version }}"
      - name: Store coverage file
@@ -0,0 +1,193 @@
+name: Smoke Tests
+
+on:
+  # Manual trigger only - uncomment triggers below to enable automatic runs
+  # push:
+  #   branches: [main]
+  # pull_request:
+  #   branches: [main]
+  # schedule:
+  #   - cron: '0 */6 * * *'
+
+  # Manual trigger with environment selection
+  workflow_dispatch:
+    inputs:
+      environment:
+        description: 'Target environment'
+        required: true
+        default: 'staging'
+        type: choice
+        options:
+          - staging
+          - production
+      base_url:
+        description: 'Custom base URL (overrides environment selection)'
+        required: false
+        type: string
+
+# Cancel previous runs for the same branch AND target environment.
+# The environment is part of the group so that dispatching a staging run does
+# not cancel an in-flight production run started from the same ref (and vice
+# versa). Runs without an input (non-dispatch triggers) fall back to 'staging'.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event.inputs.environment || 'staging' }}
+  cancel-in-progress: true
+
+# Node.js version shared by every job in this workflow.
+env:
+  NODE_VERSION: '22'
+
+jobs:
+  # Runs the Playwright suite in ./integration_tests against the selected
+  # environment (or a custom base URL) and uploads the resulting reports.
+  smoke-tests:
+    name: Smoke Tests (${{ github.event.inputs.environment || 'staging' }})
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          cache: 'npm'
+          cache-dependency-path: integration_tests/package-lock.json
+
+      - name: Install dependencies
+        working-directory: ./integration_tests
+        run: npm ci
+
+      - name: Install Playwright browsers
+        working-directory: ./integration_tests
+        run: npx playwright install --with-deps chromium
+
+      - name: Determine base URL
+        id: base-url
+        # The dispatch inputs are handed to the script through environment
+        # variables instead of being expanded into the shell source, so their
+        # contents can never be interpreted as shell code.
+        env:
+          BASE_URL_INPUT: ${{ github.event.inputs.base_url }}
+          TARGET_ENVIRONMENT: ${{ github.event.inputs.environment }}
+        run: |
+          # Precedence: explicit custom URL > production > staging (default)
+          if [ -n "$BASE_URL_INPUT" ]; then
+            echo "url=$BASE_URL_INPUT" >> "$GITHUB_OUTPUT"
+          elif [ "$TARGET_ENVIRONMENT" = "production" ]; then
+            echo "url=https://app.all-hands.dev" >> "$GITHUB_OUTPUT"
+          else
+            echo "url=https://staging.all-hands.dev" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Run smoke tests
+        working-directory: ./integration_tests
+        env:
+          BASE_URL: ${{ steps.base-url.outputs.url }}
+          AUTH_METHOD: github
+          GITHUB_TEST_USERNAME: ${{ secrets.SMOKE_TEST_GITHUB_USERNAME }}
+          GITHUB_TEST_PASSWORD: ${{ secrets.SMOKE_TEST_GITHUB_PASSWORD }}
+          GITHUB_TEST_TOTP_SECRET: ${{ secrets.SMOKE_TEST_GITHUB_TOTP_SECRET }}
+          TEST_REPO_URL: ${{ secrets.SMOKE_TEST_REPO_URL }}
+          CI: true
+        run: npm test
+
+      # Always publish the HTML report, even when the tests fail or are cancelled.
+      - name: Upload test results
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: playwright-report-${{ github.run_id }}
+          path: integration_tests/playwright-report/
+          retention-days: 30
+
+      # On failure, additionally keep the raw test-results directory.
+      - name: Upload test artifacts
+        uses: actions/upload-artifact@v4
+        if: failure()
+        with:
+          name: test-results-${{ github.run_id }}
+          path: |
+            integration_tests/test-results/
+            integration_tests/playwright-report/
+          retention-days: 14
+
+  # Notify on failure (optional - configure Slack/Discord webhook)
+  # Disabled until automatic triggers are enabled
+  # Placeholder notification job: runs after smoke-tests and only prints a
+  # message plus a link to the run. Currently switched off via `if: false`.
+  notify-failure:
+    name: Notify on Failure
+    needs: smoke-tests
+    if: false # Disabled - set to: failure() && github.ref == 'refs/heads/main'
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Send notification
+        run: |
+          echo "Smoke tests failed on main branch!"
+          echo "View results: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+          # Add Slack/Discord notification here if needed
+          # Example with curl to Slack webhook:
+          # curl -X POST -H 'Content-type: application/json' \
+          #   --data '{"text":"🚨 Smoke tests failed on main! <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Results>"}' \
+          #   ${{ secrets.SLACK_WEBHOOK_URL }}
+
+  # Feature branch smoke tests (disabled - was triggered by PR label)
+  # To enable: uncomment pull_request trigger above and this job
+  # Runs the smoke suite against the per-branch staging deployment
+  # (https://<sanitized-branch>.staging.all-hands.dev) and posts the result
+  # as a comment on the pull request.
+  feature-branch-test:
+    name: Feature Branch Smoke Test
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    if: false # Disabled - set to: github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'smoke-test')
+    # Least-privilege token: read the repository for checkout, write the
+    # result comment on the pull request.
+    permissions:
+      contents: read
+      pull-requests: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          cache: 'npm'
+          cache-dependency-path: integration_tests/package-lock.json
+
+      - name: Install dependencies
+        working-directory: ./integration_tests
+        run: npm ci
+
+      - name: Install Playwright browsers
+        working-directory: ./integration_tests
+        run: npx playwright install --with-deps chromium
+
+      - name: Extract branch name for URL
+        id: branch
+        # The PR branch name is chosen by the PR author, so it is passed via
+        # the environment rather than expanded into the shell source; this
+        # prevents a crafted branch name from being executed as shell code.
+        env:
+          BRANCH_NAME: ${{ github.head_ref }}
+        run: |
+          # Sanitize branch name for URL (replace special chars)
+          SANITIZED=$(printf '%s\n' "$BRANCH_NAME" | sed 's/[^a-zA-Z0-9-]/-/g' | tr '[:upper:]' '[:lower:]')
+          echo "name=$SANITIZED" >> "$GITHUB_OUTPUT"
+
+      - name: Run smoke tests against feature branch
+        id: smoke
+        working-directory: ./integration_tests
+        # NOTE(review): unlike the smoke-tests job, GITHUB_TEST_TOTP_SECRET and
+        # TEST_REPO_URL are not passed here - confirm whether the suite needs them.
+        env:
+          BASE_URL: https://${{ steps.branch.outputs.name }}.staging.all-hands.dev
+          AUTH_METHOD: github
+          GITHUB_TEST_USERNAME: ${{ secrets.SMOKE_TEST_GITHUB_USERNAME }}
+          GITHUB_TEST_PASSWORD: ${{ secrets.SMOKE_TEST_GITHUB_PASSWORD }}
+          CI: true
+        run: npm test
+        # Non-blocking: a test failure does not fail the job; the real result
+        # is reported through the PR comment step below.
+        continue-on-error: true
+
+      - name: Upload test results
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: feature-branch-report-${{ github.run_id }}
+          path: integration_tests/playwright-report/
+          retention-days: 7
+
+      - name: Comment on PR with results
+        uses: actions/github-script@v7
+        if: always()
+        with:
+          script: |
+            // The test step uses continue-on-error, so job.status stays
+            // 'success' even when the tests fail. Read the step's own outcome
+            // (its result before continue-on-error is applied) instead.
+            const outcome = '${{ steps.smoke.outcome }}';
+            const branchUrl = 'https://${{ steps.branch.outputs.name }}.staging.all-hands.dev';
+
+            const body = outcome === 'success'
+              ? `✅ **Smoke tests passed** against [${branchUrl}](${branchUrl})`
+              : `❌ **Smoke tests failed** against [${branchUrl}](${branchUrl})\n\nView the [test report](${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}) for details.`;
+
+            // Await the request so API errors surface as a step failure
+            // instead of an unhandled promise rejection.
+            await github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: body
+            });
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.3.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.

 [[package]]
 name = "agent-client-protocol"
@@ -3501,7 +3501,7 @@ files = [
 [package.dependencies]
 googleapis-common-protos = ">=1.5.5"
 grpcio = ">=1.71.2"
-protobuf = ">=5.26.1,<6.0.dev0"
+protobuf = ">=5.26.1,<6.0dev"

 [[package]]
 name = "gspread"
@@ -3819,7 +3819,7 @@ pfzy = ">=0.3.1,<0.4.0"
 prompt-toolkit = ">=3.0.1,<4.0.0"

 [package.extras]
-docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17b43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"]
+docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17-beta.43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"]

 [[package]]
 name = "installer"
@@ -4258,7 +4258,7 @@ fqdn = {version = "*", optional = true, markers = "extra == \"format-nongpl\""}
 idna = {version = "*", optional = true, markers = "extra == \"format-nongpl\""}
 isoduration = {version = "*", optional = true, markers = "extra == \"format-nongpl\""}
 jsonpointer = {version = ">1.13", optional = true, markers = "extra == \"format-nongpl\""}
-jsonschema-specifications = ">=2023.3.6"
+jsonschema-specifications = ">=2023.03.6"
 referencing = ">=0.28.4"
 rfc3339-validator = {version = "*", optional = true, markers = "extra == \"format-nongpl\""}
 rfc3986-validator = {version = ">0.1.0", optional = true, markers = "extra == \"format-nongpl\""}
@@ -4648,7 +4648,7 @@ files = [
 ]

 [package.dependencies]
-certifi = ">=14.5.14"
+certifi = ">=14.05.14"
 durationpy = ">=0.7"
 google-auth = ">=1.0.1"
 oauthlib = ">=3.2.2"
@@ -6889,7 +6889,7 @@ files = [
 ]

 [package.extras]
-docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17b43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"]
+docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17-beta.43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"]

 [[package]]
 name = "pg8000"
@@ -7551,6 +7551,18 @@ files = [
    {file = "puremagic-1.30.tar.gz", hash = "sha256:f9ff7ac157d54e9cf3bff1addfd97233548e75e685282d84ae11e7ffee1614c9"},
 ]

+[[package]]
+name = "py"
+version = "1.11.0"
+description = "library with cross-python path, ini-parsing, io, code, log facilities"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+groups = ["test"]
+files = [
+    {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
+    {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
+]
+
 [[package]]
 name = "py-key-value-aio"
 version = "0.4.4"
@@ -11587,14 +11599,14 @@ diagrams = ["jinja2", "railroad-diagrams"]

 [[package]]
 name = "pypdf"
-version = "6.7.5"
+version = "6.8.0"
 description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files"
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "pypdf-6.7.5-py3-none-any.whl", hash = "sha256:07ba7f1d6e6d9aa2a17f5452e320a84718d4ce863367f7ede2fd72280349ab13"},
-    {file = "pypdf-6.7.5.tar.gz", hash = "sha256:40bb2e2e872078655f12b9b89e2f900888bb505e88a82150b64f9f34fa25651d"},
+    {file = "pypdf-6.8.0-py3-none-any.whl", hash = "sha256:2a025080a8dd73f48123c89c57174a5ff3806c71763ee4e49572dc90454943c7"},
+    {file = "pypdf-6.8.0.tar.gz", hash = "sha256:cb7eaeaa4133ce76f762184069a854e03f4d9a08568f0e0623f7ea810407833b"},
 ]

 [package.extras]
@@ -11691,6 +11703,22 @@ pytest = ">=7"
 [package.extras]
 testing = ["process-tests", "pytest-xdist", "virtualenv"]

+[[package]]
+name = "pytest-forked"
+version = "1.6.0"
+description = "run tests in isolated forked subprocesses"
+optional = false
+python-versions = ">=3.7"
+groups = ["test"]
+files = [
+    {file = "pytest-forked-1.6.0.tar.gz", hash = "sha256:4dafd46a9a600f65d822b8f605133ecf5b3e1941ebb3588e943b4e3eb71a5a3f"},
+    {file = "pytest_forked-1.6.0-py3-none-any.whl", hash = "sha256:810958f66a91afb1a1e2ae83089d8dc1cd2437ac96b12963042fbb9fb4d16af0"},
+]
+
+[package.dependencies]
+py = "*"
+pytest = ">=3.10"
+
 [[package]]
 name = "pytest-xdist"
 version = "3.8.0"
@@ -12838,10 +12866,10 @@ files = [
 ]

 [package.dependencies]
-botocore = ">=1.37.4,<2.0a0"
+botocore = ">=1.37.4,<2.0a.0"

 [package.extras]
-crt = ["botocore[crt] (>=1.37.4,<2.0a0)"]
+crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"]

 [[package]]
 name = "scantree"
@@ -14978,9 +15006,9 @@ files = [
 ]

 [package.extras]
-cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and python_version < \"3.14\"", "cffi (>=2.0.0b0) ; platform_python_implementation != \"PyPy\" and python_version >= \"3.14\""]
+cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and python_version < \"3.14\"", "cffi (>=2.0.0b) ; platform_python_implementation != \"PyPy\" and python_version >= \"3.14\""]

 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12,<3.14"
-content-hash = "4221146bf5d0dda799dde9ecdec5d38db556db8a759549efe7d67372b5750b67"
+content-hash = "ef037f6d6085d26166d35c56ce266439f8f1a4fea90bc43ccf15cfeaf116cae5"
@@ -61,6 +61,7 @@ types-requests = "^2.32.4.20250611"
 pytest = "*"
 pytest-cov = "*"
 pytest-asyncio = "*"
+pytest-forked = "*"
 pytest-xdist = "*"
 flake8 = "*"
 openai = "*"
@@ -12,11 +12,8 @@ from server.auth.auth_error import (
 )
 from server.auth.gitlab_sync import schedule_gitlab_repo_sync
 from server.auth.saas_user_auth import SaasUserAuth, token_manager
-from server.routes.auth import (
-    get_cookie_domain,
-    get_cookie_samesite,
-    set_response_cookie,
-)
+from server.routes.auth import set_response_cookie
+from server.utils.url_utils import get_cookie_domain, get_cookie_samesite

 from openhands.core.logger import openhands_logger as logger
 from openhands.server.user_auth.user_auth import AuthType, UserAuth, get_user_auth
@@ -93,8 +90,8 @@ class SetAuthCookieMiddleware:
            if keycloak_auth_cookie:
                response.delete_cookie(
                    key='keycloak_auth',
-                    domain=get_cookie_domain(request),
-                    samesite=get_cookie_samesite(request),
+                    domain=get_cookie_domain(),
+                    samesite=get_cookie_samesite(),
                )
            return response

@@ -3,7 +3,7 @@ import json
 import uuid
 import warnings
 from datetime import datetime, timezone
-from typing import Annotated, Literal, Optional, cast
+from typing import Annotated, Optional, cast
 from urllib.parse import quote, urlencode
 from uuid import UUID as parse_uuid

@@ -27,7 +27,7 @@ from server.auth.user.user_authorizer import (
    depends_user_authorizer,
 )
 from server.config import sign_token
-from server.constants import IS_FEATURE_ENV
+from server.constants import IS_FEATURE_ENV, IS_LOCAL_ENV
 from server.routes.event_webhook import _get_session_api_key, _get_user_id
 from server.services.org_invitation_service import (
    EmailMismatchError,
@@ -37,12 +37,12 @@ from server.services.org_invitation_service import (
    UserAlreadyMemberError,
 )
 from server.utils.rate_limit_utils import check_rate_limit_by_user_id
+from server.utils.url_utils import get_cookie_domain, get_cookie_samesite, get_web_url
 from sqlalchemy import select
 from storage.database import a_session_maker
 from storage.user import User
 from storage.user_store import UserStore

-from openhands.app_server.config import get_global_config
 from openhands.core.logger import openhands_logger as logger
 from openhands.integrations.provider import ProviderHandler
 from openhands.integrations.service_types import ProviderType, TokenResponse
@@ -77,7 +77,7 @@ def set_response_cookie(
    signed_token = sign_token(cookie_data, config.jwt_secret.get_secret_value())  # type: ignore

    # Set secure cookie with signed token
-    domain = get_cookie_domain(request)
+    domain = get_cookie_domain()
    if domain:
        response.set_cookie(
            key='keycloak_auth',
@@ -85,7 +85,7 @@ def set_response_cookie(
            domain=domain,
            httponly=True,
            secure=secure,
-            samesite=get_cookie_samesite(request),
+            samesite=get_cookie_samesite(),
        )
    else:
        response.set_cookie(
@@ -93,30 +93,10 @@ def set_response_cookie(
            value=signed_token,
            httponly=True,
            secure=secure,
-            samesite=get_cookie_samesite(request),
+            samesite=get_cookie_samesite(),
        )


-def get_cookie_domain(request: Request) -> str | None:
-    # for now just use the full hostname except for staging stacks.
-    return (
-        None
-        if not request.url.hostname
-        or request.url.hostname.endswith('staging.all-hands.dev')
-        else request.url.hostname
-    )
-
-
-def get_cookie_samesite(request: Request) -> Literal['lax', 'strict']:
-    # for localhost and feature/staging stacks we set it to 'lax' as the cookie domain won't allow 'strict'
-    return (
-        'lax'
-        if request.url.hostname == 'localhost'
-        or (request.url.hostname or '').endswith('staging.all-hands.dev')
-        else 'strict'
-    )
-
-
 def _extract_oauth_state(state: str | None) -> tuple[str, str | None, str | None]:
    """Extract redirect URL, reCAPTCHA token, and invitation token from OAuth state.

@@ -140,19 +120,6 @@ def _extract_oauth_state(state: str | None) -> tuple[str, str | None, str | None
        return state, None, None


-# Keep alias for backward compatibility
-def _extract_recaptcha_state(state: str | None) -> tuple[str, str | None]:
-    """Extract redirect URL and reCAPTCHA token from OAuth state.
-
-    Deprecated: Use _extract_oauth_state instead.
-
-    Returns:
-        Tuple of (redirect_url, recaptcha_token). Token may be None.
-    """
-    redirect_url, recaptcha_token, _ = _extract_oauth_state(state)
-    return redirect_url, recaptcha_token
-
-
@oauth_router.get('/keycloak/callback')
 async def keycloak_callback(
    request: Request,
@@ -183,10 +150,7 @@ async def keycloak_callback(
            detail='Missing code in request params',
        )

-    web_url = get_global_config().web_url
-    if not web_url:
-        scheme = 'http' if request.url.hostname == 'localhost' else 'https'
-        web_url = f'{scheme}://{request.url.netloc}'
+    web_url = get_web_url(request)
    redirect_uri = web_url + request.url.path

    (
@@ -313,7 +277,9 @@ async def keycloak_callback(
            else:
                raise

-        verification_redirect_url = f'{request.base_url}login?email_verification_required=true&user_id={user_id}'
+        verification_redirect_url = (
+            f'{web_url}/login?email_verification_required=true&user_id={user_id}'
+        )
        if rate_limited:
            verification_redirect_url = f'{verification_redirect_url}&rate_limited=true'

@@ -474,9 +440,7 @@ async def keycloak_callback(
    # If the user hasn't accepted the TOS, redirect to the TOS page
    if not has_accepted_tos:
        encoded_redirect_url = quote(redirect_url, safe='')
-        tos_redirect_url = (
-            f'{request.base_url}accept-tos?redirect_url={encoded_redirect_url}'
-        )
+        tos_redirect_url = f'{web_url}/accept-tos?redirect_url={encoded_redirect_url}'
        if invitation_token:
            tos_redirect_url = f'{tos_redirect_url}&invitation_success=true'
        response = RedirectResponse(tos_redirect_url, status_code=302)
@@ -508,10 +472,9 @@ async def keycloak_offline_callback(code: str, state: str, request: Request):
            status_code=status.HTTP_400_BAD_REQUEST,
            content={'error': 'Missing code in request params'},
        )
-    scheme = 'https'
-    if request.url.hostname == 'localhost':
-        scheme = 'http'
-    redirect_uri = f'{scheme}://{request.url.netloc}{request.url.path}'
+
+    web_url = get_web_url(request)
+    redirect_uri = web_url + request.url.path
    logger.debug(f'code: {code}, redirect_uri: {redirect_uri}')

    (
@@ -533,15 +496,14 @@ async def keycloak_offline_callback(code: str, state: str, request: Request):
    )

    redirect_url, _, _ = _extract_oauth_state(state)
-    return RedirectResponse(
-        redirect_url if redirect_url else request.base_url, status_code=302
-    )
+    return RedirectResponse(redirect_url if redirect_url else web_url, status_code=302)


@oauth_router.get('/github/callback')
 async def github_dummy_callback(request: Request):
    """Callback for GitHub that just forwards the user to the app base URL."""
-    return RedirectResponse(request.base_url, status_code=302)
+    web_url = get_web_url(request)
+    return RedirectResponse(web_url, status_code=302)


@api_router.post('/authenticate')
@@ -563,8 +525,8 @@ async def authenticate(request: Request):
        if keycloak_auth_cookie:
            response.delete_cookie(
                key='keycloak_auth',
-                domain=get_cookie_domain(request),
-                samesite=get_cookie_samesite(request),
+                domain=get_cookie_domain(),
+                samesite=get_cookie_samesite(),
            )

        return response
@@ -588,7 +550,8 @@ async def accept_tos(request: Request):

    # Get redirect URL from request body
    body = await request.json()
-    redirect_url = body.get('redirect_url', str(request.base_url))
+    web_url = get_web_url(request)
+    redirect_url = body.get('redirect_url', str(web_url))

    # Update user settings with TOS acceptance
    accepted_tos: datetime = datetime.now(timezone.utc).replace(tzinfo=None)
@@ -618,7 +581,7 @@ async def accept_tos(request: Request):
        response=response,
        keycloak_access_token=access_token.get_secret_value(),
        keycloak_refresh_token=refresh_token.get_secret_value(),
-        secure=False if request.url.hostname == 'localhost' else True,
+        secure=not IS_LOCAL_ENV,
        accepted_tos=True,
    )
    return response
@@ -635,8 +598,8 @@ async def logout(request: Request):
    # Always delete the cookie regardless of what happens
    response.delete_cookie(
        key='keycloak_auth',
-        domain=get_cookie_domain(request),
-        samesite=get_cookie_samesite(request),
+        domain=get_cookie_domain(),
+        samesite=get_cookie_samesite(),
    )

    # Try to properly logout from Keycloak, but don't fail if it doesn't work
@@ -11,8 +11,8 @@ from integrations import stripe_service
 from pydantic import BaseModel
 from server.constants import STRIPE_API_KEY
 from server.logger import logger
+from server.utils.url_utils import get_web_url
 from sqlalchemy import select
-from starlette.datastructures import URL
 from storage.billing_session import BillingSession
 from storage.database import a_session_maker
 from storage.lite_llm_manager import LiteLlmManager
@@ -151,7 +151,7 @@ async def create_customer_setup_session(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail='Could not find or create customer for user',
        )
-    base_url = _get_base_url(request)
+    base_url = get_web_url(request)
    checkout_session = await stripe.checkout.Session.create_async(
        customer=customer_info['customer_id'],
        mode='setup',
@@ -170,7 +170,7 @@ async def create_checkout_session(
    user_id: str = Depends(get_user_id),
 ) -> CreateBillingSessionResponse:
    await validate_billing_enabled()
-    base_url = _get_base_url(request)
+    base_url = get_web_url(request)
    customer_info = await stripe_service.find_or_create_customer_by_user_id(user_id)
    if not customer_info:
        raise HTTPException(
@@ -198,8 +198,8 @@ async def create_checkout_session(
        saved_payment_method_options={
            'payment_method_save': 'enabled',
        },
-        success_url=f'{base_url}api/billing/success?session_id={{CHECKOUT_SESSION_ID}}',
-        cancel_url=f'{base_url}api/billing/cancel?session_id={{CHECKOUT_SESSION_ID}}',
+        success_url=f'{base_url}/api/billing/success?session_id={{CHECKOUT_SESSION_ID}}',
+        cancel_url=f'{base_url}/api/billing/cancel?session_id={{CHECKOUT_SESSION_ID}}',
    )
    logger.info(
        'created_stripe_checkout_session',
@@ -300,7 +300,7 @@ async def success_callback(session_id: str, request: Request):
        await session.commit()

    return RedirectResponse(
-        f'{_get_base_url(request)}settings/billing?checkout=success', status_code=302
+        f'{get_web_url(request)}/settings/billing?checkout=success', status_code=302
    )


@@ -325,17 +325,9 @@ async def cancel_callback(session_id: str, request: Request):
            )
            billing_session.status = 'cancelled'
            billing_session.updated_at = datetime.now(UTC)
-            session.merge(billing_session)
+            await session.merge(billing_session)
            await session.commit()

    return RedirectResponse(
-        f'{_get_base_url(request)}settings/billing?checkout=cancel', status_code=302
+        f'{get_web_url(request)}/settings/billing?checkout=cancel', status_code=302
    )
-
-
-def _get_base_url(request: Request) -> URL:
-    # Never send any part of the credit card process over a non secure connection
-    base_url = request.base_url
-    if base_url.hostname != 'localhost':
-        base_url = base_url.replace(scheme='https')
-    return base_url
@@ -7,8 +7,10 @@ from pydantic import BaseModel, field_validator
 from server.auth.constants import KEYCLOAK_CLIENT_ID
 from server.auth.keycloak_manager import get_keycloak_admin
 from server.auth.saas_user_auth import SaasUserAuth
+from server.constants import IS_LOCAL_ENV
 from server.routes.auth import set_response_cookie
 from server.utils.rate_limit_utils import check_rate_limit_by_user_id
+from server.utils.url_utils import get_web_url
 from storage.user_store import UserStore

 from openhands.core.logger import openhands_logger as logger
@@ -87,7 +89,7 @@ async def update_email(
            response=response,
            keycloak_access_token=user_auth.access_token.get_secret_value(),
            keycloak_refresh_token=user_auth.refresh_token.get_secret_value(),
-            secure=False if request.url.hostname == 'localhost' else True,
+            secure=not IS_LOCAL_ENV,
            accepted_tos=user_auth.accepted_tos or False,
        )

@@ -156,8 +158,8 @@ async def verified_email(request: Request):
    await user_auth.refresh()  # refresh so access token has updated email
    user_auth.email_verified = True
    await UserStore.update_user_email(user_id=user_auth.user_id, email_verified=True)
-    scheme = 'http' if request.url.hostname == 'localhost' else 'https'
-    redirect_uri = f'{scheme}://{request.url.netloc}/settings/user'
+
+    redirect_uri = f'{get_web_url(request)}/settings/user'
    response = RedirectResponse(redirect_uri, status_code=302)

    # need to set auth cookie to the new tokens
@@ -180,11 +182,10 @@ async def verified_email(request: Request):

 async def verify_email(request: Request, user_id: str, is_auth_flow: bool = False):
    keycloak_admin = get_keycloak_admin()
-    scheme = 'http' if request.url.hostname == 'localhost' else 'https'
    if is_auth_flow:
-        redirect_uri = f'{scheme}://{request.url.netloc}/login?email_verified=true'
+        redirect_uri = f'{get_web_url(request)}/login?email_verified=true'
    else:
-        redirect_uri = f'{scheme}://{request.url.netloc}/api/email/verified'
+        redirect_uri = f'{get_web_url(request)}/api/email/verified'
    logger.info(f'Redirect URI: {redirect_uri}')
    await keycloak_admin.a_send_verify_email(
        user_id=user_id,
@@ -6,6 +6,7 @@ from typing import Optional
 from fastapi import APIRouter, Depends, Form, HTTPException, Request, status
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
+from server.utils.url_utils import get_web_url
 from storage.api_key_store import ApiKeyStore
 from storage.device_code_store import DeviceCodeStore

@@ -93,7 +94,7 @@ async def device_authorization(
            expires_in=DEVICE_CODE_EXPIRES_IN,
        )

-        base_url = str(http_request.base_url).rstrip('/')
+        base_url = get_web_url(http_request)
        verification_uri = f'{base_url}/oauth/device/verify'
        verification_uri_complete = (
            f'{verification_uri}?user_code={device_code_entry.user_code}'
@@ -365,14 +365,12 @@ class OrgInvitationService:
                'Failed to set up organization access. Please try again.'
            )

-        # Step 5: Add user to organization
-        from storage.org_member_store import OrgMemberStore as OMS
-
-        org_member_kwargs = OMS.get_kwargs_from_settings(settings)
-        # Don't override with org defaults - use invitation-specified role
-        org_member_kwargs.pop('llm_model', None)
-        org_member_kwargs.pop('llm_base_url', None)
+        # Step 4.5: Fetch organization to get its LLM settings
+        org = await OrgStore.get_org_by_id(invitation.org_id)
+        if not org:
+            raise InvitationInvalidError('Organization not found')

+        # Step 5: Add user to organization with inherited org LLM settings
        # Get the llm_api_key as string (it's SecretStr | None in Settings)
        llm_api_key = (
            settings.llm_api_key.get_secret_value() if settings.llm_api_key else ''
@@ -384,6 +382,9 @@ class OrgInvitationService:
            role_id=invitation.role_id,
            llm_api_key=llm_api_key,
            status='active',
+            llm_model=org.default_llm_model,
+            llm_base_url=org.default_llm_base_url,
+            max_iterations=org.default_max_iterations,
        )

        # Step 6: Mark invitation as accepted
@@ -0,0 +1,38 @@
+from typing import Literal
+
+from fastapi import Request
+from server.constants import IS_FEATURE_ENV, IS_LOCAL_ENV, IS_STAGING_ENV
+from starlette.datastructures import URL
+
+from openhands.app_server.config import get_global_config
+
+
+def get_web_url(request: Request):
+    """Return the configured web URL, else one derived from the request."""
+    configured = get_global_config().web_url
+    if configured:
+        return configured.rstrip('/')
+    # Nothing configured: use http for localhost, https for any other host.
+    scheme = 'https' if request.url.hostname != 'localhost' else 'http'
+    return f'{scheme}://{request.url.netloc}'
+
+
+def get_cookie_domain() -> str | None:
+    """Return the hostname of the configured web URL for use as cookie domain.
+
+    Returns None when no web URL is set or on local/feature/staging stacks.
+    """
+    web_url = get_global_config().web_url
+    if not web_url or IS_FEATURE_ENV or IS_STAGING_ENV or IS_LOCAL_ENV:
+        return None
+    return URL(web_url).hostname
+
+
+def get_cookie_samesite() -> Literal['lax', 'strict']:
+    """Return the SameSite policy for cookies: 'strict' or 'lax'."""
+    # localhost and feature/staging stacks get 'lax' because the cookie
+    # domain won't allow 'strict'; a missing web URL also yields 'lax'.
+    relaxed_env = IS_FEATURE_ENV or IS_STAGING_ENV or IS_LOCAL_ENV
+    if not get_global_config().web_url or relaxed_env:
+        return 'lax'
+    return 'strict'
@@ -28,6 +28,9 @@ class OrgMemberStore:
        role_id: int,
        llm_api_key: str,
        status: Optional[str] = None,
+        llm_model: Optional[str] = None,
+        llm_base_url: Optional[str] = None,
+        max_iterations: Optional[int] = None,
    ) -> OrgMember:
        """Add a user to an organization with a specific role."""
        async with a_session_maker() as session:
@@ -37,6 +40,9 @@ class OrgMemberStore:
                role_id=role_id,
                llm_api_key=llm_api_key,
                status=status,
+                llm_model=llm_model,
+                llm_base_url=llm_base_url,
+                max_iterations=max_iterations,
            )
            session.add(org_member)
            await session.commit()
@@ -11,9 +11,10 @@ from pydantic import SecretStr
 from server.auth.token_manager import TokenManager
 from server.constants import LITE_LLM_API_URL
 from server.logger import logger
-from sqlalchemy import select
+from sqlalchemy import select, update
 from sqlalchemy.orm import joinedload
 from storage.database import a_session_maker
+from storage.encrypt_utils import encrypt_value
 from storage.lite_llm_manager import LiteLlmManager, get_openhands_cloud_key_alias
 from storage.org import Org
 from storage.org_member import OrgMember
@@ -186,6 +187,42 @@ class SaasSettingsStore(SettingsStore):
                    if hasattr(model, key):
                        setattr(model, key, value)

+            # Map Settings fields to Org fields with 'default_' prefix
+            # The generic loop above doesn't update these because Org uses
+            # 'default_llm_model' not 'llm_model', etc.
+            # Use exclude_unset to only update explicitly-set fields (allows clearing with null)
+            settings_data = item.model_dump(exclude_unset=True)
+            if 'llm_model' in settings_data:
+                org.default_llm_model = settings_data['llm_model']
+            if 'llm_base_url' in settings_data:
+                org.default_llm_base_url = settings_data['llm_base_url']
+            if 'max_iterations' in settings_data:
+                org.default_max_iterations = settings_data['max_iterations']
+
+            # Propagate LLM settings to all org members
+            # This ensures all members see the same LLM configuration when an admin saves
+            # Note: Concurrent saves by multiple admins will result in last-write-wins.
+            # Consider adding optimistic locking if this becomes a problem.
+            member_update_values: dict = {}
+            if item.llm_model is not None:
+                member_update_values['llm_model'] = item.llm_model
+            if item.llm_base_url is not None:
+                member_update_values['llm_base_url'] = item.llm_base_url
+            if item.max_iterations is not None:
+                member_update_values['max_iterations'] = item.max_iterations
+            if item.llm_api_key is not None:
+                member_update_values['_llm_api_key'] = encrypt_value(
+                    item.llm_api_key.get_secret_value()
+                )
+
+            if member_update_values:
+                stmt = (
+                    update(OrgMember)
+                    .where(OrgMember.org_id == org_id)
+                    .values(**member_update_values)
+                )
+                await session.execute(stmt)
+
            await session.commit()

    @classmethod
@@ -1,562 +0,0 @@
-#!/usr/bin/env python3
-"""
-Common Room Sync
-
-This script queries the database to count conversations created by each user,
-then creates or updates a signal in Common Room for each user with their
-conversation count.
-"""
-
-import asyncio
-import logging
-import os
-import sys
-import time
-from datetime import UTC, datetime
-from typing import Any, Dict, List, Optional, Set
-
-import requests
-from sqlalchemy import text
-
-# Add the parent directory to the path so we can import from storage
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from server.auth.token_manager import get_keycloak_admin
-from storage.database import get_engine
-
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-)
-logger = logging.getLogger('common_room_sync')
-
-# Common Room API configuration
-COMMON_ROOM_API_KEY = os.environ.get('COMMON_ROOM_API_KEY')
-COMMON_ROOM_DESTINATION_SOURCE_ID = os.environ.get('COMMON_ROOM_DESTINATION_SOURCE_ID')
-COMMON_ROOM_API_BASE_URL = 'https://api.commonroom.io/community/v1'
-
-# Sync configuration
-BATCH_SIZE = int(os.environ.get('BATCH_SIZE', '100'))
-KEYCLOAK_BATCH_SIZE = int(os.environ.get('KEYCLOAK_BATCH_SIZE', '20'))
-MAX_RETRIES = int(os.environ.get('MAX_RETRIES', '3'))
-INITIAL_BACKOFF_SECONDS = float(os.environ.get('INITIAL_BACKOFF_SECONDS', '1'))
-MAX_BACKOFF_SECONDS = float(os.environ.get('MAX_BACKOFF_SECONDS', '60'))
-BACKOFF_FACTOR = float(os.environ.get('BACKOFF_FACTOR', '2'))
-RATE_LIMIT = float(os.environ.get('RATE_LIMIT', '2'))  # Requests per second
-
-
-class CommonRoomSyncError(Exception):
-    """Base exception for Common Room sync errors."""
-
-
-class DatabaseError(CommonRoomSyncError):
-    """Exception for database errors."""
-
-
-class CommonRoomAPIError(CommonRoomSyncError):
-    """Exception for Common Room API errors."""
-
-
-class KeycloakClientError(CommonRoomSyncError):
-    """Exception for Keycloak client errors."""
-
-
-def get_recent_conversations(minutes: int = 60) -> List[Dict[str, Any]]:
-    """Get conversations created in the past N minutes.
-
-    Args:
-        minutes: Number of minutes to look back for new conversations.
-
-    Returns:
-        A list of dictionaries, each containing conversation details.
-
-    Raises:
-        DatabaseError: If the database query fails.
-    """
-    try:
-        # Use a different syntax for the interval that works with pg8000
-        query = text("""
-            SELECT
-                conversation_id, user_id, title, created_at
-            FROM
-                conversation_metadata
-            WHERE
-                created_at >= NOW() - (INTERVAL '1 minute' * :minutes)
-            ORDER BY
-                created_at DESC
-        """)
-
-        with get_engine().connect() as connection:
-            result = connection.execute(query, {'minutes': minutes})
-            conversations = [
-                {
-                    'conversation_id': row[0],
-                    'user_id': row[1],
-                    'title': row[2],
-                    'created_at': row[3].isoformat() if row[3] else None,
-                }
-                for row in result
-            ]
-
-        logger.info(
-            f'Retrieved {len(conversations)} conversations created in the past {minutes} minutes'
-        )
-        return conversations
-    except Exception as e:
-        logger.exception(f'Error querying recent conversations: {e}')
-        raise DatabaseError(f'Failed to query recent conversations: {e}')
-
-
-async def get_users_from_keycloak(user_ids: Set[str]) -> Dict[str, Dict[str, Any]]:
-    """Get user information from Keycloak for a set of user IDs.
-
-    Args:
-        user_ids: A set of user IDs to look up.
-
-    Returns:
-        A dictionary mapping user IDs to user information dictionaries.
-
-    Raises:
-        KeycloakClientError: If the Keycloak API call fails.
-    """
-    try:
-        # Get Keycloak admin client
-        keycloak_admin = get_keycloak_admin()
-
-        # Create a dictionary to store user information
-        user_info_dict = {}
-
-        # Convert set to list for easier batching
-        user_id_list = list(user_ids)
-
-        # Process user IDs in batches
-        for i in range(0, len(user_id_list), KEYCLOAK_BATCH_SIZE):
-            batch = user_id_list[i : i + KEYCLOAK_BATCH_SIZE]
-            batch_tasks = []
-
-            # Create tasks for each user ID in the batch
-            for user_id in batch:
-                # Use the Keycloak admin client to get user by ID
-                batch_tasks.append(get_user_by_id(keycloak_admin, user_id))
-
-            # Run the batch of tasks concurrently
-            batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True)
-
-            # Process the results
-            for user_id, result in zip(batch, batch_results):
-                if isinstance(result, Exception):
-                    logger.warning(f'Error getting user {user_id}: {result}')
-                    continue
-
-                if result and isinstance(result, dict):
-                    user_info_dict[user_id] = {
-                        'username': result.get('username'),
-                        'email': result.get('email'),
-                        'id': result.get('id'),
-                    }
-
-        logger.info(
-            f'Retrieved information for {len(user_info_dict)} users from Keycloak'
-        )
-        return user_info_dict
-
-    except Exception as e:
-        error_msg = f'Error getting users from Keycloak: {e}'
-        logger.exception(error_msg)
-        raise KeycloakClientError(error_msg)
-
-
-async def get_user_by_id(keycloak_admin, user_id: str) -> Optional[Dict[str, Any]]:
-    """Get a user from Keycloak by ID.
-
-    Args:
-        keycloak_admin: The Keycloak admin client.
-        user_id: The user ID to look up.
-
-    Returns:
-        A dictionary with the user's information, or None if not found.
-    """
-    try:
-        # Use the Keycloak admin client to get user by ID
-        user = keycloak_admin.get_user(user_id)
-        if user:
-            logger.debug(
-                f"Found user in Keycloak: {user.get('username')}, {user.get('email')}"
-            )
-            return user
-        else:
-            logger.warning(f'User {user_id} not found in Keycloak')
-            return None
-    except Exception as e:
-        logger.warning(f'Error getting user {user_id} from Keycloak: {e}')
-        return None
-
-
-def get_user_info(
-    user_id: str, user_info_cache: Dict[str, Dict[str, Any]]
-) -> Optional[Dict[str, str]]:
-    """Get the email address and GitHub username for a user from the cache.
-
-    Args:
-        user_id: The user ID to look up.
-        user_info_cache: A dictionary mapping user IDs to user information.
-
-    Returns:
-        A dictionary with the user's email and username, or None if not found.
-    """
-    # Check if the user is in the cache
-    if user_id in user_info_cache:
-        user_info = user_info_cache[user_id]
-        logger.debug(
-            f"Found user info in cache: {user_info.get('username')}, {user_info.get('email')}"
-        )
-        return user_info
-    else:
-        logger.warning(f'User {user_id} not found in user info cache')
-        return None
-
-
-def register_user_in_common_room(
-    user_id: str, email: str, github_username: str
-) -> Dict[str, Any]:
-    """Create or update a user in Common Room.
-
-    Args:
-        user_id: The user ID.
-        email: The user's email address.
-        github_username: The user's GitHub username.
-
-    Returns:
-        The API response from Common Room.
-
-    Raises:
-        CommonRoomAPIError: If the Common Room API request fails.
-    """
-    if not COMMON_ROOM_API_KEY:
-        raise CommonRoomAPIError('COMMON_ROOM_API_KEY environment variable not set')
-
-    if not COMMON_ROOM_DESTINATION_SOURCE_ID:
-        raise CommonRoomAPIError(
-            'COMMON_ROOM_DESTINATION_SOURCE_ID environment variable not set'
-        )
-
-    try:
-        headers = {
-            'Authorization': f'Bearer {COMMON_ROOM_API_KEY}',
-            'Content-Type': 'application/json',
-        }
-
-        # Create or update user in Common Room
-        user_data = {
-            'id': user_id,
-            'email': email,
-            'username': github_username,
-            'github': {'type': 'handle', 'value': github_username},
-        }
-
-        user_url = f'{COMMON_ROOM_API_BASE_URL}/source/{COMMON_ROOM_DESTINATION_SOURCE_ID}/user'
-        user_response = requests.post(user_url, headers=headers, json=user_data)
-
-        if user_response.status_code not in (200, 202):
-            logger.error(
-                f'Failed to create/update user in Common Room: {user_response.text}'
-            )
-            logger.error(f'Response status code: {user_response.status_code}')
-            raise CommonRoomAPIError(
-                f'Failed to create/update user: {user_response.text}'
-            )
-
-        logger.info(
-            f'Registered/updated user {user_id} (GitHub: {github_username}) in Common Room'
-        )
-        return user_response.json()
-    except requests.RequestException as e:
-        logger.exception(f'Error communicating with Common Room API: {e}')
-        raise CommonRoomAPIError(f'Failed to communicate with Common Room API: {e}')
-
-
-def register_conversation_activity(
-    user_id: str,
-    conversation_id: str,
-    conversation_title: str,
-    created_at: datetime,
-    email: str,
-    github_username: str,
-) -> Dict[str, Any]:
-    """Create an activity in Common Room for a new conversation.
-
-    Args:
-        user_id: The user ID who created the conversation.
-        conversation_id: The ID of the conversation.
-        conversation_title: The title of the conversation.
-        created_at: The datetime object when the conversation was created.
-        email: The user's email address.
-        github_username: The user's GitHub username.
-
-    Returns:
-        The API response from Common Room.
-
-    Raises:
-        CommonRoomAPIError: If the Common Room API request fails.
-    """
-    if not COMMON_ROOM_API_KEY:
-        raise CommonRoomAPIError('COMMON_ROOM_API_KEY environment variable not set')
-
-    if not COMMON_ROOM_DESTINATION_SOURCE_ID:
-        raise CommonRoomAPIError(
-            'COMMON_ROOM_DESTINATION_SOURCE_ID environment variable not set'
-        )
-
-    try:
-        headers = {
-            'Authorization': f'Bearer {COMMON_ROOM_API_KEY}',
-            'Content-Type': 'application/json',
-        }
-
-        # Format the datetime object to the expected ISO format
-        formatted_timestamp = (
-            created_at.strftime('%Y-%m-%dT%H:%M:%SZ')
-            if created_at
-            else time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
-        )
-
-        # Create activity for the conversation
-        activity_data = {
-            'id': f'conversation_{conversation_id}',  # Use conversation ID to ensure uniqueness
-            'activityType': 'started_session',
-            'user': {
-                'id': user_id,
-                'email': email,
-                'github': {'type': 'handle', 'value': github_username},
-                'username': github_username,
-            },
-            'activityTitle': {
-                'type': 'text',
-                'value': conversation_title or 'New Conversation',
-            },
-            'content': {
-                'type': 'text',
-                'value': f'Started a new conversation: {conversation_title or "Untitled"}',
-            },
-            'timestamp': formatted_timestamp,
-            'url': f'https://app.all-hands.dev/conversations/{conversation_id}',
-        }
-
-        # Log the activity data for debugging
-        logger.info(f'Activity data payload: {activity_data}')
-
-        activity_url = f'{COMMON_ROOM_API_BASE_URL}/source/{COMMON_ROOM_DESTINATION_SOURCE_ID}/activity'
-        activity_response = requests.post(
-            activity_url, headers=headers, json=activity_data
-        )
-
-        if activity_response.status_code not in (200, 202):
-            logger.error(
-                f'Failed to create activity in Common Room: {activity_response.text}'
-            )
-            logger.error(f'Response status code: {activity_response.status_code}')
-            raise CommonRoomAPIError(
-                f'Failed to create activity: {activity_response.text}'
-            )
-
-        logger.info(
-            f'Registered conversation activity for user {user_id}, conversation {conversation_id}'
-        )
-        return activity_response.json()
-    except requests.RequestException as e:
-        logger.exception(f'Error communicating with Common Room API: {e}')
-        raise CommonRoomAPIError(f'Failed to communicate with Common Room API: {e}')
-
-
-def retry_with_backoff(func, *args, **kwargs):
-    """Retry a function with exponential backoff.
-
-    Args:
-        func: The function to retry.
-        *args: Positional arguments to pass to the function.
-        **kwargs: Keyword arguments to pass to the function.
-
-    Returns:
-        The result of the function call.
-
-    Raises:
-        The last exception raised by the function.
-    """
-    backoff = INITIAL_BACKOFF_SECONDS
-    last_exception = None
-
-    for attempt in range(MAX_RETRIES):
-        try:
-            return func(*args, **kwargs)
-        except Exception as e:
-            last_exception = e
-            logger.warning(f'Attempt {attempt + 1}/{MAX_RETRIES} failed: {e}')
-
-            if attempt < MAX_RETRIES - 1:
-                sleep_time = min(backoff, MAX_BACKOFF_SECONDS)
-                logger.info(f'Retrying in {sleep_time:.2f} seconds...')
-                time.sleep(sleep_time)
-                backoff *= BACKOFF_FACTOR
-            else:
-                logger.exception(f'All {MAX_RETRIES} attempts failed')
-                raise last_exception
-
-
-async def retry_with_backoff_async(func, *args, **kwargs):
-    """Retry an async function with exponential backoff.
-
-    Args:
-        func: The async function to retry.
-        *args: Positional arguments to pass to the function.
-        **kwargs: Keyword arguments to pass to the function.
-
-    Returns:
-        The result of the function call.
-
-    Raises:
-        The last exception raised by the function.
-    """
-    backoff = INITIAL_BACKOFF_SECONDS
-    last_exception = None
-
-    for attempt in range(MAX_RETRIES):
-        try:
-            return await func(*args, **kwargs)
-        except Exception as e:
-            last_exception = e
-            logger.warning(f'Attempt {attempt + 1}/{MAX_RETRIES} failed: {e}')
-
-            if attempt < MAX_RETRIES - 1:
-                sleep_time = min(backoff, MAX_BACKOFF_SECONDS)
-                logger.info(f'Retrying in {sleep_time:.2f} seconds...')
-                await asyncio.sleep(sleep_time)
-                backoff *= BACKOFF_FACTOR
-            else:
-                logger.exception(f'All {MAX_RETRIES} attempts failed')
-                raise last_exception
-
-
-async def async_sync_recent_conversations_to_common_room(minutes: int = 60):
-    """Async main function to sync recent conversations to Common Room.
-
-    Args:
-        minutes: Number of minutes to look back for new conversations.
-    """
-    logger.info(
-        f'Starting Common Room recent conversations sync (past {minutes} minutes)'
-    )
-
-    stats = {
-        'total_conversations': 0,
-        'registered_users': 0,
-        'registered_activities': 0,
-        'errors': 0,
-        'missing_user_info': 0,
-    }
-
-    try:
-        # Get conversations created in the past N minutes
-        recent_conversations = retry_with_backoff(get_recent_conversations, minutes)
-        stats['total_conversations'] = len(recent_conversations)
-
-        logger.info(f'Processing {len(recent_conversations)} recent conversations')
-
-        if not recent_conversations:
-            logger.info('No recent conversations found, exiting')
-            return
-
-        # Extract all unique user IDs
-        user_ids = {conv['user_id'] for conv in recent_conversations if conv['user_id']}
-
-        # Get user information for all users in batches
-        user_info_cache = await retry_with_backoff_async(
-            get_users_from_keycloak, user_ids
-        )
-
-        # Track registered users to avoid duplicate registrations
-        registered_users = set()
-
-        # Process each conversation
-        for conversation in recent_conversations:
-            conversation_id = conversation['conversation_id']
-            user_id = conversation['user_id']
-            title = conversation['title']
-            created_at = conversation[
-                'created_at'
-            ]  # This might be a string or datetime object
-
-            try:
-                # Get user info from cache
-                user_info = get_user_info(user_id, user_info_cache)
-                if not user_info:
-                    logger.warning(
-                        f'Could not find user info for user {user_id}, skipping conversation {conversation_id}'
-                    )
-                    stats['missing_user_info'] += 1
-                    continue
-
-                email = user_info['email']
-                github_username = user_info['username']
-
-                if not email:
-                    logger.warning(
-                        f'User {user_id} has no email, skipping conversation {conversation_id}'
-                    )
-                    stats['errors'] += 1
-                    continue
-
-                # Register user in Common Room if not already registered in this run
-                if user_id not in registered_users:
-                    register_user_in_common_room(user_id, email, github_username)
-                    registered_users.add(user_id)
-                    stats['registered_users'] += 1
-
-                # If created_at is a string, parse it to a datetime object
-                # If it's already a datetime object, use it as is
-                # If it's None, use current time
-                created_at_datetime = (
-                    created_at
-                    if isinstance(created_at, datetime)
-                    else datetime.fromisoformat(created_at.replace('Z', '+00:00'))
-                    if created_at
-                    else datetime.now(UTC)
-                )
-
-                # Register conversation activity with email and github username
-                register_conversation_activity(
-                    user_id,
-                    conversation_id,
-                    title,
-                    created_at_datetime,
-                    email,
-                    github_username,
-                )
-                stats['registered_activities'] += 1
-
-                # Sleep to respect rate limit
-                await asyncio.sleep(1 / RATE_LIMIT)
-            except Exception as e:
-                logger.exception(
-                    f'Error processing conversation {conversation_id} for user {user_id}: {e}'
-                )
-                stats['errors'] += 1
-    except Exception as e:
-        logger.exception(f'Sync failed: {e}')
-        raise
-    finally:
-        logger.info(f'Sync completed. Stats: {stats}')
-
-
-def sync_recent_conversations_to_common_room(minutes: int = 60):
-    """Main function to sync recent conversations to Common Room.
-
-    Args:
-        minutes: Number of minutes to look back for new conversations.
-    """
-    # Run the async function in the event loop
-    asyncio.run(async_sync_recent_conversations_to_common_room(minutes))
-
-
-if __name__ == '__main__':
-    # Default to looking back 60 minutes for new conversations
-    minutes = int(os.environ.get('SYNC_MINUTES', '60'))
-    sync_recent_conversations_to_common_room(minutes)
@@ -1,51 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for Common Room conversation count sync.
-
-This script tests the functionality of the Common Room sync script
-without making any API calls to Common Room or database connections.
-"""
-
-import os
-import sys
-import unittest
-from unittest.mock import MagicMock, patch
-
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from sync.common_room_sync import (
-    retry_with_backoff,
-)
-
-
-class TestCommonRoomSync(unittest.TestCase):
-    """Test cases for Common Room sync functionality."""
-
-    def test_retry_with_backoff(self):
-        """Test the retry_with_backoff function."""
-        # Mock function that succeeds on the second attempt
-        mock_func = MagicMock(
-            side_effect=[Exception('First attempt failed'), 'success']
-        )
-
-        # Set environment variables for testing
-        with patch.dict(
-            os.environ,
-            {
-                'MAX_RETRIES': '3',
-                'INITIAL_BACKOFF_SECONDS': '0.01',
-                'BACKOFF_FACTOR': '2',
-                'MAX_BACKOFF_SECONDS': '1',
-            },
-        ):
-            result = retry_with_backoff(mock_func, 'arg1', 'arg2', kwarg1='kwarg1')
-
-            # Check that the function was called twice
-            self.assertEqual(mock_func.call_count, 2)
-            # Check that the function was called with the correct arguments
-            mock_func.assert_called_with('arg1', 'arg2', kwarg1='kwarg1')
-            # Check that the function returned the expected result
-            self.assertEqual(result, 'success')
-
-
-if __name__ == '__main__':
-    unittest.main()
@@ -1,83 +0,0 @@
-#!/usr/bin/env python3
-"""Test script to verify the conversation count query.
-
-This script tests the database query to count conversations by user,
-without making any API calls to Common Room.
-"""
-
-import os
-import sys
-
-from sqlalchemy import text
-
-# Add the parent directory to the path so we can import from storage
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from storage.database import get_engine
-
-
-def test_conversation_count_query():
-    """Test the query to count conversations by user."""
-    try:
-        # Query to count conversations by user
-        count_query = text("""
-            SELECT
-                user_id, COUNT(*) as conversation_count
-            FROM
-                conversation_metadata
-            GROUP BY
-                user_id
-        """)
-
-        engine = get_engine()
-
-        with engine.connect() as connection:
-            count_result = connection.execute(count_query)
-            user_counts = [
-                {'user_id': row[0], 'conversation_count': row[1]}
-                for row in count_result
-            ]
-
-        print(f'Found {len(user_counts)} users with conversations')
-
-        # Print the first 5 results
-        for i, user_data in enumerate(user_counts[:5]):
-            print(
-                f"User {i+1}: {user_data['user_id']} - {user_data['conversation_count']} conversations"
-            )
-
-        # Test the user_entity query for the first user (if any)
-        if user_counts:
-            first_user_id = user_counts[0]['user_id']
-
-            user_query = text("""
-                SELECT username, email, id
-                FROM user_entity
-                WHERE id = :user_id
-            """)
-
-            with engine.connect() as connection:
-                user_result = connection.execute(user_query, {'user_id': first_user_id})
-                user_row = user_result.fetchone()
-
-                if user_row:
-                    print(f'\nUser details for {first_user_id}:')
-                    print(f'  GitHub Username: {user_row[0]}')
-                    print(f'  Email: {user_row[1]}')
-                    print(f'  ID: {user_row[2]}')
-                else:
-                    print(
-                        f'\nNo user details found for {first_user_id} in user_entity table'
-                    )
-
-        print('\nTest completed successfully')
-    except Exception as e:
-        print(f'Error: {str(e)}')
-        import traceback
-
-        traceback.print_exc()
-        sys.exit(1)
-
-
-if __name__ == '__main__':
-    test_conversation_count_query()
@@ -10,6 +10,9 @@ from unittest.mock import AsyncMock, MagicMock
 from uuid import UUID, uuid4

 import pytest
+from server.utils.saas_app_conversation_info_injector import (
+    SaasSQLAppConversationInfoService,
+)
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
 from sqlalchemy.pool import StaticPool
@@ -17,9 +20,6 @@ from storage.base import Base
 from storage.org import Org
 from storage.user import User

-from enterprise.server.utils.saas_app_conversation_info_injector import (
-    SaasSQLAppConversationInfoService,
-)
 from openhands.app_server.app_conversation.app_conversation_models import (
    AppConversationInfo,
 )
@@ -11,7 +11,6 @@ from server.auth.auth_error import AuthError
 from server.auth.saas_user_auth import SaasUserAuth
 from server.auth.user.user_authorizer import UserAuthorizationResponse, UserAuthorizer
 from server.routes.auth import (
-    _extract_recaptcha_state,
    accept_tos,
    authenticate,
    keycloak_callback,
@@ -55,11 +54,12 @@ def mock_response():
 def test_set_response_cookie(mock_response, mock_request):
    """Test setting the auth cookie on a response."""

-    with patch('server.routes.auth.config') as mock_config:
+    with (
+        patch('server.routes.auth.config') as mock_config,
+        patch('server.utils.url_utils.get_global_config') as get_global_config,
+    ):
        mock_config.jwt_secret.get_secret_value.return_value = 'test_secret'
-
-        # Configure mock_request.url.hostname
-        mock_request.url.hostname = 'example.com'
+        get_global_config.return_value = MagicMock(web_url='https://example.com')

        set_response_cookie(
            request=mock_request,
@@ -1036,79 +1036,6 @@ async def test_keycloak_callback_no_email_in_user_info(
        mock_token_manager.check_duplicate_base_email.assert_not_called()


-class TestExtractRecaptchaState:
-    """Tests for _extract_recaptcha_state() helper function."""
-
-    def test_should_extract_redirect_url_and_token_from_new_json_format(self):
-        """Test extraction from new base64-encoded JSON format."""
-        # Arrange
-        state_data = {
-            'redirect_url': 'https://example.com',
-            'recaptcha_token': 'test-token',
-        }
-        encoded_state = base64.urlsafe_b64encode(
-            json.dumps(state_data).encode()
-        ).decode()
-
-        # Act
-        redirect_url, token = _extract_recaptcha_state(encoded_state)
-
-        # Assert
-        assert redirect_url == 'https://example.com'
-        assert token == 'test-token'
-
-    def test_should_handle_old_format_plain_redirect_url(self):
-        """Test handling of old format (plain redirect URL string)."""
-        # Arrange
-        state = 'https://example.com'
-
-        # Act
-        redirect_url, token = _extract_recaptcha_state(state)
-
-        # Assert
-        assert redirect_url == 'https://example.com'
-        assert token is None
-
-    def test_should_handle_none_state(self):
-        """Test handling of None state."""
-        # Arrange
-        state = None
-
-        # Act
-        redirect_url, token = _extract_recaptcha_state(state)
-
-        # Assert
-        assert redirect_url == ''
-        assert token is None
-
-    def test_should_handle_invalid_base64_gracefully(self):
-        """Test handling of invalid base64/JSON (fallback to old format)."""
-        # Arrange
-        state = 'not-valid-base64!!!'
-
-        # Act
-        redirect_url, token = _extract_recaptcha_state(state)
-
-        # Assert
-        assert redirect_url == state
-        assert token is None
-
-    def test_should_handle_missing_redirect_url_in_json(self):
-        """Test handling when redirect_url is missing in JSON."""
-        # Arrange
-        state_data = {'recaptcha_token': 'test-token'}
-        encoded_state = base64.urlsafe_b64encode(
-            json.dumps(state_data).encode()
-        ).decode()
-
-        # Act
-        redirect_url, token = _extract_recaptcha_state(encoded_state)
-
-        # Assert
-        assert redirect_url == ''
-        assert token == 'test-token'
-
-
 class TestKeycloakCallbackRecaptcha:
    """Tests for reCAPTCHA integration in keycloak_callback()."""

@@ -48,7 +48,7 @@ def mock_checkout_request():
            'server': ('test.com', 80),
        }
    )
-    request._base_url = URL('http://test.com/')
+    request._url = URL('http://test.com/')
    return request


@@ -62,7 +62,7 @@ def mock_subscription_request():
            'server': ('test.com', 80),
        }
    )
-    request._base_url = URL('http://test.com/')
+    request._url = URL('http://test.com/')
    return request


@@ -264,7 +264,7 @@ async def test_create_checkout_session_success(
 async def test_success_callback_session_not_found(async_session_maker):
    """Test success callback when billing session is not found."""
    mock_request = Request(scope={'type': 'http'})
-    mock_request._base_url = URL('http://test.com/')
+    mock_request._url = URL('http://test.com/')

    with (
        patch('server.routes.billing.a_session_maker', async_session_maker),
@@ -281,7 +281,7 @@ async def test_success_callback_stripe_incomplete(
 ):
    """Test success callback when Stripe session is not complete."""
    mock_request = Request(scope={'type': 'http'})
-    mock_request._base_url = URL('http://test.com/')
+    mock_request._url = URL('http://test.com/')

    session_id = 'test_incomplete_session'
    async with async_session_maker() as session:
@@ -319,7 +319,7 @@ async def test_success_callback_stripe_incomplete(
 async def test_success_callback_success(async_session_maker, test_org, test_user):
    """Test successful payment completion and credit update."""
    mock_request = Request(scope={'type': 'http'})
-    mock_request._base_url = URL('http://test.com/')
+    mock_request._url = URL('http://test.com/')

    session_id = 'test_success_session'
    async with async_session_maker() as session:
@@ -391,7 +391,7 @@ async def test_success_callback_lite_llm_error(
 ):
    """Test handling of LiteLLM API errors during success callback."""
    mock_request = Request(scope={'type': 'http'})
-    mock_request._base_url = URL('http://test.com/')
+    mock_request._url = URL('http://test.com/')

    session_id = 'test_litellm_error_session'
    async with async_session_maker() as session:
@@ -445,7 +445,7 @@ async def test_success_callback_lite_llm_update_budget_error_rollback(
    the database transaction rolls back.
    """
    mock_request = Request(scope={'type': 'http'})
-    mock_request._base_url = URL('http://test.com/')
+    mock_request._url = URL('http://test.com/')

    session_id = 'test_budget_rollback_session'
    async with async_session_maker() as session:
@@ -502,7 +502,7 @@ async def test_success_callback_lite_llm_update_budget_error_rollback(
 async def test_cancel_callback_session_not_found(async_session_maker):
    """Test cancel callback when billing session is not found."""
    mock_request = Request(scope={'type': 'http'})
-    mock_request._base_url = URL('http://test.com/')
+    mock_request._url = URL('http://test.com/')

    with patch('server.routes.billing.a_session_maker', async_session_maker):
        response = await cancel_callback('nonexistent_session_id', mock_request)
@@ -517,7 +517,7 @@ async def test_cancel_callback_session_not_found(async_session_maker):
 async def test_cancel_callback_success(async_session_maker, test_org, test_user):
    """Test successful cancellation of billing session."""
    mock_request = Request(scope={'type': 'http'})
-    mock_request._base_url = URL('http://test.com/')
+    mock_request._url = URL('http://test.com/')

    session_id = 'test_cancel_session'
    async with async_session_maker() as session:
@@ -588,7 +588,7 @@ async def test_create_customer_setup_session_success():
            'headers': [],
        }
    )
-    mock_request._base_url = URL('http://test.com/')
+    mock_request._url = URL('http://test.com/')

    mock_customer_info = {'customer_id': 'mock-customer-id', 'org_id': 'mock-org-id'}
    mock_session = MagicMock()
@@ -613,6 +613,6 @@ async def test_create_customer_setup_session_success():
            customer='mock-customer-id',
            mode='setup',
            payment_method_types=['card'],
-            success_url='https://test.com/?setup=success',
-            cancel_url='https://test.com/',
+            success_url='https://test.com?setup=success',
+            cancel_url='https://test.com',
        )
@@ -98,6 +98,11 @@ class TestAcceptInvitationEmailValidation:

        mock_keycloak_user_info = {'email': 'alice@example.com'}  # Email from Keycloak

+        mock_org = MagicMock()
+        mock_org.default_llm_model = 'test-model'
+        mock_org.default_llm_base_url = None
+        mock_org.default_max_iterations = None
+
        with (
            patch(
                'server.services.org_invitation_service.OrgInvitationStore.get_invitation_by_token',
@@ -121,6 +126,10 @@ class TestAcceptInvitationEmailValidation:
                'server.services.org_invitation_service.OrgService.create_litellm_integration',
                new_callable=AsyncMock,
            ) as mock_create_litellm,
+            patch(
+                'server.services.org_invitation_service.OrgStore.get_org_by_id',
+                new_callable=AsyncMock,
+            ) as mock_get_org,
            patch(
                'server.services.org_invitation_service.OrgMemberStore.add_user_to_org',
                new_callable=AsyncMock,
@@ -145,6 +154,7 @@ class TestAcceptInvitationEmailValidation:
            mock_settings = MagicMock()
            mock_settings.llm_api_key = SecretStr('test-key')
            mock_create_litellm.return_value = mock_settings
+            mock_get_org.return_value = mock_org
            mock_update_status.return_value = mock_invitation

            # Act - should not raise error because Keycloak email matches
@@ -214,6 +224,11 @@ class TestAcceptInvitationEmailValidation:

        mock_invitation.email = 'alice@example.com'  # Lowercase in invitation

+        mock_org = MagicMock()
+        mock_org.default_llm_model = 'test-model'
+        mock_org.default_llm_base_url = None
+        mock_org.default_max_iterations = None
+
        with (
            patch(
                'server.services.org_invitation_service.OrgInvitationStore.get_invitation_by_token',
@@ -234,6 +249,10 @@ class TestAcceptInvitationEmailValidation:
                'server.services.org_invitation_service.OrgService.create_litellm_integration',
                new_callable=AsyncMock,
            ) as mock_create_litellm,
+            patch(
+                'server.services.org_invitation_service.OrgStore.get_org_by_id',
+                new_callable=AsyncMock,
+            ) as mock_get_org,
            patch(
                'server.services.org_invitation_service.OrgMemberStore.add_user_to_org',
                new_callable=AsyncMock,
@@ -250,6 +269,7 @@ class TestAcceptInvitationEmailValidation:
            mock_settings = MagicMock()
            mock_settings.llm_api_key = SecretStr('test-key')
            mock_create_litellm.return_value = mock_settings
+            mock_get_org.return_value = mock_org
            mock_update_status.return_value = mock_invitation

            # Act - should not raise error because emails match case-insensitively
@@ -258,6 +278,75 @@ class TestAcceptInvitationEmailValidation:
            # Assert - invitation was accepted (update_invitation_status was called)
            mock_update_status.assert_called_once()

+    @pytest.mark.asyncio
+    async def test_accept_invitation_inherits_org_llm_settings(self, mock_invitation):
+        """Test that accepting an invitation passes the org's default LLM model, base URL and max iterations to add_user_to_org."""
+        # Arrange - the invited user plus an org with distinctive default_* LLM settings
+        user_id = UUID('87654321-4321-8765-4321-876543218765')
+        token = 'inv-test-token-12345'
+
+        mock_user = MagicMock()
+        mock_user.id = user_id
+        mock_user.email = 'alice@example.com'
+
+        mock_org = MagicMock()
+        mock_org.default_llm_model = 'claude-sonnet-4'
+        mock_org.default_llm_base_url = 'https://api.anthropic.com'
+        mock_org.default_max_iterations = 100
+
+        with (
+            patch(
+                'server.services.org_invitation_service.OrgInvitationStore.get_invitation_by_token',
+                new_callable=AsyncMock,
+            ) as mock_get_invitation,
+            patch(
+                'server.services.org_invitation_service.OrgInvitationStore.is_token_expired'
+            ) as mock_is_expired,
+            patch(
+                'server.services.org_invitation_service.UserStore.get_user_by_id',
+                new_callable=AsyncMock,
+            ) as mock_get_user,
+            patch(
+                'server.services.org_invitation_service.OrgMemberStore.get_org_member',
+                new_callable=AsyncMock,
+            ) as mock_get_member,
+            patch(
+                'server.services.org_invitation_service.OrgService.create_litellm_integration',
+                new_callable=AsyncMock,
+            ) as mock_create_litellm,
+            patch(
+                'server.services.org_invitation_service.OrgStore.get_org_by_id',
+                new_callable=AsyncMock,
+            ) as mock_get_org,
+            patch(
+                'server.services.org_invitation_service.OrgMemberStore.add_user_to_org',
+                new_callable=AsyncMock,
+            ) as mock_add_user,
+            patch(
+                'server.services.org_invitation_service.OrgInvitationStore.update_invitation_status',
+                new_callable=AsyncMock,
+            ) as mock_update_status,
+        ):
+            mock_get_invitation.return_value = mock_invitation
+            mock_is_expired.return_value = False
+            mock_get_user.return_value = mock_user
+            mock_get_member.return_value = None
+            mock_settings = MagicMock()
+            mock_settings.llm_api_key = SecretStr('test-key')
+            mock_create_litellm.return_value = mock_settings
+            mock_get_org.return_value = mock_org
+            mock_update_status.return_value = mock_invitation
+
+            # Act - accept the invitation with the store and service calls above patched
+            await OrgInvitationService.accept_invitation(token, user_id)
+
+            # Assert - verify add_user_to_org was called with org's LLM settings
+            mock_add_user.assert_called_once()
+            call_kwargs = mock_add_user.call_args.kwargs
+            assert call_kwargs['llm_model'] == 'claude-sonnet-4'
+            assert call_kwargs['llm_base_url'] == 'https://api.anthropic.com'
+            assert call_kwargs['max_iterations'] == 100
+

 class TestCreateInvitationsBatch:
    """Test cases for batch invitation creation."""
@@ -246,6 +246,43 @@ async def test_add_user_to_org(async_session_maker):
        assert org_member.status == 'active'


+@pytest.mark.asyncio
+async def test_add_user_to_org_with_llm_settings(async_session_maker):
+    """Test that add_user_to_org returns an OrgMember carrying the llm_model, llm_base_url and max_iterations it was given."""
+    # Arrange - commit an org, a user and a role, keeping their ids for use after the session closes
+    async with async_session_maker() as session:
+        org = Org(name='test-org-llm')
+        session.add(org)
+        await session.flush()
+
+        user = User(id=uuid.uuid4(), current_org_id=org.id)
+        role = Role(name='member', rank=2)
+        session.add_all([user, role])
+        await session.commit()
+        org_id = org.id
+        user_id = user.id
+        role_id = role.id
+
+    # Act - patch the store's a_session_maker with the fixture's session maker
+    with patch('storage.org_member_store.a_session_maker', async_session_maker):
+        org_member = await OrgMemberStore.add_user_to_org(
+            org_id=org_id,
+            user_id=user_id,
+            role_id=role_id,
+            llm_api_key='test-api-key',
+            status='active',
+            llm_model='claude-sonnet-4',
+            llm_base_url='https://api.example.com',
+            max_iterations=50,
+        )
+
+    # Assert - the returned member exposes exactly the LLM settings passed in
+    assert org_member is not None
+    assert org_member.llm_model == 'claude-sonnet-4'
+    assert org_member.llm_base_url == 'https://api.example.com'
+    assert org_member.max_iterations == 50
+
+
 @pytest.mark.asyncio
 async def test_update_user_role_in_org(async_session_maker):
    # Test updating user role in org
@@ -1,3 +1,4 @@
+import uuid
 from unittest.mock import AsyncMock, MagicMock, patch

 import pytest
@@ -233,3 +234,206 @@ async def test_ensure_api_key_generates_new_key_when_verification_fails(

        assert item.llm_api_key is not None
        assert item.llm_api_key.get_secret_value() == new_key
+
+
+@pytest.fixture
+def org_with_multiple_members_fixture(session_maker):
+    """Set up one organization with three active members, each starting with different LLM settings.
+
+    Uses sync session to avoid UUID conversion issues with async SQLite.
+    """
+    from storage.encrypt_utils import decrypt_value
+    from storage.org import Org
+    from storage.org_member import OrgMember
+    from storage.role import Role
+    from storage.user import User
+
+    # Use realistic UUIDs that work well with SQLite
+    org_id = uuid.UUID('5594c7b6-f959-4b81-92e9-b09c206f5081')
+    admin_user_id = uuid.UUID('5594c7b6-f959-4b81-92e9-b09c206f5082')
+    member1_user_id = uuid.UUID('5594c7b6-f959-4b81-92e9-b09c206f5083')
+    member2_user_id = uuid.UUID('5594c7b6-f959-4b81-92e9-b09c206f5084')
+
+    with session_maker() as session:
+        # Create the one role (id=10) that every member below is assigned
+        role = Role(id=10, name='member', rank=3)
+        session.add(role)
+
+        # Create org
+        org = Org(
+            id=org_id,
+            name='test-org',
+            org_version=1,
+            enable_default_condenser=True,
+            enable_proactive_conversation_starters=True,
+        )
+        session.add(org)
+
+        # Create three users whose current org is the org above
+        admin_user = User(
+            id=admin_user_id, current_org_id=org_id, user_consents_to_analytics=True
+        )
+        session.add(admin_user)
+
+        member1_user = User(
+            id=member1_user_id, current_org_id=org_id, user_consents_to_analytics=True
+        )
+        session.add(member1_user)
+
+        member2_user = User(
+            id=member2_user_id, current_org_id=org_id, user_consents_to_analytics=True
+        )
+        session.add(member2_user)
+
+        # Create org members with DIFFERENT initial LLM settings
+        admin_member = OrgMember(
+            org_id=org_id,
+            user_id=admin_user_id,
+            role_id=10,
+            llm_api_key='admin-initial-key',
+            llm_model='old-model-v1',
+            llm_base_url='http://old-url-1.com',
+            max_iterations=10,
+            status='active',
+        )
+        session.add(admin_member)
+
+        member1 = OrgMember(
+            org_id=org_id,
+            user_id=member1_user_id,
+            role_id=10,
+            llm_api_key='member1-initial-key',
+            llm_model='old-model-v2',
+            llm_base_url='http://old-url-2.com',
+            max_iterations=20,
+            status='active',
+        )
+        session.add(member1)
+
+        member2 = OrgMember(
+            org_id=org_id,
+            user_id=member2_user_id,
+            role_id=10,
+            llm_api_key='member2-initial-key',
+            llm_model='old-model-v3',
+            llm_base_url='http://old-url-3.com',
+            max_iterations=30,
+            status='active',
+        )
+        session.add(member2)
+
+        session.commit()
+
+    return {
+        'org_id': org_id,
+        'admin_user_id': admin_user_id,
+        'member1_user_id': member1_user_id,
+        'member2_user_id': member2_user_id,
+        'decrypt_value': decrypt_value,
+    }
+
+
+@pytest.mark.asyncio
+async def test_store_propagates_llm_settings_to_all_org_members(
+    session_maker, async_session_maker, mock_config, org_with_multiple_members_fixture
+):
+    """When admin saves LLM settings, all org members should receive the updated settings.
+
+    This test verifies using a real database that:
+    1. The bulk UPDATE targets the correct organization (WHERE clause is correct)
+    2. All LLM fields are correctly set (llm_model, llm_base_url, max_iterations, llm_api_key)
+    3. The llm_api_key is properly encrypted
+    4. All members in the org receive the same updated values
+    """
+    from sqlalchemy import select
+    from storage.org_member import OrgMember
+
+    # Arrange - the store is created for the admin user's id (passed as a string)
+    fixture = org_with_multiple_members_fixture
+    org_id = fixture['org_id']
+    admin_user_id = str(fixture['admin_user_id'])
+    decrypt_value = fixture['decrypt_value']
+
+    store = SaasSettingsStore(admin_user_id, mock_config)
+
+    new_settings = DataSettings(
+        llm_model='new-shared-model/gpt-4',
+        llm_base_url='http://new-shared-url.com',
+        max_iterations=100,
+        llm_api_key=SecretStr('new-shared-api-key'),
+    )
+
+    # Act - call store() with async session
+    with patch('storage.saas_settings_store.a_session_maker', async_session_maker):
+        await store.store(new_settings)
+
+    # Assert - verify ALL org members have the updated LLM settings using sync session
+    with session_maker() as session:
+        result = session.execute(select(OrgMember).where(OrgMember.org_id == org_id))
+        members = result.scalars().all()
+
+        # Verify we have all 3 members created by the fixture
+        assert len(members) == 3, f'Expected 3 org members, got {len(members)}'
+
+        for member in members:
+            # Verify LLM model is updated
+            assert (
+                member.llm_model == 'new-shared-model/gpt-4'
+            ), f'Expected llm_model to be updated for member {member.user_id}'
+
+            # Verify LLM base URL is updated
+            assert (
+                member.llm_base_url == 'http://new-shared-url.com'
+            ), f'Expected llm_base_url to be updated for member {member.user_id}'
+
+            # Verify max_iterations is updated
+            assert (
+                member.max_iterations == 100
+            ), f'Expected max_iterations to be 100 for member {member.user_id}'
+
+            # Verify the API key is encrypted and decrypts to the correct value
+            decrypted_key = decrypt_value(member._llm_api_key)
+            assert (
+                decrypted_key == 'new-shared-api-key'
+            ), f'Expected llm_api_key to decrypt to new-shared-api-key for member {member.user_id}'
+
+
+@pytest.mark.asyncio
+async def test_store_updates_org_default_llm_settings(
+    session_maker, async_session_maker, mock_config, org_with_multiple_members_fixture
+):
+    """When admin saves LLM settings, org's default_llm_model/base_url/max_iterations should be updated.
+
+    This test verifies that the Org table's default settings are updated so that
+    new members joining later will inherit the correct LLM configuration.
+    """
+    from sqlalchemy import select
+    from storage.org import Org
+
+    # Arrange - the store is created for the admin user's id (passed as a string)
+    fixture = org_with_multiple_members_fixture
+    org_id = fixture['org_id']
+    admin_user_id = str(fixture['admin_user_id'])
+
+    store = SaasSettingsStore(admin_user_id, mock_config)
+
+    new_settings = DataSettings(
+        llm_model='anthropic/claude-sonnet-4',
+        llm_base_url='https://api.anthropic.com/v1',
+        max_iterations=75,
+        llm_api_key=SecretStr('test-api-key'),
+    )
+
+    # Act - call store() with the async session maker patched in
+    with patch('storage.saas_settings_store.a_session_maker', async_session_maker):
+        await store.store(new_settings)
+
+    # Assert - verify org's default fields were updated
+    with session_maker() as session:
+        result = session.execute(select(Org).where(Org.id == org_id))
+        org = result.scalars().first()
+
+        assert org is not None
+        assert org.default_llm_model == 'anthropic/claude-sonnet-4'
+        assert org.default_llm_base_url == 'https://api.anthropic.com/v1'
+        assert org.default_max_iterations == 75
@@ -0,0 +1 @@
+# Tests for enterprise server utils
@@ -0,0 +1,425 @@
+"""Tests for URL utility functions that prevent URL hijacking attacks."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+class TestGetWebUrl:
+    """Tests for get_web_url: a configured web_url wins, otherwise the URL is built from the request."""
+
+    @pytest.fixture
+    def mock_request(self):
+        """Create a MagicMock request whose url attribute the tests fill in (hostname, netloc)."""
+        request = MagicMock()
+        request.url = MagicMock()
+        return request
+
+    def test_configured_web_url_is_used(self, mock_request):
+        """When web_url is configured, it should be used instead of request URL."""
+        from server.utils.url_utils import get_web_url
+
+        mock_request.url.hostname = 'evil-attacker.com'
+        mock_request.url.netloc = 'evil-attacker.com:443'
+
+        mock_config = MagicMock()
+        mock_config.web_url = 'https://app.all-hands.dev'
+
+        with patch(
+            'server.utils.url_utils.get_global_config', return_value=mock_config
+        ):
+            result = get_web_url(mock_request)
+
+        assert result == 'https://app.all-hands.dev'
+        # Should not use any info from the potentially poisoned request
+        assert 'evil-attacker.com' not in result
+
+    def test_configured_web_url_trailing_slash_stripped(self, mock_request):
+        """Configured web_url should have trailing slashes stripped."""
+        from server.utils.url_utils import get_web_url
+
+        mock_config = MagicMock()
+        mock_config.web_url = 'https://app.all-hands.dev/'
+
+        with patch(
+            'server.utils.url_utils.get_global_config', return_value=mock_config
+        ):
+            result = get_web_url(mock_request)
+
+        assert result == 'https://app.all-hands.dev'
+        assert not result.endswith('/')
+
+    def test_unconfigured_web_url_localhost_uses_http(self, mock_request):
+        """When web_url is not configured and hostname is localhost, use http."""
+        from server.utils.url_utils import get_web_url
+
+        mock_request.url.hostname = 'localhost'
+        mock_request.url.netloc = 'localhost:3000'
+
+        mock_config = MagicMock()
+        mock_config.web_url = None
+
+        with patch(
+            'server.utils.url_utils.get_global_config', return_value=mock_config
+        ):
+            result = get_web_url(mock_request)
+
+        assert result == 'http://localhost:3000'
+
+    def test_unconfigured_web_url_non_localhost_uses_https(self, mock_request):
+        """When web_url is not configured and hostname is not localhost, use https."""
+        from server.utils.url_utils import get_web_url
+
+        mock_request.url.hostname = 'example.com'
+        mock_request.url.netloc = 'example.com:443'
+
+        mock_config = MagicMock()
+        mock_config.web_url = None
+
+        with patch(
+            'server.utils.url_utils.get_global_config', return_value=mock_config
+        ):
+            result = get_web_url(mock_request)
+
+        assert result == 'https://example.com:443'
+
+    def test_unconfigured_web_url_empty_string_fallback(self, mock_request):
+        """Empty string web_url should trigger fallback."""
+        from server.utils.url_utils import get_web_url
+
+        mock_request.url.hostname = 'localhost'
+        mock_request.url.netloc = 'localhost:3000'
+
+        mock_config = MagicMock()
+        mock_config.web_url = ''
+
+        with patch(
+            'server.utils.url_utils.get_global_config', return_value=mock_config
+        ):
+            result = get_web_url(mock_request)
+
+        assert result == 'http://localhost:3000'
+
+
+class TestGetCookieDomain:
+    """Tests for get_cookie_domain: hostname of web_url in production, None when unset or in local/staging/feature envs."""
+
+    def test_production_with_configured_web_url(self):
+        """In production with web_url configured, should return hostname."""
+        from server.utils.url_utils import get_cookie_domain
+
+        mock_config = MagicMock()
+        mock_config.web_url = 'https://app.all-hands.dev'
+
+        with (
+            patch('server.utils.url_utils.get_global_config', return_value=mock_config),
+            patch('server.utils.url_utils.IS_FEATURE_ENV', False),
+            patch('server.utils.url_utils.IS_STAGING_ENV', False),
+            patch('server.utils.url_utils.IS_LOCAL_ENV', False),
+        ):
+            result = get_cookie_domain()
+
+        assert result == 'app.all-hands.dev'
+
+    def test_production_without_web_url_returns_none(self):
+        """In production without web_url configured, should return None."""
+        from server.utils.url_utils import get_cookie_domain
+
+        mock_config = MagicMock()
+        mock_config.web_url = None
+
+        with (
+            patch('server.utils.url_utils.get_global_config', return_value=mock_config),
+            patch('server.utils.url_utils.IS_FEATURE_ENV', False),
+            patch('server.utils.url_utils.IS_STAGING_ENV', False),
+            patch('server.utils.url_utils.IS_LOCAL_ENV', False),
+        ):
+            result = get_cookie_domain()
+
+        assert result is None
+
+    def test_local_env_returns_none(self):
+        """In local environment, should return None for cookie domain."""
+        from server.utils.url_utils import get_cookie_domain
+
+        mock_config = MagicMock()
+        mock_config.web_url = 'https://app.all-hands.dev'
+
+        with (
+            patch('server.utils.url_utils.get_global_config', return_value=mock_config),
+            patch('server.utils.url_utils.IS_FEATURE_ENV', False),
+            patch('server.utils.url_utils.IS_STAGING_ENV', False),
+            patch('server.utils.url_utils.IS_LOCAL_ENV', True),
+        ):
+            result = get_cookie_domain()
+
+        assert result is None
+
+    def test_staging_env_returns_none(self):
+        """In staging environment, should return None for cookie domain."""
+        from server.utils.url_utils import get_cookie_domain
+
+        mock_config = MagicMock()
+        mock_config.web_url = 'https://staging.all-hands.dev'
+
+        with (
+            patch('server.utils.url_utils.get_global_config', return_value=mock_config),
+            patch('server.utils.url_utils.IS_FEATURE_ENV', False),
+            patch('server.utils.url_utils.IS_STAGING_ENV', True),
+            patch('server.utils.url_utils.IS_LOCAL_ENV', False),
+        ):
+            result = get_cookie_domain()
+
+        assert result is None
+
+    def test_feature_env_returns_none(self):
+        """In feature environment, should return None for cookie domain."""
+        from server.utils.url_utils import get_cookie_domain
+
+        mock_config = MagicMock()
+        mock_config.web_url = 'https://feature-123.staging.all-hands.dev'
+
+        with (
+            patch('server.utils.url_utils.get_global_config', return_value=mock_config),
+            patch('server.utils.url_utils.IS_FEATURE_ENV', True),
+            patch('server.utils.url_utils.IS_STAGING_ENV', True),
+            patch('server.utils.url_utils.IS_LOCAL_ENV', False),
+        ):
+            result = get_cookie_domain()
+
+        assert result is None
+
+
+class TestGetCookieSamesite:
+    """Tests for get_cookie_samesite: 'strict' in production with web_url set, 'lax' in every other case tested."""
+
+    def test_production_with_configured_web_url_returns_strict(self):
+        """In production with web_url configured, should return 'strict'."""
+        from server.utils.url_utils import get_cookie_samesite
+
+        mock_config = MagicMock()
+        mock_config.web_url = 'https://app.all-hands.dev'
+
+        with (
+            patch('server.utils.url_utils.get_global_config', return_value=mock_config),
+            patch('server.utils.url_utils.IS_FEATURE_ENV', False),
+            patch('server.utils.url_utils.IS_STAGING_ENV', False),
+            patch('server.utils.url_utils.IS_LOCAL_ENV', False),
+        ):
+            result = get_cookie_samesite()
+
+        assert result == 'strict'
+
+    def test_production_without_web_url_returns_lax(self):
+        """In production without web_url configured, should return 'lax'."""
+        from server.utils.url_utils import get_cookie_samesite
+
+        mock_config = MagicMock()
+        mock_config.web_url = None
+
+        with (
+            patch('server.utils.url_utils.get_global_config', return_value=mock_config),
+            patch('server.utils.url_utils.IS_FEATURE_ENV', False),
+            patch('server.utils.url_utils.IS_STAGING_ENV', False),
+            patch('server.utils.url_utils.IS_LOCAL_ENV', False),
+        ):
+            result = get_cookie_samesite()
+
+        assert result == 'lax'
+
+    def test_local_env_returns_lax(self):
+        """In local environment, should return 'lax'."""
+        from server.utils.url_utils import get_cookie_samesite
+
+        mock_config = MagicMock()
+        mock_config.web_url = 'http://localhost:3000'
+
+        with (
+            patch('server.utils.url_utils.get_global_config', return_value=mock_config),
+            patch('server.utils.url_utils.IS_FEATURE_ENV', False),
+            patch('server.utils.url_utils.IS_STAGING_ENV', False),
+            patch('server.utils.url_utils.IS_LOCAL_ENV', True),
+        ):
+            result = get_cookie_samesite()
+
+        assert result == 'lax'
+
+    def test_staging_env_returns_lax(self):
+        """In staging environment, should return 'lax'."""
+        from server.utils.url_utils import get_cookie_samesite
+
+        mock_config = MagicMock()
+        mock_config.web_url = 'https://staging.all-hands.dev'
+
+        with (
+            patch('server.utils.url_utils.get_global_config', return_value=mock_config),
+            patch('server.utils.url_utils.IS_FEATURE_ENV', False),
+            patch('server.utils.url_utils.IS_STAGING_ENV', True),
+            patch('server.utils.url_utils.IS_LOCAL_ENV', False),
+        ):
+            result = get_cookie_samesite()
+
+        assert result == 'lax'
+
+    def test_feature_env_returns_lax(self):
+        """In feature environment, should return 'lax'."""
+        from server.utils.url_utils import get_cookie_samesite
+
+        mock_config = MagicMock()
+        mock_config.web_url = 'https://feature-xyz.staging.all-hands.dev'
+
+        with (
+            patch('server.utils.url_utils.get_global_config', return_value=mock_config),
+            patch('server.utils.url_utils.IS_FEATURE_ENV', True),
+            patch('server.utils.url_utils.IS_STAGING_ENV', True),
+            patch('server.utils.url_utils.IS_LOCAL_ENV', False),
+        ):
+            result = get_cookie_samesite()
+
+        assert result == 'lax'
+
+    def test_empty_web_url_returns_lax(self):
+        """Empty web_url should be treated as unconfigured and return 'lax'."""
+        from server.utils.url_utils import get_cookie_samesite
+
+        mock_config = MagicMock()
+        mock_config.web_url = ''
+
+        with (
+            patch('server.utils.url_utils.get_global_config', return_value=mock_config),
+            patch('server.utils.url_utils.IS_FEATURE_ENV', False),
+            patch('server.utils.url_utils.IS_STAGING_ENV', False),
+            patch('server.utils.url_utils.IS_LOCAL_ENV', False),
+        ):
+            result = get_cookie_samesite()
+
+        assert result == 'lax'
+
+
+class TestSecurityScenarios:
+    """Tests for security-critical scenarios."""
+
+    @pytest.fixture
+    def mock_request(self):
+        """Create a mock FastAPI request object."""
+        request = MagicMock()
+        request.url = MagicMock()
+        return request
+
+    def test_header_poisoning_attack_blocked_when_configured(self, mock_request):
+        """
+        When web_url is configured, X-Forwarded-* header poisoning should not affect
+        the returned URL.
+        """
+        from server.utils.url_utils import get_web_url
+
+        # Simulate a poisoned request where attacker controls headers
+        mock_request.url.hostname = 'evil.com'
+        mock_request.url.netloc = 'evil.com:443'
+
+        mock_config = MagicMock()
+        mock_config.web_url = 'https://app.all-hands.dev'
+
+        with patch(
+            'server.utils.url_utils.get_global_config', return_value=mock_config
+        ):
+            result = get_web_url(mock_request)
+
+        # Should use configured web_url, not the poisoned request data
+        assert result == 'https://app.all-hands.dev'
+        assert 'evil' not in result
+
+    def test_cookie_domain_not_set_in_dev_environments(self):
+        """
+        Cookie domain should not be set in development environments to prevent
+        cookies from leaking to other subdomains.
+        """
+        from server.utils.url_utils import get_cookie_domain
+
+        mock_config = MagicMock()
+        mock_config.web_url = 'https://my-feature.staging.all-hands.dev'
+
+        # Test each dev environment
+        for env_name, env_config in [
+            (
+                'local',
+                {
+                    'IS_LOCAL_ENV': True,
+                    'IS_STAGING_ENV': False,
+                    'IS_FEATURE_ENV': False,
+                },
+            ),
+            (
+                'staging',
+                {
+                    'IS_LOCAL_ENV': False,
+                    'IS_STAGING_ENV': True,
+                    'IS_FEATURE_ENV': False,
+                },
+            ),
+            (
+                'feature',
+                {'IS_LOCAL_ENV': False, 'IS_STAGING_ENV': True, 'IS_FEATURE_ENV': True},
+            ),
+        ]:
+            with (
+                patch(
+                    'server.utils.url_utils.get_global_config', return_value=mock_config
+                ),
+                patch(
+                    'server.utils.url_utils.IS_FEATURE_ENV',
+                    env_config['IS_FEATURE_ENV'],
+                ),
+                patch(
+                    'server.utils.url_utils.IS_STAGING_ENV',
+                    env_config['IS_STAGING_ENV'],
+                ),
+                patch(
+                    'server.utils.url_utils.IS_LOCAL_ENV', env_config['IS_LOCAL_ENV']
+                ),
+            ):
+                result = get_cookie_domain()
+                assert result is None, f'Expected None for {env_name} environment'
+
+    def test_strict_samesite_only_in_production(self):
+        """
+        SameSite=strict should only be set in production to ensure proper
+        security without breaking OAuth flows in development.
+        """
+        from server.utils.url_utils import get_cookie_samesite
+
+        mock_config = MagicMock()
+        mock_config.web_url = 'https://app.all-hands.dev'
+
+        # Production should be strict
+        with (
+            patch('server.utils.url_utils.get_global_config', return_value=mock_config),
+            patch('server.utils.url_utils.IS_FEATURE_ENV', False),
+            patch('server.utils.url_utils.IS_STAGING_ENV', False),
+            patch('server.utils.url_utils.IS_LOCAL_ENV', False),
+        ):
+            assert get_cookie_samesite() == 'strict'
+
+        # Dev environments should be lax
+        for env_config in [
+            {'IS_LOCAL_ENV': True, 'IS_STAGING_ENV': False, 'IS_FEATURE_ENV': False},
+            {'IS_LOCAL_ENV': False, 'IS_STAGING_ENV': True, 'IS_FEATURE_ENV': False},
+            {'IS_LOCAL_ENV': False, 'IS_STAGING_ENV': True, 'IS_FEATURE_ENV': True},
+        ]:
+            with (
+                patch(
+                    'server.utils.url_utils.get_global_config', return_value=mock_config
+                ),
+                patch(
+                    'server.utils.url_utils.IS_FEATURE_ENV',
+                    env_config['IS_FEATURE_ENV'],
+                ),
+                patch(
+                    'server.utils.url_utils.IS_STAGING_ENV',
+                    env_config['IS_STAGING_ENV'],
+                ),
+                patch(
+                    'server.utils.url_utils.IS_LOCAL_ENV', env_config['IS_LOCAL_ENV']
+                ),
+            ):
+                assert get_cookie_samesite() == 'lax'
@@ -84,12 +84,12 @@ describe("TaskTrackingObservationContent", () => {
    expect(taskItems).toHaveLength(3);
  });

-  it("displays task IDs and notes", () => {
+  it("does not display task IDs but displays notes", () => {
    render(<TaskTrackingObservationContent event={mockEvent} />);

-    expect(screen.getByText("ID: task-1")).toBeInTheDocument();
-    expect(screen.getByText("ID: task-2")).toBeInTheDocument();
-    expect(screen.getByText("ID: task-3")).toBeInTheDocument();
+    expect(screen.queryByText("ID: task-1")).not.toBeInTheDocument();
+    expect(screen.queryByText("ID: task-2")).not.toBeInTheDocument();
+    expect(screen.queryByText("ID: task-3")).not.toBeInTheDocument();

    expect(screen.getByText("Notes: This is a test task")).toBeInTheDocument();
    expect(
@@ -0,0 +1,83 @@
+import { render, screen } from "@testing-library/react";
+import userEvent from "@testing-library/user-event";
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { ConversationTabsContextMenu } from "#/components/features/conversation/conversation-tabs/conversation-tabs-context-menu";
+
+const CONVERSATION_ID = "conv-abc123"; // fixed id returned by the mocked useConversationId below
+
+vi.mock("#/hooks/use-conversation-id", () => ({
+  useConversationId: () => ({ conversationId: CONVERSATION_ID }),
+}));
+
+let mockHasTaskList = false; // per-test switch; the mocked useTaskList reads it on every call
+vi.mock("#/hooks/use-task-list", () => ({
+  useTaskList: () => ({
+    hasTaskList: mockHasTaskList,
+    taskList: [], // always empty in this mock; only hasTaskList varies
+  }),
+}));
+
+describe("ConversationTabsContextMenu", () => {
+  beforeEach(() => {
+    mockHasTaskList = false;
+    localStorage.clear();
+  });
+
+  it("should render nothing when isOpen is false", () => {
+    const closedMenu = (
+      <ConversationTabsContextMenu isOpen={false} onClose={vi.fn()} />
+    );
+
+    expect(render(closedMenu).container.innerHTML).toBe("");
+  });
+
+  it("should render all default tabs when open", () => {
+    render(<ConversationTabsContextMenu isOpen={true} onClose={vi.fn()} />);
+
+    // Every default tab label must be present in the open menu
+    [
+      "COMMON$PLANNER",
+      "COMMON$CHANGES",
+      "COMMON$CODE",
+      "COMMON$TERMINAL",
+      "COMMON$APP",
+      "COMMON$BROWSER",
+    ].forEach((tabLabel) => {
+      expect(screen.getByText(tabLabel)).toBeInTheDocument();
+    });
+  });
+
+  it("should re-pin a tab when clicking an unpinned tab", async () => {
+    const user = userEvent.setup();
+
+    // Reads the persisted unpinned-tab list for the mocked conversation
+    const readUnpinnedTabs = () => {
+      const raw = localStorage.getItem(`conversation-state-${CONVERSATION_ID}`);
+      return JSON.parse(raw!).unpinnedTabs;
+    };
+
+    render(<ConversationTabsContextMenu isOpen={true} onClose={vi.fn()} />);
+
+    const terminalEntry = screen.getByText("COMMON$TERMINAL");
+
+    // First click unpins the terminal tab
+    await user.click(terminalEntry);
+    expect(readUnpinnedTabs()).toContain("terminal");
+
+    // Second click on the same entry pins it again
+    await user.click(terminalEntry);
+    expect(readUnpinnedTabs()).not.toContain("terminal");
+  });
+
+  describe("with tasklist", () => {
+    beforeEach(() => {
+      mockHasTaskList = true;
+    });
+
+    it("should show tasklist in context menu when hasTaskList is true", () => {
+      render(<ConversationTabsContextMenu isOpen={true} onClose={vi.fn()} />);
+      const taskListEntry = screen.getByText("COMMON$TASK_LIST");
+      expect(taskListEntry).toBeInTheDocument();
+    });
+  });
+});
@@ -4,7 +4,6 @@ import { describe, it, expect, vi, beforeEach } from "vitest";
 import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
 import { MemoryRouter } from "react-router";
 import { ConversationTabs } from "#/components/features/conversation/conversation-tabs/conversation-tabs";
-import { ConversationTabsContextMenu } from "#/components/features/conversation/conversation-tabs/conversation-tabs-context-menu";
 import { useConversationStore } from "#/stores/conversation-store";

 const TASK_CONVERSATION_ID = "task-ec03fb2ab8604517b24af632b058c2fd";
@@ -16,6 +15,14 @@ vi.mock("#/hooks/use-conversation-id", () => ({
  useConversationId: () => ({ conversationId: mockConversationId }),
 }));

+let mockHasTaskList = false;
+vi.mock("#/hooks/use-task-list", () => ({
+  useTaskList: () => ({
+    hasTaskList: mockHasTaskList,
+    taskList: [],
+  }),
+}));
+
 const createWrapper = (conversationId: string) => {
  return ({ children }: { children: React.ReactNode }) => (
    <MemoryRouter initialEntries={[`/conversations/${conversationId}`]}>
@@ -31,6 +38,7 @@ describe("ConversationTabs localStorage behavior", () => {
    localStorage.clear();
    vi.resetAllMocks();
    mockConversationId = TASK_CONVERSATION_ID;
+    mockHasTaskList = false;
    useConversationStore.setState({
      selectedTab: null,
      isRightPanelShown: false,
@@ -71,47 +79,6 @@ describe("ConversationTabs localStorage behavior", () => {
      expect(parsed).toHaveProperty("rightPanelShown");
      expect(parsed).toHaveProperty("unpinnedTabs");
    });
-
-    it("should store unpinned tabs in consolidated key via context menu", async () => {
-      mockConversationId = REAL_CONVERSATION_ID;
-      const user = userEvent.setup();
-
-      render(<ConversationTabsContextMenu isOpen={true} onClose={vi.fn()} />);
-
-      const terminalItem = screen.getByText("COMMON$TERMINAL");
-      await user.click(terminalItem);
-
-      const consolidatedKey = `conversation-state-${REAL_CONVERSATION_ID}`;
-      const storedState = localStorage.getItem(consolidatedKey);
-      expect(storedState).not.toBeNull();
-
-      const parsed = JSON.parse(storedState!);
-      expect(parsed.unpinnedTabs).toContain("terminal");
-    });
-
-    it("should hide a tab after unpinning it from context menu", async () => {
-      mockConversationId = REAL_CONVERSATION_ID;
-      const user = userEvent.setup();
-
-      render(
-        <>
-          <ConversationTabs />
-          <ConversationTabsContextMenu isOpen={true} onClose={vi.fn()} />
-        </>,
-        { wrapper: createWrapper(REAL_CONVERSATION_ID) },
-      );
-
-      expect(
-        screen.getByTestId("conversation-tab-terminal"),
-      ).toBeInTheDocument();
-
-      const terminalItem = screen.getByText("COMMON$TERMINAL");
-      await user.click(terminalItem);
-
-      expect(
-        screen.queryByTestId("conversation-tab-terminal"),
-      ).not.toBeInTheDocument();
-    });
  });

  describe("hook integration", () => {
@@ -205,4 +172,37 @@ describe("ConversationTabs localStorage behavior", () => {
      expect(storedState.selectedTab).toBe("browser");
    });
  });
+
+  describe("tasklist tab", () => {
+    beforeEach(() => {
+      mockHasTaskList = true;
+      mockConversationId = REAL_CONVERSATION_ID;
+    });
+
+    it("should show tasklist tab when hasTaskList is true", () => {
+      const wrapper = createWrapper(REAL_CONVERSATION_ID);
+      render(<ConversationTabs />, { wrapper });
+
+      // getByTestId throws when no element matches, so a miss fails here
+      const tasklistTab = screen.getByTestId("conversation-tab-tasklist");
+
+      expect(tasklistTab).toBeInTheDocument();
+    });
+
+    it("should select tasklist tab when clicked", async () => {
+      const user = userEvent.setup();
+      const wrapper = createWrapper(REAL_CONVERSATION_ID);
+
+      render(<ConversationTabs />, { wrapper });
+
+      // Click the tab the way a user would
+      const tasklistTab = screen.getByTestId("conversation-tab-tasklist");
+      await user.click(tasklistTab);
+
+      // The click must be reflected in the conversation store
+      const storeState = useConversationStore.getState();
+      expect(storeState.selectedTab).toBe("tasklist");
+      expect(storeState.hasRightPanelToggled).toBe(true);
+    });
+  });
 });
@@ -0,0 +1,279 @@
+import { describe, expect, it, beforeEach } from "vitest";
+import { renderHook, act } from "@testing-library/react";
+import { useTaskList } from "#/hooks/use-task-list";
+import { useEventStore } from "#/stores/use-event-store";
+import type { OHEvent } from "#/stores/use-event-store";
+import type { TaskTrackingObservation } from "#/types/core/observations";
+
+function createV0TaskTrackingObservation(
+  id: number,
+  command: string,
+  taskList: TaskTrackingObservation["extras"]["task_list"],
+): TaskTrackingObservation {
+  // The id is spliced into the seconds field, so it must stay single-digit
+  const timestamp = `2025-07-01T00:00:0${id}Z`;
+
+  return {
+    id,
+    source: "agent",
+    observation: "task_tracking",
+    message: "Task tracking update",
+    timestamp,
+    cause: 0,
+    content: "",
+    extras: { command, task_list: taskList },
+  };
+}
+
+function createV1TaskTrackerObservation(
+  id: string, // reused for the event id, the timestamp seconds and the call/action ids
+  command: string,
+  taskList: Array<{
+    title: string;
+    notes: string;
+    status: "todo" | "in_progress" | "done";
+  }>,
+): OHEvent {
+  return {
+    id,
+    timestamp: `2025-07-01T00:00:0${id}Z`, // id becomes the last seconds digit
+    source: "environment",
+    tool_name: "task_tracker",
+    tool_call_id: `call_${id}`,
+    action_id: `action_${id}`,
+    observation: {
+      kind: "TaskTrackerObservation",
+      content: "Task list updated",
+      command,
+      task_list: taskList, // v1 tasks carry no id field, only title/notes/status
+    },
+  } as unknown as OHEvent; // double cast: the literal is not type-checked against OHEvent
+}
+
+beforeEach(() => {
+  useEventStore.setState({
+    uiEvents: [],
+    events: [],
+    eventIds: new Set(), // every test starts from an empty event store
+  });
+});
+
+describe("useTaskList", () => {
+  it("returns empty taskList and hasTaskList=false when no events exist", () => {
+    const { result: hook } = renderHook(() => useTaskList());
+
+    expect(hook.current.hasTaskList).toBe(false);
+    expect(hook.current.taskList).toEqual([]);
+  });
+
+  it("returns empty taskList when no task tracking observations exist", () => {
+    useEventStore.setState({
+      uiEvents: [],
+      eventIds: new Set([1]),
+      events: [
+        {
+          id: 1,
+          source: "user",
+          action: "message",
+          args: { content: "Hello", image_urls: [], file_urls: [] },
+          message: "Hello",
+          timestamp: "2025-07-01T00:00:01Z",
+        },
+      ],
+    });
+
+    const { result: hook } = renderHook(() => useTaskList());
+
+    expect(hook.current.hasTaskList).toBe(false);
+    expect(hook.current.taskList).toEqual([]);
+  });
+
+  describe("v0 events", () => {
+    it('returns the task list from a TaskTrackingObservation with command="plan"', () => {
+      const planned = [
+        { id: "1", title: "First task", status: "todo" as const },
+        { id: "2", title: "Second task", status: "in_progress" as const },
+      ];
+      const planEvent = createV0TaskTrackingObservation(1, "plan", planned);
+
+      useEventStore.setState({
+        uiEvents: [planEvent],
+        events: [planEvent],
+        eventIds: new Set([1]),
+      });
+
+      const { result: hook } = renderHook(() => useTaskList());
+
+      expect(hook.current.hasTaskList).toBe(true);
+      expect(hook.current.taskList).toEqual(planned);
+    });
+
+    it('ignores TaskTrackingObservation events with command !== "plan"', () => {
+      const ignored = [{ id: "1", title: "First task", status: "todo" as const }];
+      const updateEvent = createV0TaskTrackingObservation(1, "update", ignored);
+
+      useEventStore.setState({
+        uiEvents: [updateEvent],
+        events: [updateEvent],
+        eventIds: new Set([1]),
+      });
+
+      const { result: hook } = renderHook(() => useTaskList());
+
+      expect(hook.current.hasTaskList).toBe(false);
+      expect(hook.current.taskList).toEqual([]);
+    });
+
+    it("returns the latest task list when multiple plan events exist", () => {
+      const firstPlan = [
+        { id: "1", title: "First task", status: "todo" as const },
+      ];
+      const secondPlan = [
+        { id: "1", title: "First task", status: "done" as const },
+        { id: "2", title: "New task", status: "in_progress" as const },
+      ];
+
+      const older = createV0TaskTrackingObservation(1, "plan", firstPlan);
+      const newer = createV0TaskTrackingObservation(2, "plan", secondPlan);
+
+      useEventStore.setState({
+        uiEvents: [older, newer],
+        events: [older, newer],
+        eventIds: new Set([1, 2]),
+      });
+
+      const { result: hook } = renderHook(() => useTaskList());
+
+      expect(hook.current.hasTaskList).toBe(true);
+      expect(hook.current.taskList).toEqual(secondPlan);
+    });
+
+    it("updates when new events are added to the store", () => {
+      const { result: hook } = renderHook(() => useTaskList());
+
+      expect(hook.current.hasTaskList).toBe(false);
+
+      const added = [{ id: "1", title: "New task", status: "todo" as const }];
+      const planEvent = createV0TaskTrackingObservation(1, "plan", added);
+
+      act(() => {
+        useEventStore.setState({
+          uiEvents: [planEvent],
+          events: [planEvent],
+          eventIds: new Set([1]),
+        });
+      });
+
+      expect(hook.current.hasTaskList).toBe(true);
+      expect(hook.current.taskList).toEqual(added);
+    });
+
+    it("returns hasTaskList=false when the latest plan has an empty task list", () => {
+      const emptyPlan = createV0TaskTrackingObservation(1, "plan", []);
+
+      useEventStore.setState({
+        uiEvents: [emptyPlan],
+        events: [emptyPlan],
+        eventIds: new Set([1]),
+      });
+
+      const { result: hook } = renderHook(() => useTaskList());
+
+      expect(hook.current.hasTaskList).toBe(false);
+      expect(hook.current.taskList).toEqual([]);
+    });
+  });
+
+  describe("v1 events", () => {
+    it('returns the task list from a v1 TaskTrackerObservation with command="plan"', () => {
+      const planned = [
+        { title: "First task", notes: "", status: "todo" as const },
+        {
+          title: "Second task",
+          notes: "some note",
+          status: "in_progress" as const,
+        },
+      ];
+      const planEvent = createV1TaskTrackerObservation("1", "plan", planned);
+
+      useEventStore.setState({
+        uiEvents: [planEvent],
+        events: [planEvent],
+        eventIds: new Set(["1"]),
+      });
+
+      const { result: hook } = renderHook(() => useTaskList());
+
+      expect(hook.current.hasTaskList).toBe(true);
+      expect(hook.current.taskList).toEqual([
+        { id: "1", title: "First task", notes: undefined, status: "todo" },
+        {
+          id: "2",
+          title: "Second task",
+          notes: "some note",
+          status: "in_progress",
+        },
+      ]);
+    });
+
+    it('ignores v1 TaskTrackerObservation with command !== "plan"', () => {
+      const ignored = [
+        { title: "First task", notes: "", status: "todo" as const },
+      ];
+      const viewEvent = createV1TaskTrackerObservation("1", "view", ignored);
+
+      useEventStore.setState({
+        uiEvents: [viewEvent],
+        events: [viewEvent],
+        eventIds: new Set(["1"]),
+      });
+
+      const { result: hook } = renderHook(() => useTaskList());
+
+      expect(hook.current.hasTaskList).toBe(false);
+      expect(hook.current.taskList).toEqual([]);
+    });
+
+    it("returns the latest v1 task list when multiple plan events exist", () => {
+      const firstPlan = [
+        { title: "First task", notes: "", status: "todo" as const },
+      ];
+      const secondPlan = [
+        { title: "First task", notes: "", status: "done" as const },
+        { title: "New task", notes: "wip", status: "in_progress" as const },
+      ];
+
+      const older = createV1TaskTrackerObservation("1", "plan", firstPlan);
+      const newer = createV1TaskTrackerObservation("2", "plan", secondPlan);
+
+      useEventStore.setState({
+        uiEvents: [older, newer],
+        events: [older, newer],
+        eventIds: new Set(["1", "2"]),
+      });
+
+      const { result: hook } = renderHook(() => useTaskList());
+
+      expect(hook.current.hasTaskList).toBe(true);
+      expect(hook.current.taskList).toEqual([
+        { id: "1", title: "First task", notes: undefined, status: "done" },
+        { id: "2", title: "New task", notes: "wip", status: "in_progress" },
+      ]);
+    });
+
+    it("returns hasTaskList=false when the latest v1 plan has an empty task list", () => {
+      const emptyPlan = createV1TaskTrackerObservation("1", "plan", []);
+
+      useEventStore.setState({
+        uiEvents: [emptyPlan],
+        events: [emptyPlan],
+        eventIds: new Set(["1"]),
+      });
+
+      const { result: hook } = renderHook(() => useTaskList());
+
+      expect(hook.current.hasTaskList).toBe(false);
+      expect(hook.current.taskList).toEqual([]);
+    });
+  });
+});
@@ -2,7 +2,7 @@ import { render, screen, waitFor } from "@testing-library/react";
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 import userEvent from "@testing-library/user-event";
 import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
-import { createRoutesStub } from "react-router";
+import { createRoutesStub, useSearchParams } from "react-router";
 import LoginPage from "#/routes/login";
 import OptionService from "#/api/option-service/option-service.api";
 import AuthService from "#/api/auth-service/auth-service.api";
@@ -80,6 +80,29 @@ const RouterStub = createRoutesStub([
  },
 ]);

+function DestinationStub() {
+  // Echoes the login_method query param so a test can read it from the DOM
+  const [searchParams] = useSearchParams();
+  const method = searchParams.get("login_method");
+
+  const methodEcho = method && (
+    <span data-testid="login-method-param">{method}</span>
+  );
+
+  return <div data-testid="destination-page">{methodEcho}</div>;
+}
+
+const RouterStubWithDestination = createRoutesStub([
+  {
+    path: "/login",
+    Component: LoginPage,
+  },
+  {
+    path: "/settings", // target of the returnTo param used by the redirect test
+    Component: DestinationStub,
+  },
+]);
+
 const createWrapper = () => {
  const queryClient = new QueryClient({
    defaultOptions: {
@@ -282,7 +305,9 @@ describe("LoginPage", () => {
      await user.click(gitlabButton);

      // URL includes state parameter added by handleAuthRedirect
-      expect(window.location.href).toContain("https://gitlab.com/oauth/authorize");
+      expect(window.location.href).toContain(
+        "https://gitlab.com/oauth/authorize",
+      );
    });

    it("should redirect to Bitbucket auth URL when Bitbucket button is clicked", async () => {
@@ -347,6 +372,30 @@ describe("LoginPage", () => {
      );
    });

+    it("should preserve login_method param when redirecting authenticated users", async () => {
+      // Arrange: authenticate() resolves to true, i.e. the user is signed in
+      vi.spyOn(AuthService, "authenticate").mockResolvedValue(true);
+
+      const startUrl = "/login?returnTo=/settings&login_method=github";
+
+      // Act: open the login route with returnTo and login_method in the query
+      render(
+        <RouterStubWithDestination initialEntries={[startUrl]} />,
+        { wrapper: createWrapper() },
+      );
+
+      // Assert: the /settings stub is rendered and the login_method it
+      // echoes is still "github" after the redirect
+      const assertRedirected = () => {
+        expect(screen.getByTestId("destination-page")).toBeInTheDocument();
+        expect(screen.getByTestId("login-method-param")).toHaveTextContent(
+          "github",
+        );
+      };
+
+      await waitFor(assertRedirected, { timeout: 2000 });
+    });
+
    it("should redirect OSS mode users to home", async () => {
      // @ts-expect-error - partial mock for testing
      vi.spyOn(OptionService, "getConfig").mockResolvedValue({
@@ -552,10 +601,12 @@ describe("LoginPage", () => {

    it("should pass buildOAuthStateData to LoginContent for OAuth state encoding", async () => {
      const user = userEvent.setup();
-      const mockBuildOAuthStateData = vi.fn((baseState: Record<string, string>) => ({
-        ...baseState,
-        invitation_token: "inv-test-token-12345",
-      }));
+      const mockBuildOAuthStateData = vi.fn(
+        (baseState: Record<string, string>) => ({
+          ...baseState,
+          invitation_token: "inv-test-token-12345",
+        }),
+      );

      useInvitationMock.mockReturnValue({
        invitationToken: "inv-test-token-12345",
@@ -585,10 +636,12 @@ describe("LoginPage", () => {

    it("should include invitation token in OAuth state when invitation is present", async () => {
      const user = userEvent.setup();
-      const mockBuildOAuthStateData = vi.fn((baseState: Record<string, string>) => ({
-        ...baseState,
-        invitation_token: "inv-test-token-12345",
-      }));
+      const mockBuildOAuthStateData = vi.fn(
+        (baseState: Record<string, string>) => ({
+          ...baseState,
+          invitation_token: "inv-test-token-12345",
+        }),
+      );

      useInvitationMock.mockReturnValue({
        invitationToken: "inv-test-token-12345",
@@ -634,9 +687,14 @@ describe("LoginPage", () => {
        clearInvitation: vi.fn(),
      });

-      render(<RouterStub initialEntries={["/login?invitation_token=inv-url-token-67890"]} />, {
-        wrapper: createWrapper(),
-      });
+      render(
+        <RouterStub
+          initialEntries={["/login?invitation_token=inv-url-token-67890"]}
+        />,
+        {
+          wrapper: createWrapper(),
+        },
+      );

      await waitFor(() => {
        expect(screen.getByText("AUTH$INVITATION_PENDING")).toBeInTheDocument();
@@ -366,6 +366,130 @@ describe("MainApp", () => {
    });
  });

+  describe("Re-authentication with stored login method", () => {
+    it("should show ReauthModal instead of redirecting to /login when login method exists", async () => {
+      // Arrange - the auth check is rejected with a 401 response ...
+      const unauthorized = {
+        response: { status: 401 },
+        isAxiosError: true,
+      };
+      vi.spyOn(AuthService, "authenticate").mockRejectedValue(unauthorized);
+
+      // ... while localStorage still holds the stored login method;
+      // every other key reads as null
+      vi.stubGlobal("localStorage", {
+        getItem: vi.fn((key: string) =>
+          key === "openhands_login_method" ? "github" : null,
+        ),
+        setItem: vi.fn(),
+        removeItem: vi.fn(),
+        clear: vi.fn(),
+      });
+
+      // Act
+      renderWithLoginStub(RouterStubWithLogin, ["/"]);
+
+      // Assert - the ReauthModal text ("Logging back in") shows up
+      const reauthTextShown = () => {
+        expect(screen.getByText("AUTH$LOGGING_BACK_IN")).toBeInTheDocument();
+      };
+
+      await waitFor(reauthTextShown, { timeout: 2000 });
+
+      // The login page must not be rendered when a login method exists
+      const loginPage = screen.queryByTestId("login-page");
+      expect(loginPage).not.toBeInTheDocument();
+    });
+
+    it("should redirect to /login when no login method is stored", async () => {
+      // Arrange - 401 from the auth check and nothing stored in localStorage
+      const unauthorized = {
+        response: { status: 401 },
+        isAxiosError: true,
+      };
+      vi.spyOn(AuthService, "authenticate").mockRejectedValue(unauthorized);
+
+      vi.stubGlobal("localStorage", {
+        getItem: vi.fn(() => null),
+        setItem: vi.fn(),
+        removeItem: vi.fn(),
+        clear: vi.fn(),
+      });
+
+      // Act
+      renderWithLoginStub(RouterStubWithLogin, ["/"]);
+
+      // Assert - without a stored method the login page is rendered
+      const loginPageShown = () => {
+        expect(screen.getByTestId("login-page")).toBeInTheDocument();
+      };
+
+      await waitFor(loginPageShown, { timeout: 2000 });
+    });
+  });
+
+  describe("Loading states", () => {
+    it("should show loading spinner while config is loading without redirecting", async () => {
+      // Arrange - getConfig hands back a promise that never settles,
+      // so the config stays in its loading state
+      const pendingForever = () => new Promise<never>(() => {});
+      vi.spyOn(OptionService, "getConfig").mockImplementation(pendingForever);
+
+      // A login method is stored; every other key reads as null
+      vi.stubGlobal("localStorage", {
+        getItem: vi.fn((key: string) =>
+          key === "openhands_login_method" ? "github" : null,
+        ),
+        setItem: vi.fn(),
+        removeItem: vi.fn(),
+        clear: vi.fn(),
+      });
+
+      // Act
+      renderWithLoginStub(RouterStubWithLogin, ["/"]);
+
+      // Assert - the loading spinner is rendered
+      const spinnerShown = () => {
+        expect(screen.getByTestId("loading-spinner")).toBeInTheDocument();
+      };
+      await waitFor(spinnerShown);
+
+      // The login page must not appear while loading
+      const loginPage = screen.queryByTestId("login-page");
+      expect(loginPage).not.toBeInTheDocument();
+    });
+
+    it("should show loading spinner while auth is loading without redirecting", async () => {
+      // Arrange - authenticate hands back a promise that never settles,
+      // so the auth check stays in its loading state
+      const pendingForever = () => new Promise<never>(() => {});
+      vi.spyOn(AuthService, "authenticate").mockImplementation(pendingForever);
+
+      // A login method is stored; every other key reads as null
+      vi.stubGlobal("localStorage", {
+        getItem: vi.fn((key: string) =>
+          key === "openhands_login_method" ? "github" : null,
+        ),
+        setItem: vi.fn(),
+        removeItem: vi.fn(),
+        clear: vi.fn(),
+      });
+
+      // Act
+      renderWithLoginStub(RouterStubWithLogin, ["/"]);
+
+      // Assert - the loading spinner is rendered
+      const spinnerShown = () => {
+        expect(screen.getByTestId("loading-spinner")).toBeInTheDocument();
+      };
+      await waitFor(spinnerShown);
+
+      // The login page must not appear while loading
+      const loginPage = screen.queryByTestId("login-page");
+      expect(loginPage).not.toBeInTheDocument();
+    });
+  });
+
  describe("Invitation URL Parameters", () => {
    beforeEach(() => {
      vi.spyOn(AuthService, "authenticate").mockRejectedValue({
@@ -0,0 +1,167 @@
+import { render, screen } from "@testing-library/react";
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import TaskListTab from "#/routes/task-list-tab";
+import { useEventStore } from "#/stores/use-event-store";
+import type { TaskTrackingObservation } from "#/types/core/observations";
+
+// Mock i18n: `t` maps the two keys these tests assert on to fixed English
+// strings and returns any other key unchanged.
+vi.mock("react-i18next", () => ({
+  useTranslation: () => ({
+    t: (key: string) => {
+      const translations: Record<string, string> = {
+        COMMON$NO_TASKS: "No tasks yet",
+        TASK_TRACKING_OBSERVATION$TASK_NOTES: "Notes",
+      };
+      return translations[key] || key;
+    },
+  }),
+}));
+
+/**
+ * Builds a v0 `task_tracking` observation that carries a "plan" command.
+ *
+ * @param id Event id; also used as the seconds field of the timestamp.
+ * @param tasks Task list placed in `extras.task_list`.
+ * @returns An observation event that can be put into the event store.
+ */
+function createTaskTrackingObservation(
+  id: number,
+  tasks: TaskTrackingObservation["extras"]["task_list"],
+): TaskTrackingObservation {
+  // Zero-pad the seconds so two-digit ids still give a well-formed timestamp
+  // ("00:00:10Z" instead of "00:00:010Z"); single-digit ids are unchanged.
+  const seconds = String(id).padStart(2, "0");
+
+  return {
+    id,
+    source: "agent",
+    observation: "task_tracking",
+    message: "Task tracking update",
+    timestamp: `2025-07-01T00:00:${seconds}Z`,
+    cause: 0,
+    content: "",
+    extras: {
+      command: "plan",
+      task_list: tasks,
+    },
+  };
+}
+
+/** Seeds the event store with one plan observation (id 1) holding `tasks`. */
+function setTasks(tasks: TaskTrackingObservation["extras"]["task_list"]) {
+  const planEvent = createTaskTrackingObservation(1, tasks);
+  const knownIds = new Set([1]);
+
+  useEventStore.setState({
+    events: [planEvent],
+    eventIds: knownIds,
+    uiEvents: [planEvent],
+  });
+}
+
+// Empty the event store before every test so a task list set by one test
+// is not visible to the next.
+beforeEach(() => {
+  useEventStore.setState({
+    events: [],
+    eventIds: new Set(),
+    uiEvents: [],
+  });
+});
+
+// Rendering tests for the task-list tab: empty state, task rows, in-progress
+// highlighting, notes, and selection of the latest plan event.
+describe("TaskListTab", () => {
+  it("renders empty state with icon and message when there are no tasks", () => {
+    const { container } = render(<TaskListTab />);
+
+    expect(screen.getByText("No tasks yet")).toBeInTheDocument();
+    // Empty state should show the check-circle icon (rendered as SVG)
+    expect(container.querySelector("svg")).toBeInTheDocument();
+  });
+
+  it("renders empty state message using Text component (span)", () => {
+    render(<TaskListTab />);
+
+    const message = screen.getByText("No tasks yet");
+    expect(message.tagName).toBe("SPAN");
+  });
+
+  it("renders task items when tasks exist", () => {
+    setTasks([
+      { id: "1", title: "Implement feature", status: "todo" },
+      { id: "2", title: "Write tests", status: "in_progress" },
+      { id: "3", title: "Deploy", status: "done" },
+    ]);
+
+    const { container } = render(<TaskListTab />);
+
+    expect(screen.getByText("Implement feature")).toBeInTheDocument();
+    expect(screen.getByText("Write tests")).toBeInTheDocument();
+    expect(screen.getByText("Deploy")).toBeInTheDocument();
+
+    // One element marked data-name="item" is expected per task.
+    const taskItems = container.querySelectorAll('[data-name="item"]');
+    expect(taskItems).toHaveLength(3);
+  });
+
+  it("does not display task IDs", () => {
+    setTasks([
+      { id: "task-1", title: "First task", status: "todo" },
+    ]);
+
+    render(<TaskListTab />);
+
+    // The id string must not appear anywhere in the rendered text.
+    expect(screen.queryByText(/task-1/)).not.toBeInTheDocument();
+  });
+
+  it("highlights in_progress tasks with a background", () => {
+    setTasks([
+      { id: "1", title: "Todo task", status: "todo" },
+      { id: "2", title: "Active task", status: "in_progress" },
+      { id: "3", title: "Done task", status: "done" },
+    ]);
+
+    render(<TaskListTab />);
+
+    // Find each task item via its text, then check the wrapper div
+    const activeItem = screen.getByText("Active task").closest("[data-name]");
+    const activeWrapper = activeItem?.parentElement;
+    expect(activeWrapper?.className).toContain("bg-[#2D3039]");
+
+    const todoItem = screen.getByText("Todo task").closest("[data-name]");
+    expect(todoItem?.parentElement?.className).not.toContain("bg-[#2D3039]");
+
+    const doneItem = screen.getByText("Done task").closest("[data-name]");
+    expect(doneItem?.parentElement?.className).not.toContain("bg-[#2D3039]");
+  });
+
+  it("displays task notes when present and omits when absent", () => {
+    setTasks([
+      {
+        id: "1",
+        title: "Task with notes",
+        status: "todo",
+        notes: "Important note",
+      },
+      { id: "2", title: "Task without notes", status: "todo" },
+    ]);
+
+    render(<TaskListTab />);
+
+    // Exactly one "Notes:" line is expected: the one for the task with notes.
+    expect(screen.getByText("Notes: Important note")).toBeInTheDocument();
+    expect(screen.getAllByText(/^Notes:/)).toHaveLength(1);
+  });
+
+  it("uses the latest plan event when multiple exist", () => {
+    const event1 = createTaskTrackingObservation(1, [
+      { id: "1", title: "Old task", status: "todo" },
+    ]);
+    const event2 = createTaskTrackingObservation(2, [
+      { id: "1", title: "Updated task", status: "done" },
+      { id: "2", title: "New task", status: "in_progress" },
+    ]);
+
+    // Both plan events are in the store; event2 is the later entry.
+    useEventStore.setState({
+      events: [event1, event2],
+      eventIds: new Set([1, 2]),
+      uiEvents: [event1, event2],
+    });
+
+    render(<TaskListTab />);
+
+    expect(screen.queryByText("Old task")).not.toBeInTheDocument();
+    expect(screen.getByText("Updated task")).toBeInTheDocument();
+    expect(screen.getByText("New task")).toBeInTheDocument();
+  });
+
+  it("renders as a scrollable main element when tasks exist", () => {
+    setTasks([{ id: "1", title: "A task", status: "todo" }]);
+
+    render(<TaskListTab />);
+
+    const main = screen.getByRole("main");
+    expect(main).toBeInTheDocument();
+  });
+});
@@ -0,0 +1,28 @@
+import { describe, it, expect } from "vitest";
+import { getGitPath } from "#/utils/get-git-path";
+
+// getGitPath maps a repository identifier to its checkout directory under
+// /workspace/project, using the last path segment as the folder name.
+describe("getGitPath", () => {
+  it("should return /workspace/project when no repository is selected", () => {
+    const forNull = getGitPath(null);
+    const forUndefined = getGitPath(undefined);
+
+    expect(forNull).toBe("/workspace/project");
+    expect(forUndefined).toBe("/workspace/project");
+  });
+
+  it("should handle standard owner/repo format (GitHub)", () => {
+    expect(getGitPath("OpenHands/OpenHands")).toBe(
+      "/workspace/project/OpenHands",
+    );
+    expect(getGitPath("facebook/react")).toBe("/workspace/project/react");
+  });
+
+  it("should handle nested group paths (GitLab)", () => {
+    // Any number of group segments may precede the repository name.
+    expect(getGitPath("modernhealth/frontend-guild/pan")).toBe(
+      "/workspace/project/pan",
+    );
+    expect(getGitPath("group/subgroup/repo")).toBe("/workspace/project/repo");
+    expect(getGitPath("a/b/c/d/repo")).toBe("/workspace/project/repo");
+  });
+
+  it("should handle single segment paths", () => {
+    const path = getGitPath("repo");
+    expect(path).toBe("/workspace/project/repo");
+  });
+
+  it("should handle empty string", () => {
+    const path = getGitPath("");
+    expect(path).toBe("/workspace/project");
+  });
+});
@@ -1,12 +1,12 @@
 {
  "name": "openhands-frontend",
-  "version": "1.4.0",
+  "version": "1.5.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "openhands-frontend",
-      "version": "1.4.0",
+      "version": "1.5.0",
      "dependencies": {
        "@heroui/react": "2.8.7",
        "@microlink/react-json-view": "^1.27.1",
@@ -1,6 +1,6 @@
 {
  "name": "openhands-frontend",
-  "version": "1.4.0",
+  "version": "1.5.0",
  "private": true,
  "type": "module",
  "engines": {
@@ -35,23 +35,17 @@ export function TaskItem({ task }: TaskItemProps) {
  const isDoneStatus = task.status === "done";

  return (
-    <div
-      className="flex gap-[14px] items-center px-4 py-2 w-full"
-      data-name="item"
-    >
+    <div className="flex gap-2 items-center w-full" data-name="item">
      <div className="shrink-0">{icon}</div>
-      <div className="flex flex-col items-start justify-center leading-[20px] text-nowrap whitespace-pre font-normal">
+      <div className="flex flex-col items-start justify-center leading-[16px] text-nowrap whitespace-pre font-normal">
        <Typography.Text
          className={cn(
-            "text-[12px] text-white",
-            isDoneStatus && "text-[#A3A3A3]",
+            "text-[12px]",
+            isDoneStatus ? "text-[#A3A3A3]" : "text-white",
          )}
        >
          {task.title}
        </Typography.Text>
-        <Typography.Text className="text-[10px] text-[#A3A3A3] font-normal">
-          {t(I18nKey.TASK_TRACKING_OBSERVATION$TASK_ID)}: {task.id}
-        </Typography.Text>
        {task.notes && (
          <Typography.Text className="text-[10px] text-[#A3A3A3]">
            {t(I18nKey.TASK_TRACKING_OBSERVATION$TASK_NOTES)}: {task.notes}
@@ -87,6 +87,7 @@ export function AgentStatus({
  return (
    <div className={cn("flex items-center gap-1 min-w-0", className)}>
      <span
+        data-testid="agent-status-text"
        className="text-[11px] text-white font-normal leading-5 flex-1 min-w-0 max-w-full whitespace-normal break-words"
        title={t(statusCode)}
      >
@@ -16,8 +16,13 @@ const BrowserTab = lazy(() => import("#/routes/browser-tab"));
 const ServedTab = lazy(() => import("#/routes/served-tab"));
 const VSCodeTab = lazy(() => import("#/routes/vscode-tab"));
 const PlannerTab = lazy(() => import("#/routes/planner-tab"));
+const TaskListTab = lazy(() => import("#/routes/task-list-tab"));

 const TAB_CONFIG = {
+  tasklist: {
+    component: TaskListTab,
+    titleKey: I18nKey.COMMON$TASK_LIST,
+  },
  editor: {
    component: EditorTab,
    titleKey: I18nKey.COMMON$CHANGES,
@@ -27,7 +27,7 @@ export function ConversationTabNav({
      data-testid={`conversation-tab-${tabValue}`}
      className={cn(
        "flex items-center gap-2 rounded-md cursor-pointer",
-        "pl-1.5 pr-2 py-1",
+        "pl-1.5 pr-2 py-1 lg:py-1.5",
        "text-[#9299AA] bg-[#0D0F11]",
        isActive && "bg-[#25272D] text-white",
        isActive
@@ -13,6 +13,8 @@ import VSCodeIcon from "#/icons/vscode.svg?react";
 import PillIcon from "#/icons/pill.svg?react";
 import PillFillIcon from "#/icons/pill-fill.svg?react";
 import LessonPlanIcon from "#/icons/lesson-plan.svg?react";
+import DoubleCheckIcon from "#/icons/double-check.svg?react";
+import { useTaskList } from "#/hooks/use-task-list";

 interface ConversationTabsContextMenuProps {
  isOpen: boolean;
@@ -29,6 +31,8 @@ export function ConversationTabsContextMenu({
  const { state, setUnpinnedTabs } =
    useConversationLocalStorageState(conversationId);

+  const { hasTaskList } = useTaskList();
+
  const tabConfig = [
    {
      tab: "planner",
@@ -42,6 +46,14 @@ export function ConversationTabsContextMenu({
    { tab: "browser", icon: GlobeIcon, i18nKey: I18nKey.COMMON$BROWSER },
  ];

+  if (hasTaskList) {
+    tabConfig.unshift({
+      tab: "tasklist",
+      icon: DoubleCheckIcon,
+      i18nKey: I18nKey.COMMON$TASK_LIST,
+    });
+  }
+
  if (!isOpen) return null;

  const handleTabClick = (tab: string) => {
@@ -7,6 +7,7 @@ import GitChanges from "#/icons/git_changes.svg?react";
 import VSCodeIcon from "#/icons/vscode.svg?react";
 import ThreeDotsVerticalIcon from "#/icons/three-dots-vertical.svg?react";
 import LessonPlanIcon from "#/icons/lesson-plan.svg?react";
+import DoubleCheckIcon from "#/icons/double-check.svg?react";
 import { cn } from "#/utils/utils";
 import { useConversationLocalStorageState } from "#/utils/conversation-local-storage";
 import { ConversationTabNav } from "./conversation-tab-nav";
@@ -17,6 +18,7 @@ import { useConversationStore } from "#/stores/conversation-store";
 import { ConversationTabsContextMenu } from "./conversation-tabs-context-menu";
 import { useConversationId } from "#/hooks/use-conversation-id";
 import { useSelectConversationTab } from "#/hooks/use-select-conversation-tab";
+import { useTaskList } from "#/hooks/use-task-list";

 export function ConversationTabs() {
  const { conversationId } = useConversationId();
@@ -27,6 +29,8 @@ export function ConversationTabs() {
  const { state: persistedState } =
    useConversationLocalStorageState(conversationId);

+  const { hasTaskList } = useTaskList();
+
  const {
    selectTab,
    isTabActive,
@@ -120,6 +124,18 @@ export function ConversationTabs() {
    },
  ];

+  if (hasTaskList) {
+    tabs.unshift({
+      tabValue: "tasklist",
+      isActive: isTabActive("tasklist"),
+      icon: DoubleCheckIcon,
+      onClick: () => selectTab("tasklist"),
+      tooltipContent: t(I18nKey.COMMON$TASK_LIST),
+      tooltipAriaLabel: t(I18nKey.COMMON$TASK_LIST),
+      label: t(I18nKey.COMMON$TASK_LIST),
+    });
+  }
+
  // Filter out unpinned tabs
  const visibleTabs = tabs.filter(
    (tab) => !persistedState.unpinnedTabs.includes(tab.tabValue),
@@ -0,0 +1,64 @@
+import { useMemo } from "react";
+import { useEventStore } from "#/stores/use-event-store";
+import type { OHEvent } from "#/stores/use-event-store";
+import { isTaskTrackingObservation } from "#/types/core/guards";
+import type { OpenHandsParsedEvent } from "#/types/core";
+import { isObservationEvent } from "#/types/v1/type-guards";
+import type { OpenHandsEvent } from "#/types/v1/core";
+import type { TaskTrackerObservation } from "#/types/v1/core/base/observation";
+import type { ObservationEvent } from "#/types/v1/core/events/observation-event";
+
+/** Normalized task entry that `getTaskListFromEvent` produces for v0 and v1 events. */
+export interface TaskListItem {
+  // v0: the id carried by the task; v1: the task's 1-based position in the list.
+  id: string;
+  // Title text of the task.
+  title: string;
+  // Progress state of the task.
+  status: "todo" | "in_progress" | "done";
+  // Optional free-form notes; left undefined when the task has none.
+  notes?: string;
+}
+
+/**
+ * Extracts a normalized task list from a single event.
+ *
+ * @param event Any event from the event store (v0 or v1 shape).
+ * @returns The event's tasks when it is a task-tracking observation issued
+ *   with the "plan" command, otherwise `null`.
+ */
+function getTaskListFromEvent(event: OHEvent): TaskListItem[] | null {
+  // v0 shape: `observation` is the string "task_tracking".
+  const legacyEvent = event as OpenHandsParsedEvent;
+  const isLegacyPlan =
+    isTaskTrackingObservation(legacyEvent) &&
+    legacyEvent.extras.command === "plan";
+  if (isLegacyPlan) {
+    return legacyEvent.extras.task_list.map((task) => ({
+      id: task.id,
+      title: task.title,
+      status: task.status,
+      notes: task.notes,
+    }));
+  }
+
+  // v1 shape: `observation` is an object with kind "TaskTrackerObservation".
+  const currentEvent = event as OpenHandsEvent;
+  if (!isObservationEvent(currentEvent)) {
+    return null;
+  }
+  if (currentEvent.observation.kind !== "TaskTrackerObservation") {
+    return null;
+  }
+
+  const { observation } =
+    currentEvent as ObservationEvent<TaskTrackerObservation>;
+  if (observation.command !== "plan") {
+    return null;
+  }
+
+  // Ids are synthesized from the 1-based position of each task.
+  return observation.task_list.map((task, index) => ({
+    id: String(index + 1),
+    title: task.title,
+    status: task.status,
+    notes: task.notes || undefined,
+  }));
+}
+
+/**
+ * Hook returning the task list of the most recent plan observation in the
+ * event store.
+ *
+ * @returns `taskList` (empty when no plan observation exists) and
+ *   `hasTaskList`, which is true only when that list has at least one entry.
+ */
+export function useTaskList() {
+  const events = useEventStore((state) => state.events);
+
+  return useMemo(() => {
+    // Scan from the newest event backwards; the first plan found wins.
+    let index = events.length;
+    while (index > 0) {
+      index -= 1;
+      const latest = getTaskListFromEvent(events[index]);
+      if (latest) {
+        return { taskList: latest, hasTaskList: latest.length > 0 };
+      }
+    }
+
+    return { taskList: [] as TaskListItem[], hasTaskList: false };
+  }, [events]);
+}
@@ -993,6 +993,8 @@ export enum I18nKey {
  COMMON$MORE_OPTIONS = "COMMON$MORE_OPTIONS",
  COMMON$CREATE_A_PLAN = "COMMON$CREATE_A_PLAN",
  COMMON$TASKS = "COMMON$TASKS",
+  COMMON$TASK_LIST = "COMMON$TASK_LIST",
+  COMMON$NO_TASKS = "COMMON$NO_TASKS",
  COMMON$PLAN_MD = "COMMON$PLAN_MD",
  COMMON$READ_MORE = "COMMON$READ_MORE",
  COMMON$BUILD = "COMMON$BUILD",
@@ -15891,6 +15891,38 @@
    "de": "Aufgaben",
    "uk": "Завдання"
  },
+  "COMMON$TASK_LIST": {
+    "en": "Task List",
+    "ja": "タスクリスト",
+    "zh-CN": "任务列表",
+    "zh-TW": "任務列表",
+    "ko-KR": "작업 목록",
+    "no": "Oppgaveliste",
+    "it": "Elenco attività",
+    "pt": "Lista de tarefas",
+    "es": "Lista de tareas",
+    "ar": "قائمة المهام",
+    "fr": "Liste des tâches",
+    "tr": "Görev listesi",
+    "de": "Aufgabenliste",
+    "uk": "Список завдань"
+  },
+  "COMMON$NO_TASKS": {
+    "en": "No tasks yet",
+    "ja": "タスクはまだありません",
+    "zh-CN": "暂无任务",
+    "zh-TW": "尚無任務",
+    "ko-KR": "아직 작업이 없습니다",
+    "no": "Ingen oppgaver ennå",
+    "it": "Nessuna attività",
+    "pt": "Nenhuma tarefa ainda",
+    "es": "Sin tareas aún",
+    "ar": "لا توجد مهام بعد",
+    "fr": "Aucune tâche pour le moment",
+    "tr": "Henüz görev yok",
+    "de": "Noch keine Aufgaben",
+    "uk": "Завдань поки немає"
+  },
  "COMMON$PLAN_MD": {
    "en": "Plan.md",
    "ja": "Plan.md",
@@ -0,0 +1,4 @@
+<svg width="20" height="20" viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg">
+  <path d="M3 10.5L6.5 14L14 6" stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round"/>
+  <path d="M7 10.5L10.5 14L18 6" stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round"/>
+</svg>
@@ -40,11 +40,18 @@ export default function LoginPage() {
  }, [config.isLoading, config.data?.app_mode, navigate]);

  // Redirect authenticated users away from login page
+  // Preserve login_method param so useAuthCallback can store it for auto-login
  React.useEffect(() => {
    if (!isAuthLoading && isAuthed) {
-      navigate(returnTo, { replace: true });
+      const loginMethod = searchParams.get("login_method");
+      let destination = returnTo;
+      if (loginMethod) {
+        const separator = returnTo.includes("?") ? "&" : "?";
+        destination = `${returnTo}${separator}login_method=${encodeURIComponent(loginMethod)}`;
+      }
+      navigate(destination, { replace: true });
    }
-  }, [isAuthed, isAuthLoading, navigate, returnTo]);
+  }, [isAuthed, isAuthLoading, navigate, returnTo, searchParams]);

  if (isAuthLoading || config.isLoading) {
    return (
@@ -173,14 +173,17 @@ export default function MainApp() {
    setLoginMethodExists(checkLoginMethodExists());
  }, [isAuthed, checkLoginMethodExists]);

+  // Show loading spinner while config or auth is loading
+  const isLoading = config.isLoading || isAuthLoading;
+
+  // Only decide to redirect AFTER loading completes
  const shouldRedirectToLogin =
-    config.isLoading ||
-    isAuthLoading ||
-    (!isAuthed &&
-      !isAuthError &&
-      !isOnIntermediatePage &&
-      config.data?.app_mode === "saas" &&
-      !loginMethodExists);
+    !isLoading &&
+    !isAuthed &&
+    !isAuthError &&
+    !isOnIntermediatePage &&
+    config.data?.app_mode === "saas" &&
+    !loginMethodExists;

  React.useEffect(() => {
    if (shouldRedirectToLogin) {
@@ -197,7 +200,8 @@ export default function MainApp() {
    }
  }, [shouldRedirectToLogin, pathname, searchParams, navigate]);

-  if (shouldRedirectToLogin) {
+  // Show loading spinner while loading OR when about to redirect
+  if (isLoading || shouldRedirectToLogin) {
    return (
      <div className="min-h-screen flex items-center justify-center bg-base">
        <LoadingSpinner size="large" />
@@ -0,0 +1,41 @@
+import { useTranslation } from "react-i18next";
+import { I18nKey } from "#/i18n/declaration";
+import CheckCircleIcon from "#/icons/u-check-circle.svg?react";
+import { TaskItem } from "#/components/features/chat/task-tracking/task-item";
+import { useTaskList } from "#/hooks/use-task-list";
+import { Text } from "#/ui/typography";
+import { cn } from "#/utils/utils";
+
+/**
+ * Tab body that lists the tasks returned by `useTaskList`, or shows an icon
+ * and a "no tasks" message when the list is empty.
+ */
+function TaskListTab() {
+  const { t } = useTranslation();
+  const { taskList: tasks } = useTaskList();
+  const isEmpty = tasks.length === 0;
+
+  if (isEmpty) {
+    return (
+      <div className="flex flex-col items-center justify-center w-full h-full p-10 gap-4">
+        <CheckCircleIcon width={109} height={109} color="#A1A1A1" />
+        <Text className="text-[#8D95A9] text-[19px] font-normal leading-5">
+          {t(I18nKey.COMMON$NO_TASKS)}
+        </Text>
+      </div>
+    );
+  }
+
+  // Each task gets a padded wrapper; the in-progress one also gets a
+  // highlight background.
+  const rows = tasks.map((task) => {
+    const isActive = task.status === "in_progress";
+    return (
+      <div
+        key={task.id}
+        className={cn("px-4 py-2", isActive && "bg-[#2D3039]")}
+      >
+        <TaskItem task={task} />
+      </div>
+    );
+  });
+
+  return (
+    <main className="h-full overflow-y-auto flex flex-col custom-scrollbar-always">
+      {rows}
+    </main>
+  );
+}
+
+export default TaskListTab;
@@ -11,7 +11,8 @@ export type ConversationTab =
  | "served"
  | "vscode"
  | "terminal"
-  | "planner";
+  | "planner"
+  | "tasklist";

 export type ConversationMode = "code" | "plan";

@@ -3,7 +3,7 @@
 * If a repository is selected, returns /workspace/project/{repo-name}
 * Otherwise, returns /workspace/project
 *
- * @param selectedRepository The selected repository (e.g., "OpenHands/OpenHands" or "owner/repo")
+ * @param selectedRepository The selected repository (e.g., "OpenHands/OpenHands", "owner/repo", or "group/subgroup/repo")
 * @returns The git path to use
 */
 export function getGitPath(
@@ -13,10 +13,10 @@ export function getGitPath(
    return "/workspace/project";
  }

-  // Extract the repository name from "owner/repo" format
-  // The folder name is the second part after "/"
+  // Extract the repository name from the path
+  // The folder name is always the last part (handles both "owner/repo" and "group/subgroup/repo" formats)
  const parts = selectedRepository.split("/");
-  const repoName = parts.length > 1 ? parts[1] : parts[0];
+  const repoName = parts[parts.length - 1];

  return `/workspace/project/${repoName}`;
 }
@@ -16,6 +16,7 @@ export const VERIFIED_MODELS = [
  "gpt-5.2",
  "minimax-m2.5",
  "gemini-3-pro-preview",
+  "gemini-3.1-pro-preview",
  "gemini-3-flash-preview",
  "deepseek-chat",
  "devstral-medium-2512",
@@ -65,6 +66,7 @@ export const VERIFIED_OPENHANDS_MODELS = [
  "gpt-5.2",
  "minimax-m2.5",
  "gemini-3-pro-preview",
+  "gemini-3.1-pro-preview",
  "gemini-3-flash-preview",
  "devstral-medium-2512",
  "kimi-k2-0711-preview",
@@ -0,0 +1,73 @@
+# OpenHands Integration Tests - Environment Configuration
+# Copy this file to .env and fill in your values
+
+# =============================================================================
+# TARGET ENVIRONMENT
+# =============================================================================
+
+# Base URL for the application under test
+# Options:
+#   - https://staging.all-hands.dev (default)
+#   - https://app.all-hands.dev (production)
+#   - https://<feature_branch>.staging.all-hands.dev (feature branches)
+#   - http://localhost:3000 (local development)
+BASE_URL=https://staging.all-hands.dev
+
+# Environment name (optional, used for logging)
+# Options: staging, production, local, feature
+TEST_ENV=staging
+
+# =============================================================================
+# AUTHENTICATION
+# =============================================================================
+
+# Authentication method
+# Options:
+#   - github (default): Use GitHub OAuth
+#   - keycloak: Use Keycloak authentication
+#   - skip: Skip auth setup (use existing fixtures/auth.json)
+AUTH_METHOD=github
+
+# GitHub OAuth Credentials (required for AUTH_METHOD=github)
+# Create a test account specifically for automation testing
+GITHUB_TEST_USERNAME=your-github-test-username
+GITHUB_TEST_PASSWORD=your-github-test-password
+
+# GitHub 2FA TOTP Secret (optional, only if 2FA is enabled on test account)
+# You can get this when setting up 2FA - it's the secret key shown
+# GITHUB_TEST_TOTP_SECRET=your-totp-secret
+
+# Keycloak Credentials (required for AUTH_METHOD=keycloak)
+# KEYCLOAK_URL=https://auth.your-domain.com
+# KEYCLOAK_USERNAME=test-user
+# KEYCLOAK_PASSWORD=test-password
+
+# =============================================================================
+# TEST CONFIGURATION
+# =============================================================================
+
+# Repository to use for testing (should be a private repo the test user has access to)
+#TEST_REPO_URL=https://github.com/OpenHands/deploy
+
+# Prompt to send to the agent during smoke tests
+TEST_PROMPT="Flip a coin!"
+
+# =============================================================================
+# CI/CD CONFIGURATION
+# =============================================================================
+
+# Set to true when running in CI environment
+# CI=true
+
+# Playwright specific settings
+# PLAYWRIGHT_HTML_REPORT=playwright-report
+
+# =============================================================================
+# DEBUG OPTIONS
+# =============================================================================
+
+# Enable debug logging
+# DEBUG=pw:api
+
+# Run headed with the Playwright Inspector open and timeouts disabled
+# PWDEBUG=1
@@ -0,0 +1,63 @@
+{
+  "parser": "@typescript-eslint/parser",
+  "parserOptions": {
+    "project": "./tsconfig.json"
+  },
+  "extends": [
+    "airbnb-base",
+    "airbnb-typescript/base",
+    "prettier",
+    "plugin:@typescript-eslint/eslint-recommended",
+    "plugin:@typescript-eslint/recommended"
+  ],
+  "plugins": ["prettier", "unused-imports"],
+  "rules": {
+    "unused-imports/no-unused-imports": "error",
+    "prettier/prettier": ["error"],
+    "@typescript-eslint/prefer-optional-chain": "error",
+    "import/extensions": [
+      "error",
+      "ignorePackages",
+      {
+        "": "never",
+        "ts": "never"
+      }
+    ]
+  },
+  "overrides": [
+    {
+      "files": ["*.ts"],
+      "rules": {
+        "no-param-reassign": [
+          "error",
+          {
+            "props": true,
+            "ignorePropertyModificationsFor": ["acc", "page"]
+          }
+        ],
+        "no-restricted-syntax": "off",
+        "import/prefer-default-export": "off",
+        "no-underscore-dangle": "off",
+        "import/no-extraneous-dependencies": "off",
+        "no-console": "off",
+        "no-await-in-loop": "off",
+        "class-methods-use-this": "off",
+        "@typescript-eslint/no-use-before-define": "off",
+        "no-plusplus": "off",
+        "no-promise-executor-return": "off",
+        "@typescript-eslint/no-throw-literal": "off",
+        "@typescript-eslint/no-shadow": "off",
+        "@typescript-eslint/no-unused-vars": [
+          "error",
+          {
+            "argsIgnorePattern": "^_",
+            "varsIgnorePattern": "^_"
+          }
+        ]
+      },
+      "parserOptions": {
+        "project": ["./tsconfig.json"]
+      }
+    }
+  ]
+}
@@ -0,0 +1,42 @@
+# Dependencies
+node_modules/
+
+# Environment files (contain secrets)
+.env
+.env.local
+.env.*.local
+
+# Test artifacts
+test-results/
+playwright-report/
+playwright/.cache/
+
+# Authentication state (contains session tokens)
+fixtures/auth.json
+
+# TypeScript build output
+*.tsbuildinfo
+dist/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# Debug logs
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+
+# Screenshots and videos from test runs
+**/*.png
+**/*.webm
+**/*.mp4
+
+# Keep the fixtures directory structure
+!fixtures/.gitkeep
@@ -0,0 +1,3 @@
+{
+    "trailingComma": "all"
+}
@@ -0,0 +1,404 @@
+# OpenHands Integration Tests
+
+End-to-end smoke tests for OpenHands using [Playwright](https://playwright.dev/).
+
+## Overview
+
+These integration tests verify the critical path of the OpenHands application:
+
+1. ✅ User authentication (GitHub OAuth / Keycloak)
+2. ✅ Home screen accessibility
+3. ✅ Repository selection
+4. ✅ Conversation creation
+5. ✅ Agent interaction without errors
+6. ✅ GitHub Resolver integration (enterprise)
+
+## Quick Start
+
+### Prerequisites
+
+- Node.js 20.0.0 or higher
+- A GitHub test account with access to the test repository
+
+### Installation
+
+```bash
+cd integration_tests
+npm install
+npx playwright install chromium  # Install browser
+```
+
+### Configuration
+
+1. Copy the example environment file:
+
+```bash
+cp .env.example .env
+```
+
+2. Edit `.env` with your test credentials:
+
+```env
+GITHUB_TEST_USERNAME=your-test-account
+GITHUB_TEST_PASSWORD=your-test-password
+# Enable this only if your user has access to this repository
+#TEST_REPO_URL=https://github.com/OpenHands/deploy
+```
+
+### Run Tests
+
+```bash
+# Run all smoke tests against staging
+npm test
+
+# Run with visible browser
+npm run test:headed
+
+# Run with Playwright debugger
+npm run test:debug
+
+# Run with UI mode (interactive)
+npm run test:ui
+```
+
+## Environment Support
+
+Tests can run against different environments:
+
+### Staging (Default)
+
+```bash
+npm run test:staging
+# or
+BASE_URL=https://staging.all-hands.dev npm test
+```
+
+### Production
+
+```bash
+npm run test:production
+# or
+BASE_URL=https://app.all-hands.dev npm test
+```
+
+### Feature Branches
+
+```bash
+BASE_URL=https://my-feature-branch.staging.all-hands.dev npm test
+```
+
+### Local Development
+
+```bash
+BASE_URL=http://localhost:3000 npm test
+```
+
+## Authentication
+
+### GitHub OAuth (Default)
+
+The tests use GitHub OAuth for authentication. You'll need a dedicated test account.
+
+**Required Environment Variables:**
+- `GITHUB_TEST_USERNAME` - GitHub username
+- `GITHUB_TEST_PASSWORD` - GitHub password
+- `GITHUB_TEST_TOTP_SECRET` - (Optional) 2FA TOTP secret
+
+**Recommendations:**
+- Use a dedicated test account, not your personal account
+- Disable 2FA on the test account if possible (simpler automation)
+- If 2FA is required, you'll need to implement TOTP generation (see below)
+
+### Keycloak Authentication
+
+For Keycloak-based authentication:
+
+```env
+AUTH_METHOD=keycloak
+KEYCLOAK_URL=https://auth.your-domain.com
+KEYCLOAK_USERNAME=test-user
+KEYCLOAK_PASSWORD=test-password
+```
+
+### Reusing Authentication State
+
+After running tests once, the authentication state is saved to `fixtures/auth.json`. To skip the auth flow on subsequent runs:
+
+```env
+AUTH_METHOD=skip
+```
+
+## Project Structure
+
+```
+integration_tests/
+├── fixtures/               # Test fixtures and auth state
+│   └── auth.json          # Saved authentication state (generated)
+├── mocks/                  # Mock GitHub server and helpers
+│   ├── github-mock-server.ts      # Mock GitHub REST API and webhook server
+│   ├── github-webhook-payloads.ts # Webhook payload templates
+│   └── mock-github-client.ts      # Client utilities for triggering webhooks
+├── pages/                  # Page Object Models
+│   ├── BasePage.ts        # Base page with common utilities
+│   ├── HomePage.ts        # Home screen interactions
+│   ├── ConversationPage.ts # Conversation/chat interactions
+│   └── index.ts           # Page exports
+├── tests/                  # Test specifications
+│   ├── global-setup.ts    # Authentication setup
+│   ├── github-resolver.spec.ts # GitHub Resolver test suite
+│   └── smoke.spec.ts      # Smoke test suite
+├── utils/                  # Utility functions
+├── .env.example           # Environment configuration template
+├── playwright.config.ts   # Playwright configuration
+├── package.json           # Dependencies and scripts
+└── README.md              # This file
+
+## Writing Tests
+
+### Using Page Objects
+
+```typescript
+import { test, expect } from "@playwright/test";
+import { HomePage, ConversationPage } from "../pages";
+
+test("example test", async ({ page }) => {
+  const homePage = new HomePage(page);
+  const conversationPage = new ConversationPage(page);
+
+  // Navigate and verify home screen
+  await homePage.goto();
+  await expect(homePage.homeScreen).toBeVisible();
+
+  // Start a conversation
+  await homePage.selectRepository("https://github.com/owner/repo");
+  await homePage.startNewConversation();
+
+  // Interact with agent
+  await conversationPage.waitForConversationReady();
+  await conversationPage.executePrompt("Your prompt here");
+  await conversationPage.verifyNoErrors();
+});
+```
+
+### Test Tags
+
+Tests are organized with tags:
+
+- `@smoke` - Core smoke tests (run by default)
+- `@critical` - Critical functionality that must always work
+
+```bash
+# Run only smoke tests
+npm run test:smoke
+
+# Run specific tag
+npx playwright test --grep @critical
+```
+
+## CI/CD Integration
+
+### GitHub Actions
+
+The tests can be run in GitHub Actions. See `.github/workflows/smoke-tests.yml`.
+
+**Required Secrets:**
+- `GITHUB_TEST_USERNAME` - Test account username
+- `GITHUB_TEST_PASSWORD` - Test account password
+
+### Example Workflow
+
+```yaml
+name: Smoke Tests
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+  schedule:
+    - cron: '0 */6 * * *'  # Every 6 hours
+
+jobs:
+  smoke-tests:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+
+      - name: Install dependencies
+        working-directory: ./integration_tests
+        run: npm ci
+
+      - name: Install Playwright
+        working-directory: ./integration_tests
+        run: npx playwright install --with-deps chromium
+
+      - name: Run smoke tests
+        working-directory: ./integration_tests
+        env:
+          BASE_URL: https://staging.all-hands.dev
+          GITHUB_TEST_USERNAME: ${{ secrets.GITHUB_TEST_USERNAME }}
+          GITHUB_TEST_PASSWORD: ${{ secrets.GITHUB_TEST_PASSWORD }}
+        run: npm test
+
+      - uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: playwright-report
+          path: integration_tests/playwright-report/
+          retention-days: 30
+```
+
+## Troubleshooting
+
+### Authentication Fails
+
+1. Verify credentials are correct
+2. Check whether 2FA is enabled on the account (if so, a TOTP secret is required; see "Adding 2FA Support")
+3. Check whether the account is locked or needs verification
+4. Try running with `AUTH_METHOD=skip` and logging in manually first
+
+### Tests Timeout
+
+1. Increase timeout in `playwright.config.ts`
+2. Check if the environment is accessible
+3. Check agent response times
+
+### Debug Mode
+
+```bash
+# Run with headed browser and pause on failure
+PWDEBUG=1 npm test
+
+# Generate test code interactively
+npm run codegen
+```
+
+### View Test Report
+
+```bash
+npm run report
+```
+
+## Adding 2FA Support
+
+If your test account requires 2FA, install `otplib`:
+
+```bash
+npm install otplib
+```
+
+Then update `global-setup.ts`:
+
+```typescript
+import { authenticator } from 'otplib';
+
+async function generateTOTP(secret: string): Promise<string> {
+  return authenticator.generate(secret);
+}
+```
+
+## GitHub Resolver Integration Tests
+
+The GitHub Resolver tests verify the end-to-end flow of the resolver integration, where GitHub webhooks trigger OpenHands to work on issues and pull requests.
+
+### Architecture
+
+The tests use a **Mock GitHub Server** instead of connecting to the real GitHub API. This allows:
+
+- Complete control over webhook payloads and responses
+- Testing without requiring real GitHub credentials or installations
+- Isolation from GitHub's rate limits and service availability
+- Reproducible test scenarios
+
+### Mock GitHub Server
+
+The mock server (`mocks/github-mock-server.ts`) simulates:
+
+- GitHub REST API endpoints (repos, issues, comments, reactions)
+- GitHub App installation token generation
+- Webhook signature verification
+- Recording of outgoing responses (comments posted by the resolver)
+
+### Running GitHub Resolver Tests
+
+1. **Start the OpenHands application with enterprise features:**
+
+```bash
+# From the project root
+cd enterprise
+make start-backend
+```
+
+2. **Configure environment variables:**
+
+```bash
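+# In integration_tests/.env
+# Note: the mock server signs webhooks with MOCK_GITHUB_WEBHOOK_SECRET
+# (default: test-webhook-secret). Keep both values identical so that the
+# signatures it generates can be verified.
+GITHUB_APP_WEBHOOK_SECRET=test-webhook-secret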
+APP_PORT=12000
+MOCK_GITHUB_PORT=9999
+```
+
+3. **Run the tests:**
+
+```bash
+cd integration_tests
+npm run test:github-resolver
+```
+
+### Mock Server Standalone Mode
+
+You can run the mock GitHub server standalone for debugging:
+
+```bash
+npm run mock:github
+```
+
+This starts the server on port 9999 (configurable via `MOCK_GITHUB_PORT`).
+
+### Test Endpoints
+
+The mock server exposes test control endpoints:
+
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/_health` | GET | Health check |
+| `/_test/webhook-events` | GET | Get recorded webhook events |
+| `/_test/outgoing-responses` | GET | Get responses posted by resolver |
+| `/_test/clear-events` | POST | Clear recorded events |
+| `/_test/reset` | POST | Reset all mock data |
+| `/_test/trigger-webhook` | POST | Trigger a webhook to target URL |
+
+### Test Scenarios
+
+The GitHub Resolver tests cover:
+
+1. **Issue Labeled** - Adding the "openhands" label to an issue
+2. **Issue Comment** - Commenting "@openhands" on an issue
+3. **PR Review Comment** - Commenting "@openhands" on a PR review
+4. **Error Handling** - Invalid signatures, missing installation IDs
+
+### Customizing Test Data
+
+Edit `mocks/github-mock-server.ts` to modify the default test data:
+
+- Repository information
+- Issue content
+- Installation configurations
+
+## Best Practices
+
+1. **Use dedicated test accounts** - Don't use personal accounts
+2. **Keep credentials secure** - Never commit `.env` files
+3. **Run tests sequentially** - Smoke tests share state
+4. **Clean up after tests** - Stop agents, close conversations
+5. **Use meaningful assertions** - Check for specific elements
+6. **Add screenshots on failure** - Helps debug CI failures
+
+## Contributing
+
+When adding new tests:
+
+1. Add new Page Objects for new pages/features
+2. Follow existing naming conventions
+3. Use appropriate test tags
+4. Document any new environment variables
+5. Update this README if needed
@@ -0,0 +1,2 @@
+# This file ensures the fixtures directory is tracked by git
+# The auth.json file will be generated during test setup
@@ -0,0 +1,705 @@
+/**
+ * Mock GitHub Server for Integration Testing
+ *
+ * This server simulates GitHub API endpoints used by the OpenHands resolver:
+ * - GitHub App webhooks (issue labeled, issue comment, PR comment, etc.)
+ * - GitHub REST API endpoints (repos, issues, comments, pulls)
+ * - GitHub GraphQL API (a single canned repository response for every query)
+ *
+ * The mock server allows testing the resolver integration without connecting
+ * to the real GitHub service.
+ */
+
+import http from "http";
+import crypto from "crypto";
+
+// Types for mock data
+/** Issue record kept in the mock data store, looked up per repository by `number`. */
+interface MockIssue {
+  number: number;
+  title: string;
+  body: string;
+  state: "open" | "closed";
+  labels: Array<{ name: string; id: number }>;
+  user: { login: string; id: number };
+  // ISO-8601 strings produced with `new Date().toISOString()`.
+  created_at: string;
+  updated_at: string;
+  // Comments appended through `MockGitHubDataStore.addComment`.
+  comments: MockComment[];
+  // Reaction content strings appended through `MockGitHubDataStore.addReaction`.
+  reactions: string[];
+}
+
+/** Comment stored on a `MockIssue`; created by `MockGitHubDataStore.addComment`. */
+interface MockComment {
+  // Allocated from the data store's running comment-id counter.
+  id: number;
+  body: string;
+  user: { login: string; id: number };
+  // ISO-8601 creation time.
+  created_at: string;
+}
+
+/** Repository record served by the mock `/repos/:owner/:repo` endpoint. */
+interface MockRepository {
+  id: number;
+  name: string;
+  // "owner/name"; this is the key used by the data store's repository map.
+  full_name: string;
+  private: boolean;
+  owner: { login: string; id: number };
+  default_branch: string;
+  node_id: string;
+}
+
+/** GitHub App installation record, looked up by `id` in the data store. */
+interface MockInstallation {
+  id: number;
+  account: { login: string; id: number };
+  repositories: MockRepository[];
+  // Returned as `token` by the mock access-token endpoint.
+  access_token: string;
+}
+
+/** One entry of the webhook log kept by `MockGitHubDataStore.recordWebhookEvent`. */
+interface WebhookEvent {
+  // First argument given to `recordWebhookEvent`; the trigger-webhook handler
+  // passes the GitHub event type (the `X-GitHub-Event` value) here.
+  action: string;
+  payload: Record<string, unknown>;
+  // ISO-8601 time at which the event was recorded.
+  timestamp: string;
+}
+
+/**
+ * In-memory store backing the mock GitHub server.
+ *
+ * Holds repositories, issues (per repository), app installations, the log of
+ * webhook events sent through the trigger endpoint, and the comment bodies
+ * posted back to the mock API. A fresh store (and `reset()`) is seeded with
+ * one repository, one open issue and one installation.
+ */
+class MockGitHubDataStore {
+  // Repositories keyed by "owner/name".
+  private repositories: Map<string, MockRepository> = new Map();
+
+  // Issues keyed by repository full name, then by issue number.
+  private issues: Map<string, Map<number, MockIssue>> = new Map();
+
+  // Installations keyed by installation id.
+  private installations: Map<number, MockInstallation> = new Map();
+
+  private webhookEvents: WebhookEvent[] = [];
+
+  private nextCommentId = 1000;
+
+  // Monotonic source of label ids. A timestamp-based id would be shared by
+  // labels added within the same millisecond (e.g. one multi-label request).
+  private nextLabelId = 5000;
+
+  private outgoingWebhookResponses: Array<{
+    body: string;
+    timestamp: string;
+  }> = [];
+
+  constructor() {
+    this.initializeDefaultData();
+  }
+
+  /** Seed the default repository, issue #1 and installation 12345. */
+  private initializeDefaultData() {
+    // Create a default test repository
+    const testRepo: MockRepository = {
+      id: 123456789,
+      name: "test-repo",
+      full_name: "test-owner/test-repo",
+      private: false,
+      owner: { login: "test-owner", id: 1000 },
+      default_branch: "main",
+      node_id: "R_kgDOTest123",
+    };
+    this.repositories.set(testRepo.full_name, testRepo);
+
+    // Create a test issue
+    const testIssue: MockIssue = {
+      number: 1,
+      title: "Test Issue for OpenHands Resolver",
+      body: "This is a test issue to verify the resolver integration works correctly. Please add a README file.",
+      state: "open",
+      labels: [],
+      user: { login: "test-user", id: 2000 },
+      created_at: new Date().toISOString(),
+      updated_at: new Date().toISOString(),
+      comments: [],
+      reactions: [],
+    };
+    this.issues.set(testRepo.full_name, new Map([[1, testIssue]]));
+
+    // Create a default installation
+    const testInstallation: MockInstallation = {
+      id: 12345,
+      account: { login: "test-owner", id: 1000 },
+      repositories: [testRepo],
+      access_token: "ghs_mock_installation_token_12345",
+    };
+    this.installations.set(testInstallation.id, testInstallation);
+  }
+
+  /** Look up a repository by "owner/name"; `undefined` when unknown. */
+  getRepository(fullName: string): MockRepository | undefined {
+    return this.repositories.get(fullName);
+  }
+
+  /** Look up one issue of a repository; `undefined` when either is unknown. */
+  getIssue(fullName: string, issueNumber: number): MockIssue | undefined {
+    return this.issues.get(fullName)?.get(issueNumber);
+  }
+
+  /** All issues of a repository, or an empty array for an unknown repository. */
+  getIssues(fullName: string): MockIssue[] {
+    const repoIssues = this.issues.get(fullName);
+    return repoIssues ? Array.from(repoIssues.values()) : [];
+  }
+
+  /**
+   * Append a comment to an issue and bump the issue's `updated_at`.
+   *
+   * @returns the stored comment, with a freshly allocated id
+   * @throws Error when the issue does not exist
+   */
+  addComment(
+    fullName: string,
+    issueNumber: number,
+    body: string,
+    user: { login: string; id: number },
+  ): MockComment {
+    const issue = this.getIssue(fullName, issueNumber);
+    if (!issue) throw new Error(`Issue not found: ${fullName}#${issueNumber}`);
+
+    const comment: MockComment = {
+      id: this.nextCommentId++,
+      body,
+      user,
+      created_at: new Date().toISOString(),
+    };
+    issue.comments.push(comment);
+    issue.updated_at = new Date().toISOString();
+    return comment;
+  }
+
+  /** Record a reaction on an issue; silently ignored for an unknown issue. */
+  addReaction(fullName: string, issueNumber: number, reaction: string): void {
+    const issue = this.getIssue(fullName, issueNumber);
+    if (issue) {
+      issue.reactions.push(reaction);
+    }
+  }
+
+  /**
+   * Attach a label to an issue; silently ignored for an unknown issue.
+   *
+   * A label name that is already present is left untouched, so the label list
+   * never contains duplicates, and every new label receives a unique id.
+   */
+  addLabel(fullName: string, issueNumber: number, label: string): void {
+    const issue = this.getIssue(fullName, issueNumber);
+    if (!issue) return;
+    if (issue.labels.some((existing) => existing.name === label)) return;
+
+    issue.labels.push({ name: label, id: this.nextLabelId++ });
+    issue.updated_at = new Date().toISOString();
+  }
+
+  /** Look up an installation by id; `undefined` when unknown. */
+  getInstallation(id: number): MockInstallation | undefined {
+    return this.installations.get(id);
+  }
+
+  /** Every repository known to the store, regardless of installation. */
+  getAllRepositories(): MockRepository[] {
+    return Array.from(this.repositories.values());
+  }
+
+  /** Append an entry to the webhook log, stamped with the current time. */
+  recordWebhookEvent(action: string, payload: Record<string, unknown>): void {
+    this.webhookEvents.push({
+      action,
+      payload,
+      timestamp: new Date().toISOString(),
+    });
+  }
+
+  /** The webhook log (the live internal array, not a copy). */
+  getWebhookEvents(): WebhookEvent[] {
+    return this.webhookEvents;
+  }
+
+  /** Record the body of a comment that was posted back to the mock API. */
+  recordOutgoingWebhookResponse(body: string): void {
+    this.outgoingWebhookResponses.push({
+      body,
+      timestamp: new Date().toISOString(),
+    });
+  }
+
+  /** The recorded outgoing responses (the live internal array, not a copy). */
+  getOutgoingWebhookResponses(): Array<{ body: string; timestamp: string }> {
+    return this.outgoingWebhookResponses;
+  }
+
+  /** Drop the webhook log and the outgoing responses; other data is kept. */
+  clearEvents(): void {
+    this.webhookEvents = [];
+    this.outgoingWebhookResponses = [];
+  }
+
+  /** Discard everything, restart the id counters and re-seed the defaults. */
+  reset(): void {
+    this.repositories.clear();
+    this.issues.clear();
+    this.installations.clear();
+    this.webhookEvents = [];
+    this.outgoingWebhookResponses = [];
+    this.nextCommentId = 1000;
+    this.nextLabelId = 5000;
+    this.initializeDefaultData();
+  }
+}
+
+// Single store instance shared by every request handler in this module.
+const dataStore = new MockGitHubDataStore();
+
+/**
+ * Shared secret used to sign webhook payloads. Read once at module load from
+ * MOCK_GITHUB_WEBHOOK_SECRET, falling back to "test-webhook-secret".
+ */
+const WEBHOOK_SECRET =
+  process.env.MOCK_GITHUB_WEBHOOK_SECRET || "test-webhook-secret";
+
+/**
+ * Build the `X-Hub-Signature-256` header value for a payload.
+ *
+ * @param payload the exact request body that will be sent
+ * @returns "sha256=" followed by the hex HMAC-SHA256 of the payload, keyed
+ *          with WEBHOOK_SECRET
+ */
+function generateWebhookSignature(payload: string): string {
+  const digest = crypto
+    .createHmac("sha256", WEBHOOK_SECRET)
+    .update(payload)
+    .digest("hex");
+  return `sha256=${digest}`;
+}
+
+/**
+ * Match a URL path against a route pattern.
+ *
+ * @returns the pattern's named capture groups (an empty object when it has
+ *          none), or `null` when the path does not match
+ */
+function parseRoute(
+  url: string,
+  pattern: RegExp,
+): Record<string, string> | null {
+  const matched = url.match(pattern);
+  return matched === null ? null : (matched.groups ?? {});
+}
+
+/**
+ * Send `data` as a JSON response and end it.
+ *
+ * @param res    response to write to
+ * @param data   value serialized with `JSON.stringify`
+ * @param status HTTP status code (defaults to 200)
+ */
+function jsonResponse(
+  res: http.ServerResponse,
+  data: unknown,
+  status = 200,
+): void {
+  // writeHead returns the response itself, so the body can be chained on.
+  res
+    .writeHead(status, { "Content-Type": "application/json" })
+    .end(JSON.stringify(data));
+}
+
+/**
+ * Read the whole request body as a UTF-8 string.
+ *
+ * Chunks are collected as Buffers and decoded once at the end: decoding each
+ * chunk separately would corrupt a multi-byte character that happens to be
+ * split across two chunks.
+ *
+ * @returns a promise resolving to the body text ("" for an empty body);
+ *          rejects with the stream's error if the request errors
+ */
+async function parseBody(req: http.IncomingMessage): Promise<string> {
+  return new Promise((resolve, reject) => {
+    const chunks: Buffer[] = [];
+    req.on("data", (chunk: Buffer | string) => {
+      // Chunks are strings when an encoding was set on the stream.
+      chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
+    });
+    req.on("end", () => resolve(Buffer.concat(chunks).toString("utf8")));
+    req.on("error", reject);
+  });
+}
+
+/**
+ * Route table of the mock server.
+ *
+ * Each entry pairs an HTTP method and a path pattern with a handler. The
+ * handler receives the pattern's named capture groups as `params` and the
+ * request body as `body` (parsed JSON, or the raw string when it is not valid
+ * JSON; `undefined` for methods without a parsed body). Entries are tried in
+ * order and the first match wins.
+ *
+ * Write endpoints validate the request body before touching the data store
+ * and answer 422 (GitHub API routes) or 400 (test control routes) when it is
+ * malformed.
+ */
+const handlers: Array<{
+  method: string;
+  pattern: RegExp;
+  handler: (
+    req: http.IncomingMessage,
+    res: http.ServerResponse,
+    params: Record<string, string>,
+    body?: unknown,
+  ) => Promise<void> | void;
+}> = [
+  // GitHub App root endpoint
+  {
+    method: "GET",
+    pattern: /^\/app$/,
+    handler: (_req, res) => {
+      jsonResponse(res, {
+        id: 123456,
+        slug: "openhands-test-app",
+        name: "OpenHands Test App",
+        owner: { login: "test-owner", id: 1000 },
+        permissions: {
+          issues: "write",
+          pull_requests: "write",
+          contents: "write",
+        },
+      });
+    },
+  },
+
+  // Get repository
+  {
+    method: "GET",
+    pattern: /^\/repos\/(?<owner>[^/]+)\/(?<repo>[^/]+)$/,
+    handler: (_req, res, params) => {
+      const fullName = `${params.owner}/${params.repo}`;
+      const repo = dataStore.getRepository(fullName);
+      if (repo) {
+        jsonResponse(res, repo);
+      } else {
+        jsonResponse(res, { message: "Not Found" }, 404);
+      }
+    },
+  },
+
+  // Get issue (the stored issue plus url, html_url and its repository)
+  {
+    method: "GET",
+    pattern:
+      /^\/repos\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/issues\/(?<number>\d+)$/,
+    handler: (_req, res, params) => {
+      const fullName = `${params.owner}/${params.repo}`;
+      const issue = dataStore.getIssue(fullName, parseInt(params.number, 10));
+      if (issue) {
+        const repo = dataStore.getRepository(fullName);
+        jsonResponse(res, {
+          ...issue,
+          url: `https://api.github.com/repos/${fullName}/issues/${issue.number}`,
+          html_url: `https://github.com/${fullName}/issues/${issue.number}`,
+          repository: repo,
+        });
+      } else {
+        jsonResponse(res, { message: "Not Found" }, 404);
+      }
+    },
+  },
+
+  // List issues (an empty list for an unknown repository)
+  {
+    method: "GET",
+    pattern: /^\/repos\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/issues$/,
+    handler: (_req, res, params) => {
+      const fullName = `${params.owner}/${params.repo}`;
+      const issues = dataStore.getIssues(fullName);
+      jsonResponse(res, issues);
+    },
+  },
+
+  // Get issue comments
+  {
+    method: "GET",
+    pattern:
+      /^\/repos\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/issues\/(?<number>\d+)\/comments$/,
+    handler: (_req, res, params) => {
+      const fullName = `${params.owner}/${params.repo}`;
+      const issue = dataStore.getIssue(fullName, parseInt(params.number, 10));
+      if (issue) {
+        jsonResponse(res, issue.comments);
+      } else {
+        jsonResponse(res, { message: "Not Found" }, 404);
+      }
+    },
+  },
+
+  // Create issue comment
+  {
+    method: "POST",
+    pattern:
+      /^\/repos\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/issues\/(?<number>\d+)\/comments$/,
+    handler: async (_req, res, params, body) => {
+      const fullName = `${params.owner}/${params.repo}`;
+      const issueNumber = parseInt(params.number, 10);
+      const commentBody = (body as { body?: unknown } | undefined)?.body;
+
+      // Reject a missing/non-string body instead of storing `undefined` or
+      // reporting a misleading 404.
+      if (typeof commentBody !== "string") {
+        jsonResponse(res, { message: "Validation Failed" }, 422);
+        return;
+      }
+
+      try {
+        const comment = dataStore.addComment(
+          fullName,
+          issueNumber,
+          commentBody,
+          {
+            login: "openhands[bot]",
+            id: 99999,
+          },
+        );
+
+        // Record this as an outgoing response (the resolver posting back)
+        dataStore.recordOutgoingWebhookResponse(commentBody);
+
+        jsonResponse(res, comment, 201);
+      } catch {
+        // addComment throws when the issue does not exist.
+        jsonResponse(res, { message: "Not Found" }, 404);
+      }
+    },
+  },
+
+  // Create issue reaction
+  {
+    method: "POST",
+    pattern:
+      /^\/repos\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/issues\/(?<number>\d+)\/reactions$/,
+    handler: async (_req, res, params, body) => {
+      const fullName = `${params.owner}/${params.repo}`;
+      const issueNumber = parseInt(params.number, 10);
+      const content = (body as { content?: unknown } | undefined)?.content;
+
+      if (typeof content !== "string") {
+        jsonResponse(res, { message: "Validation Failed" }, 422);
+        return;
+      }
+
+      // Same contract as the sibling issue routes: unknown issue -> 404.
+      if (!dataStore.getIssue(fullName, issueNumber)) {
+        jsonResponse(res, { message: "Not Found" }, 404);
+        return;
+      }
+
+      dataStore.addReaction(fullName, issueNumber, content);
+      jsonResponse(res, { id: Date.now(), content }, 201);
+    },
+  },
+
+  // Add issue label
+  {
+    method: "POST",
+    pattern:
+      /^\/repos\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/issues\/(?<number>\d+)\/labels$/,
+    handler: async (_req, res, params, body) => {
+      const fullName = `${params.owner}/${params.repo}`;
+      const issueNumber = parseInt(params.number, 10);
+
+      // Accept both `{ "labels": [...] }` and a bare JSON array of names.
+      const requested: unknown = Array.isArray(body)
+        ? body
+        : (body as { labels?: unknown } | undefined)?.labels;
+
+      if (
+        !Array.isArray(requested) ||
+        !requested.every((label) => typeof label === "string")
+      ) {
+        jsonResponse(res, { message: "Validation Failed" }, 422);
+        return;
+      }
+
+      const issue = dataStore.getIssue(fullName, issueNumber);
+      if (issue) {
+        (requested as string[]).forEach((label) =>
+          dataStore.addLabel(fullName, issueNumber, label),
+        );
+        jsonResponse(res, issue.labels, 201);
+      } else {
+        jsonResponse(res, { message: "Not Found" }, 404);
+      }
+    },
+  },
+
+  // Get installation access token (valid for one hour from now)
+  {
+    method: "POST",
+    pattern: /^\/app\/installations\/(?<installation_id>\d+)\/access_tokens$/,
+    handler: (_req, res, params) => {
+      const installation = dataStore.getInstallation(
+        parseInt(params.installation_id, 10),
+      );
+      if (installation) {
+        jsonResponse(
+          res,
+          {
+            token: installation.access_token,
+            expires_at: new Date(Date.now() + 3600000).toISOString(),
+            permissions: {
+              issues: "write",
+              pull_requests: "write",
+              contents: "write",
+            },
+            repository_selection: "all",
+          },
+          201,
+        );
+      } else {
+        jsonResponse(res, { message: "Not Found" }, 404);
+      }
+    },
+  },
+
+  // Get installation repositories
+  {
+    method: "GET",
+    pattern: /^\/installation\/repositories$/,
+    handler: (_req, res) => {
+      // Return all repositories from all installations
+      const repos = dataStore.getAllRepositories();
+      jsonResponse(res, {
+        total_count: repos.length,
+        repositories: repos,
+      });
+    },
+  },
+
+  // Get user (always the fixed test user)
+  {
+    method: "GET",
+    pattern: /^\/user$/,
+    handler: (_req, res) => {
+      jsonResponse(res, {
+        id: 2000,
+        login: "test-user",
+        avatar_url: "https://avatars.githubusercontent.com/u/2000",
+        name: "Test User",
+        email: "test-user@example.com",
+      });
+    },
+  },
+
+  // Get user by username (echoes the requested name with a fixed id)
+  {
+    method: "GET",
+    pattern: /^\/users\/(?<username>[^/]+)$/,
+    handler: (_req, res, params) => {
+      jsonResponse(res, {
+        id: 2000,
+        login: params.username,
+        avatar_url: `https://avatars.githubusercontent.com/u/2000`,
+        name: params.username,
+      });
+    },
+  },
+
+  // Get repository collaborator permission (always "write" for the test user)
+  {
+    method: "GET",
+    pattern:
+      /^\/repos\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/collaborators\/(?<username>[^/]+)\/permission$/,
+    handler: (_req, res) => {
+      jsonResponse(res, {
+        permission: "write",
+        user: { login: "test-user", id: 2000 },
+      });
+    },
+  },
+
+  // GraphQL endpoint
+  {
+    method: "POST",
+    pattern: /^\/graphql$/,
+    handler: async (_req, res, _params, _body) => {
+      // Return a basic response for common queries
+      // The body would contain { query: string, variables?: Record<string, unknown> }
+      jsonResponse(res, {
+        data: {
+          repository: {
+            id: "R_kgDOTest123",
+            name: "test-repo",
+            owner: { login: "test-owner" },
+          },
+        },
+      });
+    },
+  },
+
+  // Test control endpoints - Get webhook events
+  {
+    method: "GET",
+    pattern: /^\/_test\/webhook-events$/,
+    handler: (_req, res) => {
+      jsonResponse(res, dataStore.getWebhookEvents());
+    },
+  },
+
+  // Test control endpoints - Get outgoing webhook responses
+  {
+    method: "GET",
+    pattern: /^\/_test\/outgoing-responses$/,
+    handler: (_req, res) => {
+      jsonResponse(res, dataStore.getOutgoingWebhookResponses());
+    },
+  },
+
+  // Test control endpoints - Clear events
+  {
+    method: "POST",
+    pattern: /^\/_test\/clear-events$/,
+    handler: (_req, res) => {
+      dataStore.clearEvents();
+      jsonResponse(res, { status: "cleared" });
+    },
+  },
+
+  // Test control endpoints - Reset data
+  {
+    method: "POST",
+    pattern: /^\/_test\/reset$/,
+    handler: (_req, res) => {
+      dataStore.reset();
+      jsonResponse(res, { status: "reset" });
+    },
+  },
+
+  // Test control endpoints - Trigger webhook
+  {
+    method: "POST",
+    pattern: /^\/_test\/trigger-webhook$/,
+    handler: async (_req, res, _params, body) => {
+      const request = (body ?? {}) as {
+        targetUrl?: unknown;
+        eventType?: unknown;
+        payload?: unknown;
+      };
+      const { targetUrl, eventType, payload } = request;
+
+      // Validate before recording, so a malformed request neither pollutes
+      // the event log nor reaches fetch() with an undefined URL.
+      if (
+        typeof targetUrl !== "string" ||
+        typeof eventType !== "string" ||
+        typeof payload !== "object" ||
+        payload === null
+      ) {
+        jsonResponse(
+          res,
+          {
+            status: "error",
+            error: "targetUrl, eventType and payload are required",
+          },
+          400,
+        );
+        return;
+      }
+
+      // Record the webhook event
+      dataStore.recordWebhookEvent(
+        eventType,
+        payload as Record<string, unknown>,
+      );
+
+      // Send the webhook to the target URL. The signature is computed over
+      // the exact string that is sent as the request body.
+      const payloadString = JSON.stringify(payload);
+      const signature = generateWebhookSignature(payloadString);
+
+      try {
+        const response = await fetch(targetUrl, {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            "X-GitHub-Event": eventType,
+            "X-Hub-Signature-256": signature,
+            "X-GitHub-Delivery": crypto.randomUUID(),
+          },
+          body: payloadString,
+        });
+
+        const responseText = await response.text();
+        jsonResponse(res, {
+          status: "sent",
+          targetUrl,
+          eventType,
+          responseStatus: response.status,
+          responseBody: responseText,
+        });
+      } catch (error) {
+        // Delivery failed (e.g. the target is unreachable).
+        jsonResponse(
+          res,
+          {
+            status: "error",
+            error: (error as Error).message,
+          },
+          500,
+        );
+      }
+    },
+  },
+
+  // Health check
+  {
+    method: "GET",
+    pattern: /^\/_health$/,
+    handler: (_req, res) => {
+      jsonResponse(res, { status: "healthy" });
+    },
+  },
+];
+
+/**
+ * HTTP server of the mock.
+ *
+ * For every request it answers CORS preflights directly, otherwise reads and
+ * JSON-parses the body of POST/PUT requests (falling back to the raw string),
+ * then dispatches to the first entry of `handlers` whose method and path
+ * pattern match. The query string is ignored for matching. Unmatched requests
+ * get a 404, a failing body read a 400 and a failing handler a 500.
+ */
+const server = http.createServer(async (req, res) => {
+  const url = req.url || "/";
+  const method = req.method || "GET";
+
+  // Handle CORS preflight
+  if (method === "OPTIONS") {
+    res.writeHead(204, {
+      "Access-Control-Allow-Origin": "*",
+      "Access-Control-Allow-Methods": "GET, POST, PUT, DELETE, OPTIONS",
+      "Access-Control-Allow-Headers": "*",
+    });
+    res.end();
+    return;
+  }
+
+  // Add CORS headers to all responses
+  res.setHeader("Access-Control-Allow-Origin", "*");
+  res.setHeader("Access-Control-Allow-Headers", "*");
+
+  // Parse body for POST/PUT requests
+  let body: unknown;
+  if (method === "POST" || method === "PUT") {
+    let rawBody: string;
+    try {
+      rawBody = await parseBody(req);
+    } catch (error) {
+      // A request stream error must not surface as an unhandled rejection of
+      // this async listener.
+      console.error(`Error reading body for ${method} ${url}:`, error);
+      if (!res.headersSent) {
+        jsonResponse(res, { error: "Bad Request" }, 400);
+      }
+      return;
+    }
+    try {
+      body = JSON.parse(rawBody);
+    } catch {
+      // Not JSON: hand the raw text to the handler.
+      body = rawBody;
+    }
+  }
+
+  // Routes are matched against the path only, without the query string.
+  const path = url.split("?")[0];
+
+  // Try to match a handler
+  for (const handler of handlers) {
+    if (handler.method === method) {
+      const params = parseRoute(path, handler.pattern);
+      if (params !== null) {
+        try {
+          await handler.handler(req, res, params, body);
+          return;
+        } catch (error) {
+          console.error(`Error handling ${method} ${url}:`, error);
+          if (res.headersSent) {
+            // The handler already started responding; a second writeHead
+            // would throw, so just finish the response.
+            res.end();
+          } else {
+            jsonResponse(res, { error: "Internal Server Error" }, 500);
+          }
+          return;
+        }
+      }
+    }
+  }
+
+  // No handler found
+  console.log(`No handler for ${method} ${url}`);
+  jsonResponse(res, { message: "Not Found", path: url }, 404);
+});
+
+// Port to listen on: MOCK_GITHUB_PORT when set, otherwise 9999.
+const PORT = Number.parseInt(process.env.MOCK_GITHUB_PORT || "9999", 10);
+
+// Start listening and, once up, print the startup banner line by line.
+server.listen(PORT, () => {
+  const bannerLines = [
+    `Mock GitHub Server running on port ${PORT}`,
+    `Webhook secret: ${WEBHOOK_SECRET}`,
+    "\nAvailable endpoints:",
+    "  GET  /_health                    - Health check",
+    "  GET  /_test/webhook-events       - Get recorded webhook events",
+    "  GET  /_test/outgoing-responses   - Get responses posted by the resolver",
+    "  POST /_test/clear-events         - Clear recorded events",
+    "  POST /_test/reset                - Reset all mock data",
+    "  POST /_test/trigger-webhook      - Trigger a webhook to target URL",
+    "\nGitHub API endpoints:",
+    "  GET  /repos/:owner/:repo",
+    "  GET  /repos/:owner/:repo/issues/:number",
+    "  POST /repos/:owner/:repo/issues/:number/comments",
+    "  POST /repos/:owner/:repo/issues/:number/reactions",
+    "  POST /app/installations/:id/access_tokens",
+    "  POST /graphql",
+  ];
+  bannerLines.forEach((line) => {
+    console.log(line);
+  });
+});
+
+export { server, dataStore, generateWebhookSignature, WEBHOOK_SECRET };
@@ -0,0 +1,281 @@
+/**
+ * GitHub Webhook Payload Templates
+ *
+ * These templates mirror the webhook payloads that GitHub sends for various events.
+ * They're used to test the OpenHands resolver integration.
+ */
+
+/**
+ * Fields shared by every webhook payload template in this module.
+ * Event-specific payloads extend this interface.
+ */
+export interface GitHubWebhookPayload {
+  // Event action, e.g. "labeled" or "created" in the templates below.
+  action: string;
+  installation: { id: number };
+  repository: {
+    id: number;
+    name: string;
+    // "owner/name".
+    full_name: string;
+    private: boolean;
+    owner: { login: string; id: number };
+    default_branch: string;
+  };
+  // Account that triggered the event.
+  sender: { login: string; id: number };
+  // Allows event-specific top-level fields (issue, comment, pull_request, ...).
+  [key: string]: unknown;
+}
+
+/** Payload of an issue event, as built by `createIssueLabeledPayload`. */
+export interface IssuePayload extends GitHubWebhookPayload {
+  issue: {
+    number: number;
+    title: string;
+    body: string;
+    state: string;
+    labels: Array<{ name: string; id: number }>;
+    user: { login: string; id: number };
+  };
+  // The label the event is about; set by `createIssueLabeledPayload`.
+  label?: { name: string; id: number };
+}
+
+/** Payload of an issue comment event, as built by `createIssueCommentPayload`. */
+export interface IssueCommentPayload extends GitHubWebhookPayload {
+  issue: {
+    number: number;
+    title: string;
+    body: string;
+    state: string;
+    labels: Array<{ name: string; id: number }>;
+    user: { login: string; id: number };
+    // Only set when the commented "issue" is a pull request
+    // (`isPullRequest` in `createIssueCommentPayload`).
+    pull_request?: { url: string };
+  };
+  comment: {
+    id: number;
+    body: string;
+    user: { login: string; id: number };
+  };
+}
+
+/**
+ * Payload of a pull request review comment event, as built by
+ * `createPullRequestReviewCommentPayload`.
+ */
+export interface PullRequestReviewCommentPayload extends GitHubWebhookPayload {
+  pull_request: {
+    number: number;
+    title: string;
+    body: string;
+    state: string;
+    // Source branch name and commit of the pull request.
+    head: { ref: string; sha: string };
+    // Target branch name.
+    base: { ref: string };
+    user: { login: string; id: number };
+  };
+  comment: {
+    id: number;
+    node_id: string;
+    body: string;
+    // File path and line the review comment is attached to.
+    path: string;
+    line: number;
+    user: { login: string; id: number };
+  };
+}
+
+/**
+ * Build the fields common to all webhook payloads.
+ *
+ * Every parameter is optional; omitted ones fall back to the default test
+ * installation (12345), repository (test-owner/test-repo) and sender
+ * (test-user). `action` is left empty for the caller to fill in.
+ */
+function createBasePayload(params: {
+  installationId?: number;
+  repositoryId?: number;
+  repositoryName?: string;
+  repositoryOwner?: string;
+  senderLogin?: string;
+  senderId?: number;
+  isPrivate?: boolean;
+}): GitHubWebhookPayload {
+  const {
+    installationId = 12345,
+    repositoryId = 123456789,
+    repositoryName = "test-repo",
+    repositoryOwner = "test-owner",
+    senderLogin = "test-user",
+    senderId = 2000,
+    isPrivate = false,
+  } = params;
+
+  const repository = {
+    id: repositoryId,
+    name: repositoryName,
+    full_name: `${repositoryOwner}/${repositoryName}`,
+    private: isPrivate,
+    // The owner id is fixed; only the login is configurable.
+    owner: { login: repositoryOwner, id: 1000 },
+    default_branch: "main",
+  };
+  const sender = { login: senderLogin, id: senderId };
+
+  return {
+    action: "",
+    installation: { id: installationId },
+    repository,
+    sender,
+  };
+}
+
+/**
+ * Build an issue "labeled" payload.
+ *
+ * The label defaults to "openhands" and gets a timestamp-based id; the same
+ * label object appears both in `issue.labels` and as the top-level `label`.
+ * The sender is also used as the issue author. Parameters not consumed here
+ * are forwarded to `createBasePayload`.
+ */
+export function createIssueLabeledPayload(params: {
+  installationId?: number;
+  issueNumber?: number;
+  issueTitle?: string;
+  issueBody?: string;
+  labelName?: string;
+  repositoryName?: string;
+  repositoryOwner?: string;
+  senderLogin?: string;
+  senderId?: number;
+}): IssuePayload {
+  const {
+    issueNumber = 1,
+    issueTitle = "Test Issue for OpenHands Resolver",
+    issueBody = "This is a test issue. Please add a README file.",
+    labelName = "openhands",
+    senderLogin = "test-user",
+    senderId = 2000,
+    ...baseParams
+  } = params;
+
+  const common = createBasePayload({ senderLogin, senderId, ...baseParams });
+  const appliedLabel = { name: labelName, id: Date.now() };
+  const issue = {
+    number: issueNumber,
+    title: issueTitle,
+    body: issueBody,
+    state: "open",
+    labels: [appliedLabel],
+    user: { login: senderLogin, id: senderId },
+  };
+
+  return { ...common, action: "labeled", issue, label: appliedLabel };
+}
+
+/**
+ * Build an issue comment "created" payload.
+ *
+ * The comment defaults to an "@openhands" mention written by the sender; the
+ * issue itself is authored by a fixed "issue-creator" user. With
+ * `isPullRequest` set, `issue.pull_request.url` points at the matching pulls
+ * API URL. Parameters not consumed here are forwarded to `createBasePayload`.
+ */
+export function createIssueCommentPayload(params: {
+  installationId?: number;
+  issueNumber?: number;
+  issueTitle?: string;
+  issueBody?: string;
+  commentBody?: string;
+  commentId?: number;
+  repositoryName?: string;
+  repositoryOwner?: string;
+  senderLogin?: string;
+  senderId?: number;
+  isPullRequest?: boolean;
+}): IssueCommentPayload {
+  const {
+    issueNumber = 1,
+    issueTitle = "Test Issue for OpenHands Resolver",
+    issueBody = "This is a test issue.",
+    commentBody = "@openhands please add a README file",
+    commentId = 1001,
+    senderLogin = "test-user",
+    senderId = 2000,
+    isPullRequest = false,
+    ...baseParams
+  } = params;
+
+  const common = createBasePayload({ senderLogin, senderId, ...baseParams });
+
+  // Present only for comments on pull requests; spread last so that
+  // `pull_request` stays the final key of `issue`.
+  const pullRequestLink = isPullRequest
+    ? {
+        pull_request: {
+          url: `https://api.github.com/repos/${common.repository.full_name}/pulls/${issueNumber}`,
+        },
+      }
+    : {};
+
+  return {
+    ...common,
+    action: "created",
+    issue: {
+      number: issueNumber,
+      title: issueTitle,
+      body: issueBody,
+      state: "open",
+      labels: [],
+      user: { login: "issue-creator", id: 3000 },
+      ...pullRequestLink,
+    },
+    comment: {
+      id: commentId,
+      body: commentBody,
+      user: { login: senderLogin, id: senderId },
+    },
+  };
+}
+
+/**
+ * Build a pull request review comment "created" payload.
+ *
+ * The comment defaults to an "@openhands" mention written by the sender on
+ * `src/main.ts` line 10; the pull request is authored by a fixed "pr-creator"
+ * user and uses a fixed head sha. Parameters not consumed here are forwarded
+ * to `createBasePayload`.
+ */
+export function createPullRequestReviewCommentPayload(params: {
+  installationId?: number;
+  prNumber?: number;
+  prTitle?: string;
+  prBody?: string;
+  commentBody?: string;
+  commentId?: number;
+  filePath?: string;
+  lineNumber?: number;
+  headBranch?: string;
+  baseBranch?: string;
+  repositoryName?: string;
+  repositoryOwner?: string;
+  senderLogin?: string;
+  senderId?: number;
+}): PullRequestReviewCommentPayload {
+  const {
+    prNumber = 2,
+    prTitle = "Test PR for OpenHands Resolver",
+    prBody = "This is a test PR.",
+    commentBody = "@openhands please fix this code",
+    commentId = 2001,
+    filePath = "src/main.ts",
+    lineNumber = 10,
+    headBranch = "feature-branch",
+    baseBranch = "main",
+    senderLogin = "test-user",
+    senderId = 2000,
+    ...baseParams
+  } = params;
+
+  const common = createBasePayload({ senderLogin, senderId, ...baseParams });
+
+  const pullRequest = {
+    number: prNumber,
+    title: prTitle,
+    body: prBody,
+    state: "open",
+    head: { ref: headBranch, sha: "abc123def456" },
+    base: { ref: baseBranch },
+    user: { login: "pr-creator", id: 4000 },
+  };
+  const reviewComment = {
+    id: commentId,
+    node_id: `PRRC_${commentId}`,
+    body: commentBody,
+    path: filePath,
+    line: lineNumber,
+    user: { login: senderLogin, id: senderId },
+  };
+
+  return {
+    ...common,
+    action: "created",
+    pull_request: pullRequest,
+    comment: reviewComment,
+  };
+}
+
+/**
+ * Derive the GitHub event name from the fields present on a payload.
+ *
+ * @returns "pull_request_review_comment" for a comment with a top-level
+ *          `pull_request`, "issue_comment" for any other comment, "issues"
+ *          for a payload with an `issue` but no comment, else "unknown"
+ */
+export function getEventType(payload: GitHubWebhookPayload): string {
+  const hasComment = "comment" in payload;
+  if (hasComment) {
+    return "pull_request" in payload
+      ? "pull_request_review_comment"
+      : "issue_comment";
+  }
+  return "issue" in payload ? "issues" : "unknown";
+}
@@ -0,0 +1,6 @@
+/**
+ * Mock Server Exports
+ */
+
+export * from "./github-webhook-payloads";
+export * from "./mock-github-client";
@@ -0,0 +1,223 @@
+/**
+ * Mock GitHub Client
+ *
+ * Client utilities for interacting with the mock GitHub server during tests.
+ */
+
+import {
+  createIssueLabeledPayload,
+  createIssueCommentPayload,
+  createPullRequestReviewCommentPayload,
+  getEventType,
+  GitHubWebhookPayload,
+} from "./github-webhook-payloads";
+
+/** Connection settings consumed by the MockGitHubClient constructor. */
+export interface MockGitHubClientConfig {
+  /** Base URL of the mock server; paths like `/_health` and `/_test/...` are appended to it. */
+  mockServerUrl: string;
+  /** URL sent to the mock server as `targetUrl` when a webhook is triggered. */
+  webhookTargetUrl: string;
+}
+
+/**
+ * Return type of `MockGitHubClient.triggerWebhook`, i.e. the parsed JSON body
+ * of the mock server's `/_test/trigger-webhook` reply.
+ *
+ * NOTE(review): the field meanings are defined by github-mock-server.ts, which
+ * is not visible here. `responseStatus`/`responseBody` presumably describe the
+ * reply of the webhook target, not of the mock server itself - confirm.
+ */
+export interface TriggerWebhookResult {
+  status: string;
+  targetUrl: string;
+  eventType: string;
+  responseStatus: number;
+  responseBody: string;
+}
+
+/**
+ * Element type of the array `MockGitHubClient.getWebhookEvents` reads from
+ * `/_test/webhook-events`.
+ */
+export interface WebhookEvent {
+  action: string;
+  payload: Record<string, unknown>;
+  // NOTE(review): format is set by the mock server - presumably ISO 8601; confirm.
+  timestamp: string;
+}
+
+/**
+ * Element type of the array `MockGitHubClient.getOutgoingResponses` reads from
+ * `/_test/outgoing-responses`.
+ */
+export interface OutgoingResponse {
+  /** Text that `waitForResponseContaining` searches with `includes`. */
+  body: string;
+  // NOTE(review): format is set by the mock server - presumably ISO 8601; confirm.
+  timestamp: string;
+}
+
+/**
+ * Client for interacting with the Mock GitHub Server.
+ *
+ * Every method talks HTTP to `mockServerUrl`. Webhooks are not posted to the
+ * app directly: the client asks the mock server (`/_test/trigger-webhook`) to
+ * do so, passing `webhookTargetUrl` as the target.
+ */
+export class MockGitHubClient {
+  private mockServerUrl: string;
+
+  private webhookTargetUrl: string;
+
+  constructor(config: MockGitHubClientConfig) {
+    this.mockServerUrl = config.mockServerUrl;
+    this.webhookTargetUrl = config.webhookTargetUrl;
+  }
+
+  /**
+   * Call a mock-server control endpoint and reject on a non-2xx status, so a
+   * failed reset/clear/read cannot pass unnoticed.
+   * @param path Path appended to `mockServerUrl` (e.g. `/_test/reset`)
+   * @param init Optional fetch options
+   * @throws Error when the server answers with a non-2xx status
+   */
+  private async controlRequest(
+    path: string,
+    init?: Parameters<typeof fetch>[1],
+  ): Promise<Response> {
+    const response = await fetch(`${this.mockServerUrl}${path}`, init);
+    if (!response.ok) {
+      throw new Error(
+        `Mock GitHub server request to ${path} failed with HTTP ${response.status}`,
+      );
+    }
+    return response;
+  }
+
+  /**
+   * Run `probe` repeatedly until it yields a value or the deadline passes.
+   * @param probe Returns the result, or `undefined` to keep polling
+   * @param timeoutMs Polling stops once this much time has elapsed
+   * @param checkIntervalMs Pause between two probes
+   * @param timeoutMessage Message of the Error thrown on timeout
+   * @throws Error on timeout; errors thrown by `probe` propagate unchanged
+   */
+  private static async pollUntil<T>(
+    probe: () => Promise<T | undefined>,
+    timeoutMs: number,
+    checkIntervalMs: number,
+    timeoutMessage: string,
+  ): Promise<T> {
+    const deadline = Date.now() + timeoutMs;
+    while (Date.now() < deadline) {
+      const found = await probe();
+      if (found !== undefined) {
+        return found;
+      }
+      await new Promise((resolve) => setTimeout(resolve, checkIntervalMs));
+    }
+    throw new Error(timeoutMessage);
+  }
+
+  /**
+   * Check if the mock server is healthy.
+   * @returns `true` when `/_health` answers 2xx; `false` on any other status
+   *   or when the request itself fails (e.g. the server is not up yet)
+   */
+  async healthCheck(): Promise<boolean> {
+    try {
+      const response = await fetch(`${this.mockServerUrl}/_health`);
+      return response.ok;
+    } catch {
+      // A connection failure simply means "not healthy".
+      return false;
+    }
+  }
+
+  /**
+   * Wait for the mock server to be ready, probing every 500 ms.
+   * @param timeoutMs Maximum time to wait
+   * @throws Error when the server is still unhealthy after `timeoutMs`
+   */
+  async waitForReady(timeoutMs = 30000): Promise<void> {
+    await MockGitHubClient.pollUntil(
+      async () => ((await this.healthCheck()) ? true : undefined),
+      timeoutMs,
+      500,
+      `Mock GitHub server not ready after ${timeoutMs}ms`,
+    );
+  }
+
+  /**
+   * Trigger a webhook to the target URL.
+   *
+   * The event type is derived from the payload via `getEventType`.
+   * The HTTP status of the mock server's reply is intentionally not checked
+   * here: the parsed body is returned as-is so callers can inspect its
+   * `status` / `responseStatus` fields.
+   * @param payload Webhook payload to deliver
+   * @returns Parsed JSON body of the mock server's reply
+   */
+  async triggerWebhook(
+    payload: GitHubWebhookPayload,
+  ): Promise<TriggerWebhookResult> {
+    const eventType = getEventType(payload);
+
+    const response = await fetch(
+      `${this.mockServerUrl}/_test/trigger-webhook`,
+      {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          targetUrl: this.webhookTargetUrl,
+          eventType,
+          payload,
+        }),
+      },
+    );
+
+    return response.json();
+  }
+
+  /**
+   * Trigger an issue labeled event (simulates adding the openhands label)
+   * @param params Overrides for the payload defaults
+   */
+  async triggerIssueLabeledEvent(
+    params?: Parameters<typeof createIssueLabeledPayload>[0],
+  ): Promise<TriggerWebhookResult> {
+    const payload = createIssueLabeledPayload(params || {});
+    return this.triggerWebhook(payload);
+  }
+
+  /**
+   * Trigger an issue comment event (simulates @openhands mention in issue)
+   * @param params Overrides for the payload defaults
+   */
+  async triggerIssueCommentEvent(
+    params?: Parameters<typeof createIssueCommentPayload>[0],
+  ): Promise<TriggerWebhookResult> {
+    const payload = createIssueCommentPayload(params || {});
+    return this.triggerWebhook(payload);
+  }
+
+  /**
+   * Trigger a PR review comment event (simulates @openhands mention in PR)
+   * @param params Overrides for the payload defaults
+   */
+  async triggerPRReviewCommentEvent(
+    params?: Parameters<typeof createPullRequestReviewCommentPayload>[0],
+  ): Promise<TriggerWebhookResult> {
+    const payload = createPullRequestReviewCommentPayload(params || {});
+    return this.triggerWebhook(payload);
+  }
+
+  /**
+   * Get all recorded webhook events
+   * @throws Error when the mock server answers with a non-2xx status
+   */
+  async getWebhookEvents(): Promise<WebhookEvent[]> {
+    const response = await this.controlRequest("/_test/webhook-events");
+    return response.json();
+  }
+
+  /**
+   * Get all outgoing responses (comments posted by the resolver)
+   * @throws Error when the mock server answers with a non-2xx status
+   */
+  async getOutgoingResponses(): Promise<OutgoingResponse[]> {
+    const response = await this.controlRequest("/_test/outgoing-responses");
+    return response.json();
+  }
+
+  /**
+   * Clear all recorded events
+   * @throws Error when the mock server answers with a non-2xx status
+   */
+  async clearEvents(): Promise<void> {
+    await this.controlRequest("/_test/clear-events", { method: "POST" });
+  }
+
+  /**
+   * Reset all mock data to initial state
+   * @throws Error when the mock server answers with a non-2xx status
+   */
+  async reset(): Promise<void> {
+    await this.controlRequest("/_test/reset", { method: "POST" });
+  }
+
+  /**
+   * Wait for the resolver to post a response
+   * @param timeoutMs Maximum time to wait
+   * @param expectedCount Number of responses to wait for (default: 1)
+   * @param checkIntervalMs How often to check for responses
+   * @returns All responses recorded so far, once there are at least
+   *   `expectedCount` of them
+   * @throws Error on timeout
+   */
+  async waitForResponse(
+    timeoutMs = 120000,
+    expectedCount = 1,
+    checkIntervalMs = 2000,
+  ): Promise<OutgoingResponse[]> {
+    return MockGitHubClient.pollUntil(
+      async () => {
+        const responses = await this.getOutgoingResponses();
+        return responses.length >= expectedCount ? responses : undefined;
+      },
+      timeoutMs,
+      checkIntervalMs,
+      `Timed out waiting for ${expectedCount} response(s) after ${timeoutMs}ms`,
+    );
+  }
+
+  /**
+   * Wait for a response containing specific text
+   * @param expectedText Substring to look for in each response body
+   * @param timeoutMs Maximum time to wait
+   * @param checkIntervalMs How often to check for responses
+   * @returns The first recorded response whose body contains `expectedText`
+   * @throws Error on timeout
+   */
+  async waitForResponseContaining(
+    expectedText: string,
+    timeoutMs = 120000,
+    checkIntervalMs = 2000,
+  ): Promise<OutgoingResponse> {
+    return MockGitHubClient.pollUntil(
+      async () => {
+        const responses = await this.getOutgoingResponses();
+        return responses.find((response) =>
+          response.body.includes(expectedText),
+        );
+      },
+      timeoutMs,
+      checkIntervalMs,
+      `Timed out waiting for response containing "${expectedText}" after ${timeoutMs}ms`,
+    );
+  }
+}
+
+/**
+ * Build a MockGitHubClient for a mock server and an app that both listen on
+ * localhost.
+ * @param mockServerPort Port of the mock GitHub server (default: 9999)
+ * @param appPort Port of the app receiving webhooks at
+ *   `/api/integration/github/events` (default: 12000)
+ */
+export function createMockGitHubClient(
+  mockServerPort = 9999,
+  appPort = 12000,
+): MockGitHubClient {
+  const mockServerUrl = `http://localhost:${mockServerPort}`;
+  const webhookTargetUrl = `http://localhost:${appPort}/api/integration/github/events`;
+
+  return new MockGitHubClient({ mockServerUrl, webhookTargetUrl });
+}
@@ -0,0 +1,47 @@
+{
+  "name": "openhands-integration-tests",
+  "version": "1.0.0",
+  "description": "Integration and smoke tests for OpenHands using Playwright",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "test": "playwright test",
+    "test:smoke": "playwright test --grep @smoke",
+    "test:github-resolver": "playwright test --grep @github-resolver",
+    "test:staging": "cross-env BASE_URL=https://staging.all-hands.dev playwright test",
+    "test:production": "cross-env BASE_URL=https://app.all-hands.dev playwright test",
+    "test:feature": "playwright test",
+    "test:headed": "playwright test --headed",
+    "test:debug": "playwright test --debug",
+    "test:ui": "playwright test --ui",
+    "setup:auth": "playwright test --project=setup",
+    "report": "playwright show-report",
+    "codegen": "playwright codegen",
+    "typecheck": "tsc --noEmit",
+    "lint": "npm run typecheck && eslint . --ext .ts && prettier --check \"**/*.ts\"",
+    "lint:fix": "eslint . --ext .ts --fix && prettier --write \"**/*.ts\"",
+    "mock:github": "tsx mocks/github-mock-server.ts",
+    "mock:github:start": "tsx mocks/github-mock-server.ts &"
+  },
+  "devDependencies": {
+    "@playwright/test": "^1.57.0",
+    "@types/node": "^22.0.0",
+    "@typescript-eslint/eslint-plugin": "^7.18.0",
+    "@typescript-eslint/parser": "^7.18.0",
+    "cross-env": "^7.0.3",
+    "dotenv": "^16.4.5",
+    "eslint": "^8.57.0",
+    "eslint-config-airbnb-base": "^15.0.0",
+    "eslint-config-airbnb-typescript": "^18.0.0",
+    "eslint-config-prettier": "^9.1.0",
+    "eslint-plugin-import": "^2.31.0",
+    "eslint-plugin-prettier": "^5.2.1",
+    "eslint-plugin-unused-imports": "^3.2.0",
+    "prettier": "^3.4.2",
+    "tsx": "^4.19.0",
+    "typescript": "^5.6.0"
+  },
+  "engines": {
+    "node": ">=20.11.0"
+  }
+}
@@ -0,0 +1,89 @@
+import { Page, Locator, expect } from "@playwright/test";
+
+/**
+ * Common foundation for the page objects in this suite: wraps a Playwright
+ * `Page` and offers navigation, waiting, screenshot and error-banner helpers.
+ */
+export class BasePage {
+  readonly page: Page;
+
+  constructor(page: Page) {
+    this.page = page;
+  }
+
+  /**
+   * Go to `path` (default "/") and wait for the page to load.
+   */
+  async goto(path: string = "/"): Promise<void> {
+    await this.page.goto(path);
+    await this.waitForPageLoad();
+  }
+
+  /**
+   * Wait for the page to load: up to 30 s for network idle (best effort),
+   * then for `domcontentloaded`.
+   */
+  async waitForPageLoad(): Promise<void> {
+    try {
+      await this.page.waitForLoadState("networkidle", { timeout: 30_000 });
+    } catch {
+      // Never reaching network idle is tolerated.
+    }
+    await this.page.waitForLoadState("domcontentloaded");
+  }
+
+  /**
+   * Assert that `locator` becomes visible within `timeout` ms.
+   */
+  async waitForElement(
+    locator: Locator,
+    timeout: number = 30_000,
+  ): Promise<void> {
+    const assertion = expect(locator);
+    await assertion.toBeVisible({ timeout });
+  }
+
+  /**
+   * Assert that `locator` becomes hidden within `timeout` ms.
+   */
+  async waitForElementHidden(
+    locator: Locator,
+    timeout: number = 30_000,
+  ): Promise<void> {
+    const assertion = expect(locator);
+    await assertion.toBeHidden({ timeout });
+  }
+
+  /**
+   * Save a full-page screenshot under test-results/screenshots; the file
+   * name is `name` followed by the current timestamp.
+   */
+  async screenshot(name: string): Promise<void> {
+    const target = `test-results/screenshots/${name}-${Date.now()}.png`;
+    await this.page.screenshot({ path: target, fullPage: true });
+  }
+
+  /**
+   * Report whether the error banner is currently visible.
+   * A failing visibility check counts as "no error".
+   */
+  async hasError(): Promise<boolean> {
+    try {
+      return await this.page.getByTestId("error-message-banner").isVisible();
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Return the error banner's text, or null when no banner is visible.
+   */
+  async getErrorMessage(): Promise<string | null> {
+    const banner = this.page.getByTestId("error-message-banner");
+    const shown = await banner.isVisible().catch(() => false);
+    if (!shown) {
+      return null;
+    }
+    return banner.textContent();
+  }
+
+  /**
+   * Wait up to `timeout` ms for network idle; a timeout is ignored.
+   */
+  async waitForNetworkIdle(timeout: number = 10_000): Promise<void> {
+    try {
+      await this.page.waitForLoadState("networkidle", { timeout });
+    } catch {
+      // Best effort only.
+    }
+  }
+}
@@ -0,0 +1,326 @@
+import { Page, Locator, expect } from "@playwright/test";
+import { BasePage } from "./BasePage";
+
+/**
+ * Agent states that can be observed during conversation.
+ *
+ * The enum is exported (and re-exported from the pages index) but no method
+ * of ConversationPage reads it.
+ * NOTE(review): the string values presumably mirror the state names used by
+ * the OpenHands app - confirm against the frontend before relying on them.
+ */
+export enum AgentState {
+  LOADING = "loading",
+  RUNNING = "running",
+  AWAITING_USER_INPUT = "awaiting_user_input",
+  AWAITING_USER_CONFIRMATION = "awaiting_user_confirmation",
+  FINISHED = "finished",
+  ERROR = "error",
+  PAUSED = "paused",
+  STOPPED = "stopped",
+  INIT = "init",
+}
+
+/**
+ * Page object for the Conversation/Chat interface where users
+ * interact with the OpenHands agent.
+ *
+ * "Ready" throughout this class means `isChatInputEnabled()` returns true:
+ * the chat input is visible and its class attribute contains neither
+ * "disabled" nor "loading".
+ */
+export class ConversationPage extends BasePage {
+  // Main containers
+  readonly appRoute: Locator;
+
+  readonly chatBox: Locator;
+
+  // Chat input elements
+  readonly chatInput: Locator;
+
+  readonly sendButton: Locator;
+
+  readonly stopButton: Locator;
+
+  // Message elements
+  readonly errorBanner: Locator;
+
+  readonly waitingMessage: Locator;
+
+  // Status indicators
+  readonly statusIndicator: Locator;
+
+  constructor(page: Page) {
+    super(page);
+
+    this.appRoute = page.getByTestId("app-route");
+    this.chatBox = page.getByTestId("interactive-chat-box");
+    this.chatInput = page.getByTestId("chat-input");
+    // Send/stop buttons have no single stable test id, so several candidate
+    // selectors are tried and the first match wins.
+    this.sendButton = page
+      .locator(
+        'button[type="submit"], button:has-text("Send"), [data-testid*="send"]',
+      )
+      .first();
+    this.stopButton = page
+      .locator('button:has-text("Stop"), [data-testid*="stop"]')
+      .first();
+    this.errorBanner = page.getByTestId("error-message-banner");
+    this.waitingMessage = page.locator('[data-testid*="waiting"]').first();
+    this.statusIndicator = page.getByTestId("status-icon");
+  }
+
+  /**
+   * Navigate to a specific conversation and wait until it is ready.
+   *
+   * Uses the plural `/conversations/{id}` route, the same one the
+   * recent-conversation links on the home page point to.
+   * @param conversationId - Id of the conversation to open
+   */
+  async gotoConversation(conversationId: string): Promise<void> {
+    await super.goto(`/conversations/${conversationId}`);
+    await this.waitForConversationReady();
+  }
+
+  /**
+   * Wait for conversation interface to be ready for input
+   * @param timeout - Applied to each of the three visibility checks
+   *   separately, not to the method as a whole
+   */
+  async waitForConversationReady(timeout: number = 30_000): Promise<void> {
+    // Wait for the chat interface to appear
+    await expect(this.chatBox).toBeVisible({ timeout });
+
+    // Wait for the chat input to be visible
+    await expect(this.chatInput).toBeVisible({ timeout });
+
+    // Wait for agent to be ready by checking for "Waiting for task" text
+    // Note: Using text search since data-testid is not yet deployed to staging
+    const waitingForTaskText = this.page.getByText("Waiting for task");
+    await expect(waitingForTaskText).toBeVisible({ timeout });
+  }
+
+  /**
+   * Wait for the agent to be ready to receive input, polling once a second.
+   * @param timeout - Maximum time to wait in milliseconds
+   * @throws Error if the error banner appears or the timeout elapses
+   */
+  async waitForAgentReady(timeout: number = 90_000): Promise<void> {
+    const startTime = Date.now();
+
+    while (Date.now() - startTime < timeout) {
+      // Check if there's an error
+      if (await this.hasError()) {
+        const errorMsg = await this.getErrorMessage();
+        throw new Error(`Agent error: ${errorMsg}`);
+      }
+
+      // Check if input is enabled (agent is ready)
+      const isInputEnabled = await this.isChatInputEnabled();
+      if (isInputEnabled) {
+        return;
+      }
+
+      // Wait a bit before checking again
+      await this.page.waitForTimeout(1000);
+    }
+
+    throw new Error(`Agent not ready within ${timeout}ms timeout`);
+  }
+
+  /**
+   * Check if the chat input is enabled
+   * @returns false when the input is not visible, when its class attribute
+   *   contains "disabled" or "loading", or when the check itself throws
+   */
+  async isChatInputEnabled(): Promise<boolean> {
+    try {
+      // contentEditable divs don't have a disabled state, check for pointer-events or class
+      const isVisible = await this.chatInput.isVisible();
+      if (!isVisible) return false;
+
+      // Check if there's a loading state or disabled class
+      const classes = await this.chatInput.getAttribute("class");
+      if (classes?.includes("disabled") || classes?.includes("loading")) {
+        return false;
+      }
+
+      return true;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Send a message to the agent by typing it into the chat input and
+   * pressing Enter.
+   * @param message - Text to send
+   */
+  async sendMessage(message: string): Promise<void> {
+    // Wait for input to be ready
+    await expect(this.chatInput).toBeVisible({ timeout: 30_000 });
+
+    // Clear any existing content and type the message
+    await this.chatInput.click();
+    await this.chatInput.fill("");
+    await this.page.keyboard.type(message);
+
+    // Submit the message
+    await this.page.keyboard.press("Enter");
+
+    // Small delay to ensure message is sent
+    await this.page.waitForTimeout(500);
+  }
+
+  /**
+   * Wait for agent to respond (agent starts processing).
+   *
+   * Resolves as soon as a processing state is seen, and also resolves
+   * quietly when `timeout` elapses without one.
+   * @param timeout - Maximum time to wait in milliseconds
+   */
+  async waitForAgentProcessing(timeout: number = 10_000): Promise<void> {
+    const startTime = Date.now();
+
+    while (Date.now() - startTime < timeout) {
+      // Check if agent is processing (input disabled or loading indicator visible)
+      const isProcessing = await this.isAgentProcessing();
+      if (isProcessing) {
+        return;
+      }
+
+      await this.page.waitForTimeout(500);
+    }
+
+    // It's okay if we don't see processing state - agent might have already finished
+  }
+
+  /**
+   * Check if agent is currently processing.
+   *
+   * This is an instantaneous probe; callers that need to wait poll it.
+   * @returns true when a loading/spinner element is visible or the chat
+   *   input is not enabled
+   */
+  async isAgentProcessing(): Promise<boolean> {
+    // Check for loading indicators or disabled input
+    const loadingIndicator = this.page
+      .locator(
+        '[data-testid*="loading"], [class*="loading"], [class*="spinner"]',
+      )
+      .first();
+    // isVisible() never waits (Playwright ignores its `timeout` option), so
+    // no timeout is passed here.
+    if (await loadingIndicator.isVisible().catch(() => false)) {
+      return true;
+    }
+
+    // Check if input is disabled (indicates processing)
+    const isInputEnabled = await this.isChatInputEnabled();
+    return !isInputEnabled;
+  }
+
+  /**
+   * Wait for agent to complete processing and return to ready state.
+   *
+   * NOTE(review): the loop returns as soon as `isChatInputEnabled()` is true,
+   * even if no processing state was observed in the first phase - confirm
+   * this cannot return before the agent has started.
+   * @param timeout - Overall budget in milliseconds; the up-to-10 s wait for
+   *   processing to start counts against it
+   * @throws Error if the error banner appears or the timeout elapses
+   */
+  async waitForAgentComplete(timeout: number = 120_000): Promise<void> {
+    const startTime = Date.now();
+
+    // First, wait for processing to start
+    await this.waitForAgentProcessing(10_000).catch(() => {});
+
+    // Then wait for processing to complete
+    while (Date.now() - startTime < timeout) {
+      // Check for errors
+      if (await this.hasError()) {
+        const errorMsg = await this.getErrorMessage();
+        throw new Error(`Agent error during processing: ${errorMsg}`);
+      }
+
+      // Check if agent is back to ready state
+      const isInputEnabled = await this.isChatInputEnabled();
+      if (isInputEnabled) {
+        return;
+      }
+
+      await this.page.waitForTimeout(1000);
+    }
+
+    throw new Error(`Agent did not complete within ${timeout}ms timeout`);
+  }
+
+  /**
+   * Get the text of every element whose test id or class contains "message".
+   */
+  async getMessages(): Promise<string[]> {
+    const messageElements = this.page.locator(
+      '[data-testid*="message"], [class*="message"]',
+    );
+    return messageElements.allTextContents();
+  }
+
+  /**
+   * Get the last message in the chat, or null when there are none.
+   *
+   * The sender is not checked: this is the last entry of `getMessages()`,
+   * which is only likely to be the agent's.
+   */
+  async getLastAgentMessage(): Promise<string | null> {
+    const messages = await this.getMessages();
+    // Return the last message that's likely from the agent
+    return messages.length > 0 ? messages[messages.length - 1] : null;
+  }
+
+  /**
+   * Wait for a message containing specific text to appear
+   * @param expectedText - The text to search for in messages
+   * @param timeout - Maximum time to wait in milliseconds
+   * @returns The message containing the expected text
+   * @throws Error if the error banner appears, or on timeout (the message
+   *   then lists the last five messages found)
+   */
+  async waitForMessageContaining(
+    expectedText: string,
+    timeout: number = 120_000,
+  ): Promise<string> {
+    const startTime = Date.now();
+
+    while (Date.now() - startTime < timeout) {
+      // Check for errors first
+      if (await this.hasError()) {
+        const errorMsg = await this.getErrorMessage();
+        throw new Error(`Agent error while waiting for message: ${errorMsg}`);
+      }
+
+      // Get all messages and check if any contain the expected text
+      const messages = await this.getMessages();
+      for (const message of messages) {
+        if (message.includes(expectedText)) {
+          return message;
+        }
+      }
+
+      // Wait a bit before checking again
+      await this.page.waitForTimeout(1000);
+    }
+
+    // Get all messages for error reporting
+    const allMessages = await this.getMessages();
+    throw new Error(
+      `Timeout waiting for message containing "${expectedText}" after ${timeout}ms. ` +
+        `Messages found: ${JSON.stringify(allMessages.slice(-5))}`,
+    );
+  }
+
+  /**
+   * Stop the currently running agent.
+   *
+   * Gives the stop button up to 2 s to appear; if it never does, nothing is
+   * clicked and the method returns normally.
+   */
+  async stopAgent(): Promise<void> {
+    // waitFor() is used because isVisible() returns immediately and ignores
+    // any timeout passed to it.
+    const stopButtonAppeared = await this.stopButton
+      .waitFor({ state: "visible", timeout: 2_000 })
+      .then(() => true)
+      .catch(() => false);
+
+    if (stopButtonAppeared) {
+      await this.stopButton.click();
+      await this.page.waitForTimeout(1000);
+    }
+  }
+
+  /**
+   * Verify no error messages are displayed
+   * @throws Error carrying the banner text when the error banner is visible
+   */
+  async verifyNoErrors(): Promise<void> {
+    const hasError = await this.hasError();
+    if (hasError) {
+      const errorMsg = await this.getErrorMessage();
+      throw new Error(`Unexpected error message: ${errorMsg}`);
+    }
+  }
+
+  /**
+   * Execute a complete conversation flow:
+   * 1. Wait for agent to be ready (up to 30 s)
+   * 2. Send message
+   * 3. Wait for completion
+   * 4. Verify no errors
+   * @param message - Prompt to send
+   * @param timeout - Budget for step 3 only, in milliseconds
+   */
+  async executePrompt(
+    message: string,
+    timeout: number = 120_000,
+  ): Promise<void> {
+    // Ensure agent is ready
+    await this.waitForAgentReady(30_000);
+
+    // Send the message
+    await this.sendMessage(message);
+
+    // Wait for completion
+    await this.waitForAgentComplete(timeout);
+
+    // Verify no errors
+    await this.verifyNoErrors();
+  }
+}
@@ -0,0 +1,186 @@
+import { Page, Locator, expect } from "@playwright/test";
+import { BasePage } from "./BasePage";
+
+/**
+ * Page object for the Home screen where users start new conversations
+ * and view recent conversations.
+ */
+export class HomePage extends BasePage {
+  // Main containers
+  readonly homeScreen: Locator;
+
+  readonly newConversationSection: Locator;
+
+  readonly recentConversationsSection: Locator;
+
+  // User avatar and menu
+  readonly userAvatar: Locator;
+
+  readonly accountSettingsMenu: Locator;
+
+  // Repository selection
+  readonly repoSelector: Locator;
+
+  readonly repoSearchInput: Locator;
+
+  constructor(page: Page) {
+    super(page);
+
+    this.homeScreen = page.getByTestId("home-screen");
+    this.newConversationSection = page.getByTestId(
+      "home-screen-new-conversation-section",
+    );
+    this.recentConversationsSection = page.getByTestId(
+      "home-screen-recent-conversations-section",
+    );
+    this.userAvatar = page.getByTestId("user-avatar");
+    this.accountSettingsMenu = page.getByTestId(
+      "account-settings-context-menu",
+    );
+    this.repoSelector = page.locator('[data-testid*="repo"]').first();
+    this.repoSearchInput = page
+      .locator('input[placeholder*="repository"], input[placeholder*="repo"]')
+      .first();
+  }
+
+  /**
+   * Wait up to `timeout` ms for `locator` to become visible.
+   *
+   * Needed because `locator.isVisible()` returns immediately and ignores any
+   * timeout passed to it.
+   * @returns true if the element became visible in time, false otherwise
+   */
+  private static async appearsWithin(
+    locator: Locator,
+    timeout: number,
+  ): Promise<boolean> {
+    return locator
+      .waitFor({ state: "visible", timeout })
+      .then(() => true)
+      .catch(() => false);
+  }
+
+  /**
+   * Navigate to the home page and wait for the home screen.
+   */
+  async goto(): Promise<void> {
+    await super.goto("/");
+    await this.waitForHomeScreen();
+  }
+
+  /**
+   * Wait for the home screen to be fully loaded: visible within 30 s, then
+   * network idle (best effort).
+   */
+  async waitForHomeScreen(): Promise<void> {
+    await expect(this.homeScreen).toBeVisible({ timeout: 30_000 });
+    await this.waitForNetworkIdle();
+  }
+
+  /**
+   * Check if user is logged in by verifying home screen is visible
+   * @returns false when the home screen does not show up within 10 s
+   */
+  async isLoggedIn(): Promise<boolean> {
+    try {
+      await expect(this.homeScreen).toBeVisible({ timeout: 10_000 });
+      return true;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Select a repository by searching for it.
+   *
+   * Best effort: every step is skipped when its element does not appear
+   * within 5 s, and no error is raised if nothing could be selected.
+   * @param repoUrl - Full repository URL (e.g., https://github.com/OpenHands/deploy)
+   */
+  async selectRepository(repoUrl: string): Promise<void> {
+    // Extract "owner/name" from the URL. A trailing slash or ".git" suffix
+    // is dropped first so it does not end up in the search text.
+    const repoName = repoUrl
+      .replace(/\/+$/, "")
+      .replace(/\.git$/, "")
+      .split("/")
+      .slice(-2)
+      .join("/");
+
+    // Look for repository selector/input
+    const repoInput = this.page
+      .locator('input[placeholder*="repository"], input[placeholder*="search"]')
+      .first();
+    const repoSelector = this.page
+      .locator('[class*="repo"], [data-testid*="repo"]')
+      .first();
+
+    // Try to find and interact with repo selection
+    if (await HomePage.appearsWithin(repoInput, 5_000)) {
+      await repoInput.fill(repoName);
+      await this.page.waitForTimeout(1000); // Wait for search results
+    } else if (await HomePage.appearsWithin(repoSelector, 5_000)) {
+      await repoSelector.click();
+      await this.page.waitForTimeout(500);
+    }
+
+    // Click on the repository in the dropdown/list
+    const repoOption = this.page.locator(`text=${repoName}`).first();
+    if (await HomePage.appearsWithin(repoOption, 5_000)) {
+      await repoOption.click();
+    }
+  }
+
+  /**
+   * Start a new conversation.
+   *
+   * Best effort: the click is skipped when the button does not appear within
+   * 5 s, and a navigation that never happens is not reported as an error.
+   * @param buttonId - Optional test ID of the button to click (default: 'launch-new-conversation-button')
+   */
+  async startNewConversation(
+    buttonId: string = "launch-new-conversation-button",
+  ): Promise<void> {
+    const startButton = this.page.getByTestId(buttonId);
+    if (await HomePage.appearsWithin(startButton, 5_000)) {
+      await startButton.click();
+    }
+
+    // Wait for conversation/chat interface to load.
+    // Only the path is tested: matching the whole URL would succeed at once
+    // on hosts such as app.all-hands.dev, whose name contains "app".
+    await this.page
+      .waitForURL((url) => /conversation|chat|app/.test(url.pathname), {
+        timeout: 30_000,
+      })
+      .catch(() => {});
+  }
+
+  /**
+   * Open user settings menu
+   *
+   * Note: The menu is conditionally rendered based on async state (config loaded,
+   * user authenticated, etc.). We need to wait for the menu element to be attached
+   * to the DOM before we can interact with it. The menu appears on hover over the
+   * user-actions container, or when clicking the avatar toggles state.
+   */
+  async openUserMenu(): Promise<void> {
+    // First, wait for the user avatar to be visible
+    await expect(this.userAvatar).toBeVisible({ timeout: 10_000 });
+
+    // Wait for the menu to be attached to the DOM (may not be visible yet)
+    // This ensures the async config/auth state has loaded
+    await this.accountSettingsMenu.waitFor({
+      state: "attached",
+      timeout: 15_000,
+    });
+
+    // Now hover over the user-actions container to trigger the menu visibility
+    // The menu uses CSS group-hover to show, so we need to hover the parent
+    const userActionsContainer = this.page.getByTestId("user-actions");
+    await userActionsContainer.hover();
+
+    // Wait for the menu to become visible
+    await expect(this.accountSettingsMenu).toBeVisible({ timeout: 5_000 });
+  }
+
+  /**
+   * Get list of recent conversations
+   * @returns Non-blank text of every link/button inside the recent
+   *   conversations section
+   */
+  async getRecentConversations(): Promise<string[]> {
+    await this.waitForElement(this.recentConversationsSection);
+    const conversations = await this.recentConversationsSection
+      .locator("a, button, [role='button']")
+      .allTextContents();
+    return conversations.filter((text) => text.trim().length > 0);
+  }
+
+  /**
+   * Click on the first conversation in the recent conversations list
+   * The conversations are displayed as links in the recent-conversations section
+   */
+  async clickFirstConversation(): Promise<void> {
+    // Wait for recent conversations section to be visible
+    const recentConversations = this.page.getByTestId("recent-conversations");
+    await expect(recentConversations).toBeVisible({ timeout: 10_000 });
+
+    // Find the first conversation link (they link to /conversations/{id})
+    const firstConversationLink = recentConversations
+      .locator('a[href^="/conversations/"]')
+      .first();
+    await expect(firstConversationLink).toBeVisible({ timeout: 10_000 });
+
+    // Click the conversation
+    await firstConversationLink.click();
+
+    // Wait for navigation to conversation page
+    await this.page.waitForURL(/\/conversations\//, { timeout: 30_000 });
+  }
+}
@@ -0,0 +1,3 @@
+export { BasePage } from "./BasePage";
+export { HomePage } from "./HomePage";
+export { ConversationPage, AgentState } from "./ConversationPage";
@@ -0,0 +1,151 @@
+import { defineConfig, devices } from "@playwright/test";
+import dotenv from "dotenv";
+import path from "path";
+import fs from "fs";
+
+// Load environment variables from the .env file that sits next to this config.
+// `import.meta.dirname` is only defined on Node.js 20.11+ (21.2+ on 21.x).
+dotenv.config({ path: path.resolve(import.meta.dirname, ".env") });
+
+// Check if auth file exists (will be created by setup project).
+// Evaluated whenever this module is loaded, so `hasAuthFile` reflects the
+// filesystem at that moment.
+const authFile = path.resolve(import.meta.dirname, "./fixtures/auth.json");
+const hasAuthFile = fs.existsSync(authFile);
+
+/**
+ * Environment URLs for different deployment targets.
+ *
+ * The keys are the values `getBaseURL` accepts in the TEST_ENV variable.
+ */
+const environments = {
+  staging: "https://staging.all-hands.dev",
+  production: "https://app.all-hands.dev",
+  local: "http://localhost:3000",
+};
+
+/**
+ * Resolve the base URL for this run.
+ *
+ * Precedence:
+ * 1. BASE_URL, used verbatim (for feature branches, use
+ *    https://<feature_branch_name>.staging.all-hands.dev)
+ * 2. the `environments` entry named by TEST_ENV
+ * 3. staging, when TEST_ENV is unset or names no known environment
+ */
+function getBaseURL(): string {
+  const explicitUrl = process.env.BASE_URL;
+
+  if (!explicitUrl) {
+    const envName = (process.env.TEST_ENV ||
+      "staging") as keyof typeof environments;
+    return environments[envName] || environments.staging;
+  }
+
+  return explicitUrl;
+}
+
+/**
+ * Playwright configuration for OpenHands integration tests
+ *
+ * Supports multiple environments:
+ * - staging: https://staging.all-hands.dev
+ * - production: https://app.all-hands.dev
+ * - feature branches: https://<branch>.staging.all-hands.dev
+ *
+ * Usage:
+ * - npm run test                    # Run all tests against staging
+ * - npm run test:staging            # Run all tests against staging
+ * - npm run test:production         # Run all tests against production
+ * - BASE_URL=https://my-branch.staging.all-hands.dev npm test  # Feature branch
+ */
+export default defineConfig({
+  testDir: "./tests",
+
+  // Tests within a file are NOT parallelized: smoke tests must run
+  // sequentially.
+  fullyParallel: false,
+
+  // Fail the build on CI if you accidentally left test.only in the source code
+  forbidOnly: !!process.env.CI,
+
+  // Retry failed tests (more retries in CI)
+  retries: process.env.CI ? 2 : 1,
+
+  // A single worker everywhere (CI and local) so tests run one at a time
+  workers: 1,
+
+  // Reporter configuration (CI additionally emits GitHub annotations)
+  reporter: process.env.CI
+    ? [["html", { outputFolder: "playwright-report" }], ["list"], ["github"]]
+    : [["html", { outputFolder: "playwright-report" }], ["list"]],
+
+  // Timeout configuration
+  timeout: 120_000, // 2 minutes per test (agent operations can be slow)
+  expect: {
+    timeout: 30_000, // 30 seconds for assertions
+  },
+
+  // Shared settings for all projects
+  use: {
+    // Base URL for navigation
+    baseURL: getBaseURL(),
+
+    // Collect trace on the first retry of a failed test
+    trace: "on-first-retry",
+
+    // Screenshots on failure
+    screenshot: "only-on-failure",
+
+    // Video recording (useful for debugging CI failures)
+    video: process.env.CI ? "on-first-retry" : "off",
+
+    // Ignore SSL errors (for staging/development environments)
+    ignoreHTTPSErrors: true,
+
+    // Use persisted authentication state only if it exists
+    storageState: hasAuthFile ? authFile : undefined,
+
+    // Browser viewport
+    viewport: { width: 1280, height: 720 },
+
+    // Action timeout
+    actionTimeout: 15_000,
+
+    // Navigation timeout
+    navigationTimeout: 30_000,
+  },
+
+  // Define test projects
+  projects: [
+    // Setup project - handles authentication
+    {
+      name: "setup",
+      testMatch: /global-setup\.ts/,
+      use: {
+        storageState: undefined, // Don't use existing auth for setup
+      },
+    },
+
+    // Chromium tests (primary browser)
+    {
+      name: "chromium",
+      use: {
+        ...devices["Desktop Chrome"],
+      },
+      dependencies: ["setup"],
+    },
+
+    // Firefox tests (optional - run with --project=firefox)
+    {
+      name: "firefox",
+      use: {
+        ...devices["Desktop Firefox"],
+      },
+      dependencies: ["setup"],
+    },
+
+    // WebKit tests (optional - run with --project=webkit)
+    {
+      name: "webkit",
+      use: {
+        ...devices["Desktop Safari"],
+      },
+      dependencies: ["setup"],
+    },
+  ],
+
+  // Output directory for test artifacts
+  outputDir: "./test-results",
+
+  // No global setup/teardown: authentication is handled by the "setup"
+  // project that the browser projects depend on.
+  globalSetup: undefined,
+  globalTeardown: undefined,
+});
@@ -0,0 +1,403 @@
+import { test, expect } from "@playwright/test";
+import { ChildProcess, spawn } from "child_process";
+import path from "path";
+import crypto from "crypto";
+import {
+  MockGitHubClient,
+  createMockGitHubClient,
+  createIssueLabeledPayload,
+} from "../mocks";
+import { ConversationPage, HomePage } from "../pages";
+
+/**
+ * GitHub Resolver Integration Tests
+ *
+ * These tests verify the GitHub resolver integration in two modes:
+ *
+ * ## Mode 1: Mock Server Tests (for local development)
+ * Uses a local mock GitHub server to test the full webhook flow.
+ * Requires:
+ * - OpenHands running locally with GITHUB_APP_WEBHOOK_SECRET=test-webhook-secret
+ * - The app configured to use the mock server for GitHub API calls
+ *
+ * ## Mode 2: Live Environment Tests (for staging/production)
+ * Tests against real deployed environments using the real GitHub API.
+ * Requires:
+ * - GITHUB_TEST_USERNAME and GITHUB_TEST_PASSWORD for authentication
+ * - The webhook endpoint to be accessible
+ *
+ * Environment Variables:
+ * - USE_MOCK_GITHUB: Set to "true" to use mock server mode
+ * - MOCK_GITHUB_PORT: Port for the mock GitHub server (default: 9999)
+ * - APP_PORT: Port where the OpenHands app is running (default: 12000)
+ * - GITHUB_APP_WEBHOOK_SECRET: Webhook secret for local testing
+ *
+ * Tags:
+ * - @github-resolver: GitHub resolver integration tests
+ * - @enterprise: Tests requiring enterprise features
+ */
+
+// Configuration
+const USE_MOCK_GITHUB = process.env.USE_MOCK_GITHUB === "true";
+const MOCK_GITHUB_PORT = parseInt(process.env.MOCK_GITHUB_PORT || "9999", 10);
+const APP_PORT = parseInt(process.env.APP_PORT || "12000", 10);
+const MOCK_SERVER_STARTUP_TIMEOUT = 30_000;
+const RESOLVER_RESPONSE_TIMEOUT = 180_000;
+
+// Mock server process
+let mockServerProcess: ChildProcess | null = null;
+let mockClient: MockGitHubClient | null = null;
+
+/**
+ * Build the value sent in the `X-Hub-Signature-256` header for a webhook body.
+ *
+ * @param payload - Request body text to sign
+ * @param secret - Shared secret used as the HMAC key
+ * @returns `sha256=` followed by the hex-encoded HMAC-SHA256 of `payload`
+ */
+function generateWebhookSignature(payload: string, secret: string): string {
+  const digest = crypto
+    .createHmac("sha256", secret)
+    .update(payload)
+    .digest("hex");
+  return `sha256=${digest}`;
+}
+
+/**
+ * Launch the mock GitHub server in a child process and wait until it is ready.
+ *
+ * Does nothing unless USE_MOCK_GITHUB is enabled. The server script is run
+ * through `npx tsx` with the current environment plus the mock port and the
+ * webhook secret (GITHUB_APP_WEBHOOK_SECRET, or "test-webhook-secret").
+ * Child output is echoed to the console with a `[Mock GitHub]` prefix.
+ *
+ * Side effects: assigns the module-level `mockServerProcess` and `mockClient`.
+ */
+async function startMockServer(): Promise<void> {
+  if (!USE_MOCK_GITHUB) {
+    return;
+  }
+
+  const webhookSecret =
+    process.env.GITHUB_APP_WEBHOOK_SECRET || "test-webhook-secret";
+  const scriptPath = path.join(
+    import.meta.dirname,
+    "../mocks/github-mock-server.ts",
+  );
+
+  console.log(`Starting mock GitHub server on port ${MOCK_GITHUB_PORT}...`);
+
+  const child: ChildProcess = spawn("npx", ["tsx", scriptPath], {
+    env: {
+      ...process.env,
+      MOCK_GITHUB_PORT: String(MOCK_GITHUB_PORT),
+      MOCK_GITHUB_WEBHOOK_SECRET: webhookSecret,
+    },
+    // stdin is not needed; stdout/stderr are piped so they can be re-logged
+    stdio: ["ignore", "pipe", "pipe"],
+  });
+  mockServerProcess = child;
+
+  child.stdout?.on("data", (chunk) => {
+    console.log(`[Mock GitHub] ${chunk.toString().trim()}`);
+  });
+  child.stderr?.on("data", (chunk) => {
+    console.error(`[Mock GitHub ERROR] ${chunk.toString().trim()}`);
+  });
+
+  // Lifecycle events are only logged; they do not abort the readiness wait
+  child.on("error", (spawnError) => {
+    console.error(
+      `[Mock GitHub] Failed to start server: ${spawnError.message}`,
+    );
+  });
+  child.on("exit", (exitCode) => {
+    console.log(`[Mock GitHub] Server exited with code ${exitCode}`);
+  });
+
+  const client = createMockGitHubClient(MOCK_GITHUB_PORT, APP_PORT);
+  mockClient = client;
+  await client.waitForReady(MOCK_SERVER_STARTUP_TIMEOUT);
+  console.log("Mock GitHub server is ready");
+}
+
+/**
+ * Stop the mock GitHub server started by `startMockServer`, if any.
+ *
+ * Sends SIGTERM and waits for the child to exit; if it has not exited after
+ * 5 seconds it is sent SIGKILL and the wait ends. When the child has already
+ * exited (or was never spawned) there is nothing to wait for, so the function
+ * returns right away instead of sitting out the 5 second grace period.
+ *
+ * Side effects: clears the module-level `mockServerProcess` and, when a server
+ * process was being tracked, the `mockClient` that pointed at it.
+ */
+async function stopMockServer(): Promise<void> {
+  const child = mockServerProcess;
+  if (!child) {
+    return;
+  }
+
+  // Drop the module references first so a failure below cannot leave stale
+  // handles behind for a later start/stop cycle.
+  mockServerProcess = null;
+  mockClient = null;
+
+  console.log("Stopping mock GitHub server...");
+
+  // No "exit" event will fire for a process that is already gone or that
+  // never got a pid (spawn failure), so only wait when it is still running.
+  const stillRunning =
+    child.pid !== undefined &&
+    child.exitCode === null &&
+    child.signalCode === null;
+
+  if (stillRunning) {
+    await new Promise<void>((resolve) => {
+      const forceKillTimer = setTimeout(() => {
+        child.kill("SIGKILL");
+        resolve();
+      }, 5000);
+
+      // `once` so the listener does not outlive this shutdown attempt
+      child.once("exit", () => {
+        clearTimeout(forceKillTimer);
+        resolve();
+      });
+
+      child.kill("SIGTERM");
+    });
+  }
+
+  console.log("Mock GitHub server stopped");
+}
+
+// ============================================================================
+// MOCK SERVER TESTS (for local development with mock GitHub)
+// ============================================================================
+
+test.describe("GitHub Resolver - Mock Server @github-resolver @enterprise @mock", () => {
+  // All tests share one mock server instance, so they run one after another.
+  test.describe.configure({ mode: "serial" });
+
+  // Opt-in suite: skipped entirely unless USE_MOCK_GITHUB=true.
+  test.skip(!USE_MOCK_GITHUB, "Requires USE_MOCK_GITHUB=true");
+
+  test.beforeAll(async () => {
+    await startMockServer();
+  });
+
+  test.afterAll(async () => {
+    await stopMockServer();
+  });
+
+  // Reset the mock client between tests (no-op when no client exists).
+  test.beforeEach(async () => {
+    await mockClient?.reset();
+  });
+
+  test("mock server should be healthy", async () => {
+    expect(mockClient).not.toBeNull();
+    expect(await mockClient!.healthCheck()).toBe(true);
+  });
+
+  test("should process issue labeled webhook and create conversation", async ({
+    page,
+    baseURL,
+  }) => {
+    expect(mockClient).not.toBeNull();
+    const client = mockClient!;
+
+    // 1. Deliver the webhook and make sure it was accepted.
+    console.log("Triggering issue labeled webhook...");
+    const delivery = await client.triggerIssueLabeledEvent({
+      issueTitle: "Add README file",
+      issueBody: "Please add a README.md file with project documentation.",
+      labelName: "openhands",
+    });
+    console.log(`Webhook response: ${JSON.stringify(delivery)}`);
+    expect(delivery.responseStatus).toBe(200);
+
+    // 2. Wait for the resolver's acknowledgement recorded by the mock.
+    console.log("Waiting for resolver response...");
+    const reply = await client.waitForResponseContaining(
+      "I'm on it",
+      RESOLVER_RESPONSE_TIMEOUT,
+    );
+    console.log(`Resolver response: ${reply.body}`);
+    expect(reply.body).toContain("I'm on it");
+    expect(reply.body).toContain("track my progress");
+
+    // 3. The acknowledgement must link to a conversation; pull out its id.
+    const idMatch = reply.body.match(/conversations\/([a-f0-9]+)/);
+    expect(idMatch).not.toBeNull();
+    const [, conversationId] = idMatch!;
+    console.log(`Conversation ID: ${conversationId}`);
+
+    // 4. Open that conversation in the UI and check the chat is usable.
+    const conversation = new ConversationPage(page);
+    await page.goto(`${baseURL}/conversations/${conversationId}`);
+    await conversation.waitForConversationReady(30_000);
+    await expect(conversation.chatBox).toBeVisible();
+
+    await page.screenshot({
+      path: "test-results/screenshots/github-resolver-conversation.png",
+    });
+
+    console.log("Issue labeled webhook test passed");
+  });
+
+  test("should process issue comment webhook with @openhands mention", async ({
+    page,
+    baseURL,
+  }) => {
+    expect(mockClient).not.toBeNull();
+    const client = mockClient!;
+
+    // 1. Deliver the comment webhook and make sure it was accepted.
+    console.log("Triggering issue comment webhook...");
+    const delivery = await client.triggerIssueCommentEvent({
+      issueTitle: "Bug: Application crashes on startup",
+      issueBody: "The application crashes when I try to start it.",
+      commentBody: "@openhands please investigate this crash and fix it",
+    });
+    console.log(`Webhook response: ${JSON.stringify(delivery)}`);
+    expect(delivery.responseStatus).toBe(200);
+
+    // 2. Wait for the resolver's acknowledgement recorded by the mock.
+    console.log("Waiting for resolver response...");
+    const reply = await client.waitForResponseContaining(
+      "I'm on it",
+      RESOLVER_RESPONSE_TIMEOUT,
+    );
+    console.log(`Resolver response: ${reply.body}`);
+    expect(reply.body).toContain("I'm on it");
+
+    // 3. Follow the conversation link from the acknowledgement.
+    const idMatch = reply.body.match(/conversations\/([a-f0-9]+)/);
+    expect(idMatch).not.toBeNull();
+    const [, conversationId] = idMatch!;
+
+    const conversation = new ConversationPage(page);
+    await page.goto(`${baseURL}/conversations/${conversationId}`);
+    await conversation.waitForConversationReady(30_000);
+
+    await page.screenshot({
+      path: "test-results/screenshots/github-resolver-issue-comment.png",
+    });
+  });
+});
+
+// ============================================================================
+// LIVE ENVIRONMENT TESTS (for staging/production with real GitHub)
+// ============================================================================
+
+test.describe("GitHub Resolver - Live Environment @github-resolver @enterprise @live", () => {
+  test.describe.configure({ mode: "serial" });
+
+  let homePage: HomePage;
+
+  test.beforeEach(async ({ page }) => {
+    homePage = new HomePage(page);
+  });
+
+  test("should verify resolver conversations appear in conversation list", async ({
+    page,
+  }) => {
+    /**
+     * Checks that the recent-conversations list renders for the authenticated
+     * user and, when it is non-empty, that its first entry can be opened.
+     * An empty list is not treated as a failure.
+     */
+
+    // Navigate to home page (requires authentication via global-setup)
+    await homePage.goto();
+    await expect(homePage.homeScreen).toBeVisible({ timeout: 30_000 });
+
+    // Look for recent conversations
+    const recentConversations = page.getByTestId("recent-conversations");
+    await expect(recentConversations).toBeVisible({ timeout: 10_000 });
+
+    const conversationLinks = recentConversations.locator(
+      'a[href^="/conversations/"]',
+    );
+    const count = await conversationLinks.count();
+
+    console.log(`Found ${count} recent conversations`);
+
+    await page.screenshot({
+      path: "test-results/screenshots/resolver-conversations-list.png",
+    });
+
+    if (count > 0) {
+      const firstConversation = conversationLinks.first();
+      await firstConversation.click();
+
+      const conversationPage = new ConversationPage(page);
+      await conversationPage.waitForConversationReady(30_000);
+
+      await page.screenshot({
+        path: "test-results/screenshots/resolver-conversation-detail.png",
+      });
+
+      console.log("Successfully navigated to a conversation");
+    }
+  });
+
+  test("should be able to send webhook with valid signature format", async ({
+    baseURL,
+    request,
+  }) => {
+    /**
+     * Sends a well-formed webhook signed with the WRONG secret to verify that:
+     * 1. The webhook endpoint exists
+     * 2. The request is either rejected (403) or answered with 200, which the
+     *    test accepts for environments where webhooks are disabled
+     */
+
+    const payload = createIssueLabeledPayload({
+      issueTitle: "Test Issue",
+      issueBody: "Test body for integration test",
+      labelName: "openhands",
+    });
+
+    const payloadString = JSON.stringify(payload);
+    const signature = generateWebhookSignature(payloadString, "wrong-secret");
+
+    const response = await request.post(
+      `${baseURL}/api/integration/github/events`,
+      {
+        headers: {
+          "Content-Type": "application/json",
+          "X-GitHub-Event": "issues",
+          "X-Hub-Signature-256": signature,
+          "X-GitHub-Delivery": crypto.randomUUID(),
+        },
+        data: payload,
+      },
+    );
+
+    const status = response.status();
+    console.log(`Webhook response status: ${status}`);
+
+    // Either 403 (signature invalid) or 200 (if webhooks disabled) is acceptable
+    expect([200, 403]).toContain(status);
+
+    const responseText = await response.text();
+    console.log(`Webhook response: ${responseText}`);
+
+    if (status === 403) {
+      console.log(
+        "Webhook signature validation is working (403 = invalid signature)",
+      );
+    } else if (status === 200) {
+      // This branch only adds diagnostics, so a body that is not JSON (or is
+      // not an object) must not fail a test whose assertions already passed.
+      let body: { message?: unknown } | null = null;
+      try {
+        body = JSON.parse(responseText);
+      } catch {
+        console.log("Webhook returned 200 with a non-JSON body");
+      }
+      const message = body?.message;
+      if (typeof message === "string" && message.includes("disabled")) {
+        console.log("GitHub webhooks are disabled on this environment");
+      }
+    }
+  });
+});
+
+// ============================================================================
+// ERROR HANDLING TESTS
+// ============================================================================
+
+test.describe("GitHub Resolver - Error Handling @github-resolver @enterprise", () => {
+  // A webhook without X-Hub-Signature-256 must not be processed as valid:
+  // the test accepts 403, or 200 for environments where webhooks are disabled.
+  test("should reject webhook without signature header", async ({
+    baseURL,
+    request,
+  }) => {
+    const payload = { action: "labeled", installation: { id: 12345 } };
+
+    const response = await request.post(
+      `${baseURL}/api/integration/github/events`,
+      {
+        headers: {
+          "Content-Type": "application/json",
+          "X-GitHub-Event": "issues",
+        },
+        data: payload,
+      },
+    );
+
+    console.log(
+      `Response status: ${response.status()} (expected 403 or 200 if disabled)`,
+    );
+    expect([200, 403]).toContain(response.status());
+  });
+
+  // A body that is not valid JSON must be answered with an error status.
+  test("should handle malformed JSON gracefully", async ({
+    baseURL,
+    request,
+  }) => {
+    // Send the body as a Buffer: with a JSON content type, Playwright
+    // JSON-encodes string data that is not already parsable JSON, which
+    // would turn this into a perfectly valid JSON string literal.
+    const malformedBody = Buffer.from("not valid json{{{", "utf8");
+
+    const response = await request.post(
+      `${baseURL}/api/integration/github/events`,
+      {
+        headers: {
+          "Content-Type": "application/json",
+          "X-GitHub-Event": "issues",
+          "X-Hub-Signature-256": "sha256=invalid",
+        },
+        data: malformedBody,
+      },
+    );
+
+    console.log(`Response status: ${response.status()}`);
+    expect([400, 403, 422, 500]).toContain(response.status());
+  });
+});
@@ -0,0 +1,325 @@
+import { test as setup, expect } from "@playwright/test";
+import path from "path";
+import fs from "fs";
+
+const authFile = path.join(import.meta.dirname, "../fixtures/auth.json");
+
+/**
+ * Setup test that signs in and persists the browser storage state.
+ *
+ * The resulting state is written to fixtures/auth.json so later test runs can
+ * start already authenticated.
+ *
+ * Flow:
+ * 1. AUTH_METHOD=skip: require an existing fixtures/auth.json and stop.
+ * 2. Open the app; if the home screen is already shown, save state and stop.
+ * 3. Otherwise sign in via GitHub OAuth or Keycloak, accept the Terms of
+ *    Service if prompted, verify the home screen, and save state.
+ *
+ * Environment Variables:
+ * - AUTH_METHOD: "github" | "keycloak" | "skip" (default: "github")
+ * - GITHUB_TEST_USERNAME: GitHub username for test account
+ * - GITHUB_TEST_PASSWORD: GitHub password for test account
+ * - GITHUB_TEST_TOTP_SECRET: (Optional) TOTP secret for 2FA
+ * - KEYCLOAK_URL: Keycloak server URL
+ * - KEYCLOAK_USERNAME: Keycloak test username
+ * - KEYCLOAK_PASSWORD: Keycloak test password
+ */
+setup("authenticate", async ({ page, baseURL }) => {
+  const authMethod = process.env.AUTH_METHOD || "github";
+
+  // "skip" reuses a previously saved state and never touches the browser
+  if (authMethod === "skip") {
+    if (!fs.existsSync(authFile)) {
+      throw new Error(
+        "AUTH_METHOD=skip but no existing auth.json found. Please run authentication first.",
+      );
+    }
+    console.log("Using existing authentication state from fixtures/auth.json");
+    return;
+  }
+
+  await page.goto(baseURL || "/");
+
+  // An existing session makes the login flow unnecessary
+  if (await checkIfAuthenticated(page)) {
+    console.log("Already authenticated, saving state...");
+    await page.context().storageState({ path: authFile });
+    return;
+  }
+
+  switch (authMethod) {
+    case "github":
+      await authenticateWithGitHub(page);
+      break;
+    case "keycloak":
+      await authenticateWithKeycloak(page);
+      break;
+    default:
+      throw new Error(`Unknown AUTH_METHOD: ${authMethod}`);
+  }
+
+  // Back in the app once the URL mentions none of the login providers
+  // (the landing page may be home or accept-tos)
+  const loginMarkers = ["github.com", "login", "keycloak"];
+  await page.waitForURL(
+    (url) => {
+      const href = url.toString();
+      return loginMarkers.every((marker) => !href.includes(marker));
+    },
+    { timeout: 60_000 },
+  );
+
+  if (page.url().includes("/accept-tos")) {
+    console.log(
+      "Redirected to accept-tos page after authentication, handling TOS acceptance...",
+    );
+    await handleTOSAcceptance(page);
+  }
+
+  // The home screen is the proof that sign-in worked
+  const homeScreen = page.getByTestId("home-screen");
+  await expect(homeScreen).toBeVisible({ timeout: 30_000 });
+
+  await page.context().storageState({ path: authFile });
+  console.log("Authentication successful, state saved to fixtures/auth.json");
+});
+
+/**
+ * Report whether the current page already shows an authenticated session.
+ *
+ * @param page - Page that has been navigated to the application
+ * @returns true when the home screen is visible and the login page is not;
+ *          false otherwise, including when any check throws
+ */
+async function checkIfAuthenticated(
+  page: import("@playwright/test").Page,
+): Promise<boolean> {
+  try {
+    // Give the page up to 10s to settle; a timeout here is not an error
+    try {
+      await page.waitForLoadState("networkidle", { timeout: 10_000 });
+    } catch {
+      // keep going with whatever has rendered so far
+    }
+
+    const homeVisible = await page
+      .getByTestId("home-screen")
+      .isVisible()
+      .catch(() => false);
+    const loginVisible = await page
+      .getByTestId("login-page")
+      .isVisible()
+      .catch(() => false);
+
+    return !(loginVisible || !homeVisible);
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Drive the "Log in with GitHub" flow from the application's login page.
+ *
+ * After clicking the login button the browser may land on github.com (full
+ * login needed), on the accept-tos page, or straight back in the app. Only the
+ * github.com case fills in credentials; the other two return early.
+ *
+ * @param page - Page currently showing the application's login UI
+ * @throws Error when GITHUB_TEST_USERNAME or GITHUB_TEST_PASSWORD is not set
+ */
+async function authenticateWithGitHub(
+  page: import("@playwright/test").Page,
+): Promise<void> {
+  const { GITHUB_TEST_USERNAME: login, GITHUB_TEST_PASSWORD: secret } =
+    process.env;
+
+  if (!login || !secret) {
+    throw new Error(
+      "GitHub authentication requires GITHUB_TEST_USERNAME and GITHUB_TEST_PASSWORD environment variables",
+    );
+  }
+
+  console.log("Starting GitHub authentication...");
+
+  const loginButton = page.getByRole("button", { name: "Log in with GitHub" });
+  await expect(loginButton).toBeVisible({ timeout: 10_000 });
+  await loginButton.click();
+
+  // True once the browser is on GitHub, on accept-tos, or on any URL that
+  // mentions neither keycloak nor /login (i.e. back in the app).
+  const hasLeftLogin = (url: URL): boolean => {
+    const href = url.toString();
+    if (href.includes("github.com") || href.includes("/accept-tos")) {
+      return true;
+    }
+    return !(href.includes("keycloak") || href.includes("/login"));
+  };
+  await page.waitForURL(hasLeftLogin, { timeout: 30_000 });
+
+  const landedOn = page.url();
+
+  if (landedOn.includes("/accept-tos")) {
+    console.log("Redirected to accept-tos page, handling TOS acceptance...");
+    await handleTOSAcceptance(page);
+    console.log("TOS acceptance completed");
+    return;
+  }
+
+  // Not on GitHub means an existing session sent us straight back to the app
+  if (!landedOn.includes("github.com")) {
+    console.log("Already authenticated via Keycloak session");
+    return;
+  }
+
+  // GitHub sign-in form
+  const loginInput = page.locator('input[name="login"]');
+  const passwordInput = page.locator('input[name="password"]');
+  await loginInput.waitFor({ state: "visible", timeout: 10_000 });
+  await loginInput.fill(login);
+  await passwordInput.fill(secret);
+  await page.locator('input[type="submit"][value="Sign in"]').click();
+
+  // 2FA is only attempted when a TOTP secret has been configured
+  const totpSecret = process.env.GITHUB_TEST_TOTP_SECRET;
+  if (totpSecret) {
+    await handle2FA(page, totpSecret);
+  }
+
+  await handleOAuthAuthorization(page);
+
+  console.log("GitHub authentication flow completed");
+}
+
+/**
+ * Accept the Terms of Service: tick the checkbox, press "Continue", and wait
+ * until the browser has left the accept-tos page.
+ *
+ * @param page - Page currently showing the accept-tos screen
+ */
+async function handleTOSAcceptance(
+  page: import("@playwright/test").Page,
+): Promise<void> {
+  // Let the page settle for up to 10s; a timeout here is not an error
+  try {
+    await page.waitForLoadState("networkidle", { timeout: 10_000 });
+  } catch {
+    // proceed with whatever has rendered
+  }
+
+  const checkbox = page.locator('input[type="checkbox"]');
+  await checkbox.waitFor({ state: "visible", timeout: 10_000 });
+  await checkbox.click();
+
+  const proceedButton = page.getByRole("button", { name: "Continue" });
+  await expect(proceedButton).toBeEnabled({ timeout: 5_000 });
+  await proceedButton.click();
+
+  // Done once the URL no longer points at the accept-tos page
+  await page.waitForURL((url) => !url.toString().includes("/accept-tos"), {
+    timeout: 30_000,
+  });
+}
+
+/**
+ * Complete GitHub's one-time-password prompt if it appears.
+ *
+ * Waits up to 5 seconds for the OTP field. If it never shows up, 2FA is
+ * assumed not to be required and the function returns without doing anything.
+ * Once the prompt is visible, failures (including a failure to generate the
+ * code) are propagated to the caller rather than swallowed, so a broken 2FA
+ * step is reported where it happens.
+ *
+ * @param page - Page that has just submitted the GitHub sign-in form
+ * @param totpSecret - TOTP secret passed to `generateTOTP`
+ */
+async function handle2FA(
+  page: import("@playwright/test").Page,
+  totpSecret: string,
+): Promise<void> {
+  // NOTE(review): confirm this selector against GitHub's current 2FA page.
+  const otpField = page.locator('input[name="otp"]');
+
+  // locator.isVisible() returns immediately and ignores its `timeout` option,
+  // so use waitFor() to actually give the 2FA page time to load.
+  const otpPromptShown = await otpField
+    .waitFor({ state: "visible", timeout: 5_000 })
+    .then(() => true)
+    .catch(() => false);
+
+  if (!otpPromptShown) {
+    // 2FA not required, continue
+    return;
+  }
+
+  console.log("2FA required, generating TOTP code...");
+
+  const totpCode = await generateTOTP(totpSecret);
+  await otpField.fill(totpCode);
+
+  // Submit 2FA
+  await page.locator('button[type="submit"]').click();
+}
+
+/**
+ * Generate the current time-based one-time password (RFC 6238) for a secret.
+ *
+ * Uses the common authenticator-app parameters: HMAC-SHA1, a 30 second time
+ * step and 6 digits. Implemented with Node's built-in `crypto` module only.
+ *
+ * @param secret - Base32-encoded shared secret; case, spaces, dashes and `=`
+ *                 padding are ignored
+ * @returns The 6-digit code for the current time, zero-padded
+ * @throws Error when the secret is empty or contains non-base32 characters
+ */
+async function generateTOTP(secret: string): Promise<string> {
+  // Function-scope import: this file does not import crypto at the top level
+  const { createHmac } = await import("crypto");
+
+  // --- Base32 (RFC 4648) decode of the shared secret ---
+  const BASE32_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
+  const normalized = secret.replace(/[\s=-]/g, "").toUpperCase();
+  if (normalized.length === 0) {
+    throw new Error("TOTP secret is empty");
+  }
+
+  const keyBytes: number[] = [];
+  let bitBuffer = 0;
+  let bitCount = 0;
+  for (const char of normalized) {
+    const value = BASE32_ALPHABET.indexOf(char);
+    if (value === -1) {
+      throw new Error("TOTP secret is not valid base32");
+    }
+    bitBuffer = (bitBuffer << 5) | value;
+    bitCount += 5;
+    if (bitCount >= 8) {
+      bitCount -= 8;
+      keyBytes.push((bitBuffer >>> bitCount) & 0xff);
+      // Keep only the bits not yet emitted so the buffer never overflows
+      bitBuffer &= (1 << bitCount) - 1;
+    }
+  }
+
+  // --- Moving factor: number of 30s steps since the Unix epoch, as 8 bytes ---
+  const counter = Buffer.alloc(8);
+  counter.writeBigUInt64BE(BigInt(Math.floor(Date.now() / 1000 / 30)));
+
+  // --- HOTP (RFC 4226): HMAC, dynamic truncation, reduce to 6 digits ---
+  const digest = createHmac("sha1", Buffer.from(keyBytes))
+    .update(counter)
+    .digest();
+  const offset = digest[digest.length - 1] & 0x0f;
+  const truncated = digest.readUInt32BE(offset) & 0x7fffffff;
+
+  return String(truncated % 1_000_000).padStart(6, "0");
+}
+
+/**
+ * Click GitHub's OAuth "authorize" button if the consent prompt appears.
+ *
+ * Waits up to 5 seconds for the button. If it never shows up, authorization
+ * is assumed not to be needed. The step stays best-effort: a failed click is
+ * logged as a warning instead of being silently ignored, and does not throw.
+ *
+ * @param page - Page that has just completed GitHub sign-in
+ */
+async function handleOAuthAuthorization(
+  page: import("@playwright/test").Page,
+): Promise<void> {
+  const authorizeButton = page.locator('button[name="authorize"]');
+
+  // locator.isVisible() returns immediately and ignores its `timeout` option,
+  // so use waitFor() to actually give the consent page time to load.
+  const promptShown = await authorizeButton
+    .waitFor({ state: "visible", timeout: 5_000 })
+    .then(() => true)
+    .catch(() => false);
+
+  if (!promptShown) {
+    // No authorization needed, continue
+    return;
+  }
+
+  console.log("OAuth authorization required, clicking authorize...");
+  try {
+    await authorizeButton.click();
+  } catch (error) {
+    console.warn(
+      `Failed to click the OAuth authorize button: ${
+        error instanceof Error ? error.message : String(error)
+      }`,
+    );
+  }
+}
+
+/**
+ * Sign in through the Keycloak login form.
+ *
+ * Opens `/login`, waits for a URL matching `keycloak` or `auth`, fills the
+ * username and password fields and submits the form. It does not wait for the
+ * redirect back to the application.
+ *
+ * @param page - Page used to drive the login
+ * @throws Error when KEYCLOAK_USERNAME or KEYCLOAK_PASSWORD is not set
+ */
+async function authenticateWithKeycloak(
+  page: import("@playwright/test").Page,
+): Promise<void> {
+  const { KEYCLOAK_USERNAME: user, KEYCLOAK_PASSWORD: pass } = process.env;
+
+  if (!user || !pass) {
+    throw new Error(
+      "Keycloak authentication requires KEYCLOAK_USERNAME and KEYCLOAK_PASSWORD environment variables",
+    );
+  }
+
+  console.log("Starting Keycloak authentication...");
+
+  // The exact flow depends on the Keycloak configuration of the deployment
+  await page.goto("/login");
+  await page.waitForURL(/keycloak|auth/, { timeout: 30_000 });
+
+  // Fill the form fields in order: username first, then password
+  const formValues: Array<[string, string]> = [
+    ["#username", user],
+    ["#password", pass],
+  ];
+  for (const [selector, value] of formValues) {
+    await page.locator(selector).fill(value);
+  }
+
+  await page.locator("#kc-login").click();
+
+  console.log("Keycloak authentication flow completed");
+}
@@ -0,0 +1,484 @@
+import { test, expect } from "@playwright/test";
+import { HomePage, ConversationPage } from "../pages";
+
+/**
+ * Smoke Tests for OpenHands Application
+ *
+ * These tests verify the critical path of the application:
+ * 1. User can log in (handled by global-setup)
+ * 2. User can access the home screen
+ * 3. User can select a repository
+ * 4. User can start a conversation
+ * 5. Agent can process a simple prompt without errors
+ *
+ * Tags:
+ * - @smoke: Core smoke tests that must pass
+ * - @critical: Critical functionality tests
+ *
+ * Environment Variables:
+ * - TEST_REPO_URL: Repository to use for testing (optional; when unset, the
+ *   conversation is started without selecting a repository)
+ */
+
+// Test configuration
+const { TEST_REPO_URL } = process.env;
+
+test.describe("Smoke Tests @smoke", () => {
+  test.describe.configure({ mode: "serial" }); // Run tests in sequence
+
+  let homePage: HomePage;
+  let conversationPage: ConversationPage;
+
+  test.beforeEach(async ({ page }) => {
+    homePage = new HomePage(page);
+    conversationPage = new ConversationPage(page);
+  });
+
+  test("should display home screen after authentication @critical", async ({
+    page,
+  }) => {
+    await homePage.goto();
+
+    // Verify home screen is visible
+    await expect(homePage.homeScreen).toBeVisible({ timeout: 30_000 });
+
+    // Verify key sections are present
+    await expect(homePage.newConversationSection).toBeVisible();
+
+    // Take screenshot for verification
+    await page.screenshot({ path: "test-results/screenshots/home-screen.png" });
+  });
+
+  test("should have user avatar visible indicating logged in state @critical", async () => {
+    await homePage.goto();
+
+    // Verify user is logged in
+    const isLoggedIn = await homePage.isLoggedIn();
+    expect(isLoggedIn).toBe(true);
+
+    // Verify user avatar is visible
+    await expect(homePage.userAvatar).toBeVisible();
+  });
+
+  test("should be able to open user menu", async () => {
+    await homePage.goto();
+
+    // Open user menu
+    await homePage.openUserMenu();
+
+    // Verify menu is visible
+    await expect(homePage.accountSettingsMenu).toBeVisible();
+  });
+
+  // Buys $10 of credit through Stripe checkout with a test card and verifies
+  // that the balance shown on the billing page increases by exactly $10.
+  test("should be able to purchase $10 credits via Stripe @critical", async ({
+    page,
+  }) => {
+    // Navigate to home and open user menu
+    await homePage.goto();
+    await homePage.openUserMenu();
+
+    // Click on Billing link in the user menu
+    const billingLink = page.getByRole("link", { name: /billing/i });
+    await billingLink.click();
+
+    // Wait for billing page to load
+    await page.waitForURL(/\/settings\/billing/, { timeout: 30_000 });
+    await expect(page.getByTestId("billing-settings")).toBeVisible({
+      timeout: 10_000,
+    });
+
+    // Reads the displayed balance as a number. Everything except digits, the
+    // decimal point and a minus sign is stripped, so both "$12.34" and
+    // "$1,005.00" parse correctly; an empty value counts as 0.
+    const balanceElement = page.getByTestId("user-balance");
+    const readBalance = async (): Promise<number> => {
+      const text = (await balanceElement.textContent()) ?? "";
+      return parseFloat(text.replace(/[^0-9.-]/g, "") || "0");
+    };
+
+    // Capture initial balance
+    await expect(balanceElement).toBeVisible({ timeout: 10_000 });
+    const initialBalance = await readBalance();
+    console.log(`Initial balance: $${initialBalance.toFixed(2)}`);
+
+    // Enter $10 in the Add Funds input
+    const topUpInput = page.getByTestId("top-up-input");
+    await topUpInput.fill("10");
+
+    // Click Add Credit button
+    const addCreditButton = page.getByRole("button", { name: /add credit/i });
+    await expect(addCreditButton).toBeEnabled({ timeout: 5_000 });
+    await addCreditButton.click();
+
+    // Wait for redirect to Stripe checkout
+    await page.waitForURL(/checkout\.stripe\.com/, { timeout: 30_000 });
+    console.log("Redirected to Stripe checkout");
+
+    // Wait for the Pay button to be present (indicates form is ready)
+    const payButton = page.locator(".SubmitButton");
+    await payButton.waitFor({ state: "attached", timeout: 30_000 });
+    console.log("Stripe checkout form loaded");
+
+    // Fill in card number
+    const cardNumberInput = page.locator("#cardNumber");
+    await cardNumberInput.fill("5105105105105100");
+
+    // Fill in expiry date
+    const cardExpiryInput = page.locator("#cardExpiry");
+    await cardExpiryInput.fill("12/35");
+
+    // Fill in CVC
+    const cardCvcInput = page.locator("#cardCvc");
+    await cardCvcInput.fill("123");
+
+    // Fill in cardholder name
+    const billingNameInput = page.locator("#billingName");
+    await billingNameInput.fill("Testy Tester");
+
+    // Fill in ZIP code
+    const postalCodeInput = page.locator("#billingPostalCode");
+    await postalCodeInput.fill("12345");
+
+    // Take screenshot of filled Stripe form
+    await page.screenshot({
+      path: "test-results/screenshots/stripe-checkout-filled.png",
+    });
+
+    // Click Pay button
+    await payButton.click();
+
+    // Wait for redirect back to billing page
+    await page.waitForURL(/\/settings\/billing/, { timeout: 60_000 });
+    console.log("Returned to billing page after payment");
+
+    // Verify balance increased by $10. The UI may refresh the balance some
+    // time after the redirect, so poll for the expected value instead of
+    // sleeping for a fixed interval and reading it once.
+    await expect(balanceElement).toBeVisible({ timeout: 10_000 });
+    const expectedBalance = initialBalance + 10;
+    await expect
+      .poll(readBalance, {
+        message: "balance should increase by $10 after payment",
+        timeout: 30_000,
+      })
+      .toBeCloseTo(expectedBalance, 2);
+
+    const newBalance = await readBalance();
+    console.log(`New balance: $${newBalance.toFixed(2)}`);
+    console.log(
+      `Balance increased by $10: $${initialBalance.toFixed(2)} -> $${newBalance.toFixed(2)}`,
+    );
+
+    // Take screenshot of updated balance
+    await page.screenshot({
+      path: "test-results/screenshots/billing-after-payment.png",
+    });
+  });
+
+  test("should be able to start a conversation, send a prompt, and receive response @critical", async ({
+    page,
+  }) => {
+    // Navigate to home
+    await homePage.goto();
+
+    if (TEST_REPO_URL) {
+      // Select repository if repo selection is available
+      try {
+        await homePage.selectRepository(TEST_REPO_URL);
+        console.log(`Selected repository: ${TEST_REPO_URL}`);
+      } catch (e) {
+        console.log(
+          "Repository selection not available or failed, continuing...",
+        );
+      }
+      // Start a new conversation
+      await homePage.startNewConversation("repo-launch-button");
+    } else {
+      await homePage.startNewConversation("launch-new-conversation-button");
+    }
+
+    // Wait for conversation page to load
+    await page.waitForTimeout(2000); // Allow navigation to complete
+
+    // Initialize conversation page
+    conversationPage = new ConversationPage(page);
+
+    // Wait for the agent to be ready
+    await conversationPage.waitForConversationReady();
+
+    // Verify chat interface is available
+    await expect(conversationPage.chatBox).toBeVisible();
+    await expect(conversationPage.chatInput).toBeVisible();
+
+    // Take screenshot before sending message
+    await page.screenshot({
+      path: "test-results/screenshots/conversation-ready.png",
+    });
+
+    // Execute the test prompt
+    const prompt = "Reverse the word 'hello'";
+    console.log(`Sending prompt: "${prompt}"`);
+    await conversationPage.executePrompt(prompt, 120_000);
+
+    // Wait for a message containing the expected reversed word
+    const message = await conversationPage.waitForMessageContaining(
+      "olleh",
+      120_000,
+    );
+    console.log(
+      `Found expected response containing 'olleh': "${message.substring(0, 100)}..."`,
+    );
+
+    // Take screenshot of successful response
+    await page.screenshot({
+      path: "test-results/screenshots/agent-response.png",
+    });
+
+    console.log("Smoke test passed: Agent correctly reversed the word");
+  });
+
+  test("should be able to navigate to a running conversation @critical", async ({
+    page,
+  }) => {
+    // Start on the home page and open the first conversation listed there.
+    await homePage.goto();
+    await homePage.clickFirstConversation();
+
+    // Bind the shared page object to this test's page, then block until the
+    // conversation reports that it is ready.
+    conversationPage = new ConversationPage(page);
+    await conversationPage.waitForConversationReady();
+
+    // Keep a screenshot as evidence of the successful navigation.
+    const screenshotPath =
+      "test-results/screenshots/navigated-conversation.png";
+    await page.screenshot({ path: screenshotPath });
+
+    console.log("Successfully navigated to running conversation");
+  });
+
+  test("should be able to use Tavily search and get accurate response", async ({
+    page,
+  }) => {
+    // One budget for both sending the prompt and waiting for the answer;
+    // it is larger than the other prompts' because a search is involved.
+    const searchTimeoutMs = 180_000;
+    const prompt =
+      "Using Tavily search, please tell me who is the prime minister of Ireland.";
+
+    // Open the first conversation from the home page and wait for readiness.
+    await homePage.goto();
+    await homePage.clickFirstConversation();
+    conversationPage = new ConversationPage(page);
+    await conversationPage.waitForConversationReady();
+
+    // Ask the question.
+    console.log(`Sending prompt: "${prompt}"`);
+    await conversationPage.executePrompt(prompt, searchTimeoutMs);
+
+    // Block until some message mentions the expected name.
+    const reply = await conversationPage.waitForMessageContaining(
+      "Micheál Martin",
+      searchTimeoutMs,
+    );
+    console.log(
+      `Found expected response containing 'Micheál Martin': "${reply.substring(0, 100)}..."`,
+    );
+
+    // Keep a screenshot of the answer.
+    const screenshotPath =
+      "test-results/screenshots/tavily-search-response.png";
+    await page.screenshot({ path: screenshotPath });
+
+    console.log(
+      "Tavily search test passed: Agent correctly identified the Prime Minister of Ireland",
+    );
+  });
+
+  test("should be able to create API key and use it to access the API @critical", async ({
+    page,
+    request,
+    baseURL,
+  }) => {
+    const API_KEY_NAME = "Integration Test Key";
+
+    // Navigate to home and open user menu
+    await homePage.goto();
+    await homePage.openUserMenu();
+
+    // Click on API Keys link in the user menu
+    const apiKeysLink = page.getByRole("link", { name: /api keys/i });
+    await apiKeysLink.click();
+
+    // Wait for API Keys page to load
+    await page.waitForURL(/\/settings\/api-keys/, { timeout: 30_000 });
+    console.log("Navigated to API Keys page");
+
+    // Verify "Refresh API Key" button is visible (indicates user has credits)
+    const refreshApiKeyButton = page.getByRole("button", { name: /refresh/i });
+    await expect(refreshApiKeyButton).toBeVisible({ timeout: 10_000 });
+    console.log("Refresh API Key button is visible - user has credits");
+
+    // Delete a leftover "Integration Test Key" from a previous run, if any.
+    // locator.isVisible() returns immediately and ignores its `timeout`
+    // option, so wait explicitly (and briefly) for a matching row to render.
+    // `.first()` keeps the locator strict-mode safe if several rows match.
+    const existingKeyRow = page
+      .locator("tr", { hasText: API_KEY_NAME })
+      .first();
+    const hasExistingKey = await existingKeyRow
+      .waitFor({ state: "visible", timeout: 2_000 })
+      .then(() => true)
+      .catch(() => false);
+    if (hasExistingKey) {
+      console.log(`Found existing "${API_KEY_NAME}", deleting it...`);
+      const deleteButton = existingKeyRow.locator(
+        'button[aria-label^="Delete"]',
+      );
+      await deleteButton.click();
+
+      // Confirm deletion in modal
+      const deleteModal = page.getByTestId("delete-api-key-modal");
+      await expect(deleteModal).toBeVisible({ timeout: 5_000 });
+      // The confirm button is looked up from the parent of the element that
+      // carries the test id; the first button found there is clicked.
+      const confirmDeleteButton = deleteModal
+        .locator("xpath=..")
+        .getByRole("button")
+        .first();
+      await confirmDeleteButton.click();
+
+      // Wait for modal to close
+      await expect(deleteModal).not.toBeVisible({ timeout: 5_000 });
+      console.log(`Deleted existing "${API_KEY_NAME}"`);
+
+      // Wait for the page to settle after deletion
+      await page.waitForTimeout(1000);
+    }
+
+    // Click "Create API Key" button
+    const createApiKeyButton = page.getByRole("button", {
+      name: /create api key/i,
+    });
+    await expect(createApiKeyButton).toBeVisible({ timeout: 10_000 });
+    await createApiKeyButton.click();
+
+    // Wait for create modal to appear
+    const createModal = page.getByTestId("create-api-key-modal");
+    await expect(createModal).toBeVisible({ timeout: 5_000 });
+
+    // Enter the key name
+    const nameInput = page.getByTestId("api-key-name-input");
+    await nameInput.fill(API_KEY_NAME);
+
+    // Click Create button
+    const createButton = page.getByRole("button", { name: /^create$/i });
+    await createButton.click();
+
+    // Wait for the new key modal to appear with the generated key
+    const newKeyModal = page.getByTestId("new-api-key-modal");
+    await expect(newKeyModal).toBeVisible({ timeout: 10_000 });
+
+    // Capture the API key from the modal. textContent() returns the raw text
+    // node content, which may carry surrounding whitespace from the markup;
+    // trim it so the header value sent below is exactly the key.
+    const keyDisplay = newKeyModal.locator(".font-mono");
+    const apiKey = ((await keyDisplay.textContent()) ?? "").trim();
+    expect(apiKey).toBeTruthy();
+    console.log(`Created API key: ${apiKey.substring(0, 20)}...`);
+
+    // Close the modal
+    const closeButton = page.getByRole("button", { name: /close/i });
+    await closeButton.click();
+    await expect(newKeyModal).not.toBeVisible({ timeout: 5_000 });
+
+    // Take screenshot of API keys page
+    await page.screenshot({
+      path: "test-results/screenshots/api-keys-created.png",
+    });
+
+    // Test the API key by making a request to /api/v1/sandboxes/search
+    console.log("Testing API key with sandboxes search endpoint...");
+    const response = await request.get(`${baseURL}/api/v1/sandboxes/search`, {
+      headers: {
+        "X-Access-Token": apiKey,
+      },
+    });
+
+    // Verify the response
+    expect(response.ok()).toBe(true);
+    const responseBody = await response.json();
+    console.log(
+      `Sandboxes search response: ${JSON.stringify(responseBody).substring(0, 200)}...`,
+    );
+
+    // Verify we got at least 1 sandbox (the currently running one)
+    // Response format: { items: [], next_page_id: string | null }
+    expect(responseBody).toHaveProperty("items");
+    expect(Array.isArray(responseBody.items)).toBe(true);
+    expect(responseBody.items.length).toBeGreaterThanOrEqual(1);
+    console.log(
+      `Found ${responseBody.items.length} sandbox(es) - API key works!`,
+    );
+
+    // Take screenshot after API test
+    await page.screenshot({
+      path: "test-results/screenshots/api-key-test-complete.png",
+    });
+  });
+});
+
+test.describe("Health Check Tests @smoke", () => {
+  test("application should be accessible", async ({ page, baseURL }) => {
+    // Any HTTP status below 400 counts as reachable.
+    const response = await page.goto(baseURL || "/");
+    expect(response?.status()).toBeLessThan(400);
+  });
+
+  test("application should not have console errors on load", async ({
+    page,
+  }) => {
+    // Substrings of console errors that are known to be acceptable.
+    const ignorable = ["favicon", "sourcemap", "DevTools"];
+    // Substrings of console errors that must fail the test.
+    const fatal = ["TypeError", "ReferenceError", "SyntaxError"];
+    const errors: string[] = [];
+
+    // Collect every console error that is not on the ignore list.
+    page.on("console", (msg) => {
+      if (msg.type() !== "error") {
+        return;
+      }
+      const text = msg.text();
+      if (!ignorable.some((needle) => text.includes(needle))) {
+        errors.push(text);
+      }
+    });
+
+    // Waiting for network idle is best-effort: a timeout here is swallowed.
+    await page.goto("/");
+    await page
+      .waitForLoadState("networkidle", { timeout: 30_000 })
+      .catch(() => {});
+
+    // Print everything that was collected to help with debugging.
+    if (errors.length > 0) {
+      console.log("Console errors found:", errors);
+    }
+
+    // Only the fatal categories fail the test.
+    const criticalErrors = errors.filter((text) =>
+      fatal.some((needle) => text.includes(needle)),
+    );
+    expect(criticalErrors).toHaveLength(0);
+  });
+});
+
+test.describe("Environment Validation @smoke", () => {
+  test("should be connected to correct environment", async ({
+    page,
+    baseURL,
+  }) => {
+    // Without a baseURL there is no expected host to compare against; fail
+    // with a readable message instead of an "Invalid URL" TypeError below.
+    expect(baseURL, "baseURL must be configured").toBeTruthy();
+
+    await page.goto("/");
+
+    // Log the current environment for verification
+    console.log(`Testing against: ${baseURL}`);
+
+    // Verify we're on the expected domain
+    const url = page.url();
+    expect(url).toContain(new URL(baseURL || "").hostname);
+  });
+
+  test("should have valid SSL certificate", async ({ browser, baseURL }) => {
+    // A page that runs with ignoreHTTPSErrors enabled accepts invalid
+    // certificates, so it cannot prove the certificate is valid. Use a
+    // dedicated context that enforces certificate validation: with a bad
+    // certificate the navigation itself fails.
+    const strictContext = await browser.newContext({
+      ignoreHTTPSErrors: false,
+    });
+    try {
+      const strictPage = await strictContext.newPage();
+      const response = await strictPage.goto(baseURL || "/");
+      expect(response?.ok()).toBe(true);
+    } finally {
+      // Always release the extra context, even when the assertion fails.
+      await strictContext.close();
+    }
+  });
+});
@@ -0,0 +1,24 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ESNext",
+    "moduleResolution": "bundler",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "resolveJsonModule": true,
+    "declaration": false,
+    "declarationMap": false,
+    "noEmit": true,
+    "types": ["node"],
+    "baseUrl": ".",
+    "paths": {
+      "@pages/*": ["pages/*"],
+      "@utils/*": ["utils/*"],
+      "@fixtures/*": ["fixtures/*"]
+    }
+  },
+  "include": ["**/*.ts"],
+  "exclude": ["node_modules", "playwright-report", "test-results"]
+}
@@ -0,0 +1 @@
+export * from "./test-helpers";
@@ -0,0 +1,179 @@
+import { Page } from "@playwright/test";
+
+/**
+ * Utility functions for integration tests
+ */
+
+/**
+ * Poll `condition` until it resolves to true or `timeout` elapses.
+ *
+ * The condition is always evaluated at least once (even with `timeout: 0`)
+ * and once more after the final wait, so a condition that turns true during
+ * the last interval is not reported as a timeout. A rejection from
+ * `condition` propagates to the caller unchanged.
+ *
+ * @param condition - Async predicate to poll.
+ * @param options.timeout - Maximum total wait in ms (default 30_000).
+ * @param options.interval - Delay between polls in ms (default 500).
+ * @param options.message - Prefix of the timeout error message.
+ * @throws Error with `${message} within ${timeout}ms` once time runs out.
+ */
+export async function waitForCondition(
+  condition: () => Promise<boolean>,
+  options: {
+    timeout?: number;
+    interval?: number;
+    message?: string;
+  } = {},
+): Promise<void> {
+  const {
+    timeout = 30_000,
+    interval = 500,
+    message = "Condition not met",
+  } = options;
+  const deadline = Date.now() + timeout;
+
+  for (;;) {
+    if (await condition()) {
+      return;
+    }
+    const remaining = deadline - Date.now();
+    if (remaining <= 0) {
+      break;
+    }
+    // Never sleep past the deadline.
+    const pause = Math.min(interval, remaining);
+    await new Promise((resolve) => setTimeout(resolve, pause));
+  }
+
+  throw new Error(`${message} within ${timeout}ms`);
+}
+
+/**
+ * Call `fn` until it succeeds, waiting with exponential backoff in between.
+ *
+ * `maxRetries` is the total number of attempts. At least one attempt is
+ * always made, so a value of 0 (or less) no longer skips `fn` and throws
+ * `undefined`. The wait before attempt k+1 is `baseDelay * 2 ** k`, capped
+ * at `maxDelay`; there is no wait after the final attempt.
+ *
+ * @param fn - Async operation to run.
+ * @param options.maxRetries - Total attempts (default 3, minimum 1).
+ * @param options.baseDelay - First backoff delay in ms (default 1000).
+ * @param options.maxDelay - Upper bound for a single delay in ms.
+ * @returns The value resolved by the first successful attempt.
+ * @throws Whatever the last failed attempt threw.
+ */
+export async function retry<T>(
+  fn: () => Promise<T>,
+  options: {
+    maxRetries?: number;
+    baseDelay?: number;
+    maxDelay?: number;
+  } = {},
+): Promise<T> {
+  const { maxRetries = 3, baseDelay = 1000, maxDelay = 10000 } = options;
+  // Guard against 0, negative and NaN so `fn` runs at least once.
+  const attempts = maxRetries >= 1 ? maxRetries : 1;
+
+  let lastError: unknown;
+
+  for (let attempt = 0; attempt < attempts; attempt++) {
+    try {
+      return await fn();
+    } catch (error) {
+      lastError = error;
+      if (attempt < attempts - 1) {
+        const delay = Math.min(baseDelay * 2 ** attempt, maxDelay);
+        console.log(
+          `Retry attempt ${attempt + 1}/${attempts} after ${delay}ms`,
+        );
+        await new Promise((resolve) => setTimeout(resolve, delay));
+      }
+    }
+  }
+
+  throw lastError;
+}
+
+/**
+ * Generate a unique test identifier of the form `test-<epoch ms>-<suffix>`.
+ *
+ * The suffix is nine base-36 characters taken from `Math.random()`. It is
+ * right-padded with "0" because the base-36 expansion can be shorter than
+ * nine digits. `slice` replaces the deprecated `String.prototype.substr`.
+ */
+export function generateTestId(): string {
+  const randomPart = Math.random().toString(36).slice(2, 11).padEnd(9, "0");
+  return `test-${Date.now()}-${randomPart}`;
+}
+
+/**
+ * Print a test step to the console, prefixed with the current time as an
+ * ISO-8601 string in square brackets.
+ */
+export function logStep(step: string): void {
+  console.log(`[${new Date().toISOString()}] ${step}`);
+}
+
+/**
+ * Save a screenshot under `test-results/screenshots/`.
+ *
+ * The file name is `<name>-<epoch ms>.png`, where every character of `name`
+ * other than ASCII letters, digits, "-" and "_" is replaced by "-".
+ * Full-page capture is used unless `options.fullPage` is set to `false`.
+ */
+export async function takeScreenshot(
+  page: Page,
+  name: string,
+  options: { fullPage?: boolean } = {},
+): Promise<void> {
+  const capturedAt = Date.now();
+  const safeName = name.replace(/[^a-zA-Z0-9-_]/g, "-");
+  const path = `test-results/screenshots/${safeName}-${capturedAt}.png`;
+  const fullPage = options.fullPage ?? true;
+
+  await page.screenshot({ path, fullPage });
+}
+
+/**
+ * Run `action` and fail if the page logged console errors while it ran.
+ *
+ * A console listener is attached for the duration of `action` and always
+ * detached afterwards, even when `action` throws. Errors whose text contains
+ * "favicon" or "sourcemap" are ignored.
+ *
+ * @throws Error listing every collected message, one per line.
+ */
+export async function expectNoConsoleErrors(
+  page: Page,
+  action: () => Promise<void>,
+): Promise<void> {
+  const ignorable = ["favicon", "sourcemap"];
+  const collected: string[] = [];
+
+  const onConsole = (msg: import("@playwright/test").ConsoleMessage) => {
+    if (msg.type() !== "error") {
+      return;
+    }
+    const text = msg.text();
+    if (ignorable.every((needle) => !text.includes(needle))) {
+      collected.push(text);
+    }
+  };
+
+  page.on("console", onConsole);
+  try {
+    await action();
+  } finally {
+    page.off("console", onConsole);
+  }
+
+  if (collected.length > 0) {
+    throw new Error(`Console errors detected:\n${collected.join("\n")}`);
+  }
+}
+
+/**
+ * Environment-specific settings, read from `process.env` when this module
+ * is loaded. Each value falls back to a default when its variable is unset
+ * or empty.
+ */
+export const env = {
+  // BASE_URL, defaulting to the staging deployment.
+  baseUrl: process.env.BASE_URL || "https://staging.all-hands.dev",
+  // TEST_ENV, defaulting to "staging".
+  testEnv: process.env.TEST_ENV || "staging",
+  // TEST_REPO_URL, defaulting to the OpenHands/deploy repository.
+  testRepoUrl:
+    process.env.TEST_REPO_URL || "https://github.com/OpenHands/deploy",
+  // TEST_PROMPT, defaulting to a short prompt.
+  testPrompt: process.env.TEST_PROMPT || "Flip a coin!",
+  // True only when CI is exactly the string "true".
+  isCI: process.env.CI === "true",
+
+  // Build the staging URL for a feature branch: every character that is not
+  // an ASCII letter, digit or "-" becomes "-", then the result is lowercased.
+  getFeatureBranchUrl(branchName: string): string {
+    const hyphenated = branchName.replace(/[^a-zA-Z0-9-]/g, "-");
+    const subdomain = hyphenated.toLowerCase();
+    return `https://${subdomain}.staging.all-hands.dev`;
+  },
+};
+
+/**
+ * Check whether the tests are running against the given environment.
+ *
+ * The decision is a substring match on BASE_URL, read at call time. When
+ * BASE_URL is unset or empty the staging URL is assumed, which is the same
+ * default the exported `env.baseUrl` uses, so a default run is recognised as
+ * "staging" instead of matching no environment at all.
+ *
+ * @param env - Environment name to test for.
+ * @returns true when the effective base URL belongs to that environment.
+ */
+export function isEnvironment(
+  env: "staging" | "production" | "local",
+): boolean {
+  const baseUrl = process.env.BASE_URL || "https://staging.all-hands.dev";
+
+  switch (env) {
+    case "staging":
+      return baseUrl.includes("staging.all-hands.dev");
+    case "production":
+      return baseUrl.includes("app.all-hands.dev");
+    case "local":
+      return baseUrl.includes("localhost");
+    default:
+      return false;
+  }
+}
+
+/**
+ * Skip the current test when running in any of the listed environments.
+ *
+ * `test.skip` is always called; its condition is true when at least one
+ * entry of `envs` matches according to `isEnvironment`.
+ */
+export function skipInEnvironment(
+  test: { skip: (condition: boolean, message: string) => void },
+  envs: ("staging" | "production" | "local")[],
+  reason: string,
+): void {
+  const matchesCurrent = envs.some((candidate) => isEnvironment(candidate));
+  test.skip(matchesCurrent, `Skipped in ${envs.join(", ")}: ${reason}`);
+}
@@ -49,6 +49,7 @@ from openhands.app_server.app_conversation.app_conversation_service import (
 )
 from openhands.app_server.app_conversation.app_conversation_service_base import (
    AppConversationServiceBase,
+    get_project_dir,
 )
 from openhands.app_server.app_conversation.app_conversation_start_task_service import (
    AppConversationStartTaskService,
@@ -540,10 +541,13 @@ async def get_conversation_skills(
        # Prefer the shared loader to avoid duplication; otherwise return empty list.
        all_skills: list = []
        if isinstance(app_conversation_service, AppConversationServiceBase):
+            project_dir = get_project_dir(
+                sandbox_spec.working_dir, conversation.selected_repository
+            )
            all_skills = await app_conversation_service.load_and_merge_all_skills(
                sandbox,
                conversation.selected_repository,
-                sandbox_spec.working_dir,
+                project_dir,
                agent_server_url,
            )

@@ -47,6 +47,40 @@ PRE_COMMIT_HOOK = '.git/hooks/pre-commit'
 PRE_COMMIT_LOCAL = '.git/hooks/pre-commit.local'


+def get_project_dir(
+    working_dir: str,
+    selected_repository: str | None = None,
+) -> str:
+    """Resolve the project root directory of a conversation.
+
+    With a selected repository the project root is `{working_dir}/{repo_name}`,
+    where `repo_name` is the part of `selected_repository` after its last '/'.
+    That is the cloned repo directory holding the `.openhands/` configuration
+    (setup.sh, pre-commit.sh, skills/, etc.).
+
+    With no repository (None or an empty string) the project root is
+    `working_dir` itself.
+
+    Use this consistently for everything that depends on the project root:
+    - workspace.working_dir (terminal CWD, file editor root, etc.)
+    - .openhands/setup.sh execution
+    - .openhands/pre-commit.sh (git hooks setup)
+    - .openhands/skills/ (project skills)
+    - PLAN.md path
+
+    Args:
+        working_dir: Base working directory path in the sandbox
+            (e.g., '/workspace/project' from sandbox_spec).
+        selected_repository: Repository name
+            (e.g., 'OpenHands/software-agent-sdk'), or None.
+
+    Returns:
+        The project root directory path.
+    """
+    if not selected_repository:
+        return working_dir
+    # Keep only the text after the last '/' (the whole name if there is none).
+    _owner, _sep, repo_name = selected_repository.rpartition('/')
+    return '/'.join((working_dir, repo_name))
+
+
@dataclass
 class AppConversationServiceBase(AppConversationService, ABC):
    """App Conversation service which adds git specific functionality.
@@ -61,7 +95,7 @@ class AppConversationServiceBase(AppConversationService, ABC):
        self,
        sandbox: SandboxInfo,
        selected_repository: str | None,
-        working_dir: str,
+        project_dir: str,
        agent_server_url: str,
    ) -> list[Skill]:
        """Load skills from all sources via the agent-server.
@@ -77,7 +111,7 @@ class AppConversationServiceBase(AppConversationService, ABC):
        Args:
            sandbox: SandboxInfo containing exposed URLs and agent-server URL
            selected_repository: Repository name or None
-            working_dir: Working directory path
+            project_dir: Project root directory (resolved via get_project_dir).
            agent_server_url: Agent-server URL (required)

        Returns:
@@ -96,12 +130,6 @@ class AppConversationServiceBase(AppConversationService, ABC):
            # Build sandbox config (exposed URLs)
            sandbox_config = build_sandbox_config(sandbox)

-            # Determine project directory for project skills
-            project_dir = working_dir
-            if selected_repository:
-                repo_name = selected_repository.split('/')[-1]
-                project_dir = f'{working_dir}/{repo_name}'
-
            # Single API call to agent-server for ALL skills
            all_skills = await load_skills_from_agent_server(
                agent_server_url=agent_server_url,
@@ -180,24 +208,25 @@ class AppConversationServiceBase(AppConversationService, ABC):
        agent: Agent,
        remote_workspace: AsyncRemoteWorkspace,
        selected_repository: str | None,
-        working_dir: str,
+        project_dir: str,
    ):
        """Load all skills and update agent with them.

        Args:
            agent: The agent to update
            remote_workspace: AsyncRemoteWorkspace for loading repo skills
-            selected_repository: Repository name or None
-            working_dir: Working directory path
+            selected_repository: Repository name or None (used for org config)
+            project_dir: Project root directory (already resolved via get_project_dir).

        Returns:
            Updated agent with skills loaded into context
        """
-        # Load and merge all skills
-        # Extract agent_server_url from remote_workspace host
        agent_server_url = remote_workspace.host
        all_skills = await self.load_and_merge_all_skills(
-            sandbox, selected_repository, working_dir, agent_server_url
+            sandbox,
+            selected_repository,
+            project_dir,
+            agent_server_url,
        )

        # Update agent with skills
@@ -216,20 +245,27 @@ class AppConversationServiceBase(AppConversationService, ABC):
        yield task
        await self.clone_or_init_git_repo(task, workspace)

+        # Compute the project root — the cloned repo directory when a repo is
+        # selected, or the sandbox working_dir otherwise.  This must be used
+        # for all .openhands/ features (setup.sh, pre-commit.sh, skills).
+        project_dir = get_project_dir(
+            workspace.working_dir, task.request.selected_repository
+        )
+
        task.status = AppConversationStartTaskStatus.RUNNING_SETUP_SCRIPT
        yield task
-        await self.maybe_run_setup_script(workspace)
+        await self.maybe_run_setup_script(workspace, project_dir)

        task.status = AppConversationStartTaskStatus.SETTING_UP_GIT_HOOKS
        yield task
-        await self.maybe_setup_git_hooks(workspace)
+        await self.maybe_setup_git_hooks(workspace, project_dir)

        task.status = AppConversationStartTaskStatus.SETTING_UP_SKILLS
        yield task
        await self.load_and_merge_all_skills(
            sandbox,
            task.request.selected_repository,
-            workspace.working_dir,
+            project_dir,
            agent_server_url,
        )

@@ -334,26 +370,35 @@ class AppConversationServiceBase(AppConversationService, ABC):
    async def maybe_run_setup_script(
        self,
        workspace: AsyncRemoteWorkspace,
+        project_dir: str,
    ):
-        """Run .openhands/setup.sh if it exists in the workspace or repository."""
-        setup_script = workspace.working_dir + '/.openhands/setup.sh'
+        """Run .openhands/setup.sh if it exists in the project root.
+
+        Args:
+            workspace: Remote workspace for command execution.
+            project_dir: Project root directory (repo root when a repo is selected).
+        """
+        setup_script = project_dir + '/.openhands/setup.sh'

        await workspace.execute_command(
-            f'chmod +x {setup_script} && source {setup_script}', timeout=600
+            f'chmod +x {setup_script} && source {setup_script}',
+            cwd=project_dir,
+            timeout=600,
        )

-        # TODO: Does this need to be done?
-        # Add the action to the event stream as an ENVIRONMENT event
-        # source = EventSource.ENVIRONMENT
-        # self.event_stream.add_event(action, source)
-
    async def maybe_setup_git_hooks(
        self,
        workspace: AsyncRemoteWorkspace,
+        project_dir: str,
    ):
-        """Set up git hooks if .openhands/pre-commit.sh exists in the workspace or repository."""
+        """Set up git hooks if .openhands/pre-commit.sh exists in the project root.
+
+        Args:
+            workspace: Remote workspace for command execution.
+            project_dir: Project root directory (repo root when a repo is selected).
+        """
        command = 'mkdir -p .git/hooks && chmod +x .openhands/pre-commit.sh'
-        result = await workspace.execute_command(command, workspace.working_dir)
+        result = await workspace.execute_command(command, project_dir)
        if result.exit_code:
            return

@@ -369,9 +414,7 @@ class AppConversationServiceBase(AppConversationService, ABC):
                        f'mv {PRE_COMMIT_HOOK} {PRE_COMMIT_LOCAL} &&'
                        f'chmod +x {PRE_COMMIT_LOCAL}'
                    )
-                    result = await workspace.execute_command(
-                        command, workspace.working_dir
-                    )
+                    result = await workspace.execute_command(command, project_dir)
                    if result.exit_code != 0:
                        _logger.error(
                            f'Failed to preserve existing pre-commit hook: {result.stderr}',
@@ -41,6 +41,7 @@ from openhands.app_server.app_conversation.app_conversation_service import (
 )
 from openhands.app_server.app_conversation.app_conversation_service_base import (
    AppConversationServiceBase,
+    get_project_dir,
 )
 from openhands.app_server.app_conversation.app_conversation_start_task_service import (
    AppConversationStartTaskService,
@@ -1227,7 +1228,12 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
        5. Passing plugins to the agent server for remote plugin loading
        """
        user = await self.user_context.get_user_info()
-        workspace = LocalWorkspace(working_dir=working_dir)
+
+        # Compute the project root — this is the repo directory when a repo is
+        # selected, or the sandbox working_dir otherwise.  All tools, hooks,
+        # setup scripts, and plan paths must use this consistently.
+        project_dir = get_project_dir(working_dir, selected_repository)
+        workspace = LocalWorkspace(working_dir=project_dir)

        # Set up secrets for all git providers
        secrets = await self._setup_secrets_for_git_providers(user)
@@ -1244,7 +1250,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
            user.condenser_max_size,
            secrets=secrets,
            git_provider=git_provider,
-            working_dir=working_dir,
+            working_dir=project_dir,
        )

        # Finalize and return the conversation request
@@ -1258,7 +1264,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
            sandbox,
            remote_workspace,
            selected_repository,
-            working_dir,
+            project_dir,
            plugins=plugins,
        )

@@ -216,13 +216,18 @@ class BitbucketDCMixinBase(BaseGitService, HTTPClient):
        )

    async def _parse_repository(
-        self, repo: dict, link_header: str | None = None
+        self,
+        repo: dict,
+        link_header: str | None = None,
+        fetch_default_branch: bool = False,
    ) -> Repository:
        """Parse a Bitbucket data center API repository response into a Repository object.

        Args:
            repo: Repository data from Bitbucket data center API
            link_header: Optional link header for pagination
+            fetch_default_branch: Whether to make an additional API call to fetch the
+                default branch. Set to False for listing endpoints to avoid N+1 queries.

        Returns:
            Repository object
@@ -240,14 +245,15 @@ class BitbucketDCMixinBase(BaseGitService, HTTPClient):
        is_public = repo.get('public', False)

        main_branch: str | None = None
-        try:
-            default_branch_url = (
-                f'{self._repo_api_base(project_key, repo_slug)}/default-branch'
-            )
-            default_branch_data, _ = await self._make_request(default_branch_url)
-            main_branch = default_branch_data.get('displayId') or None
-        except Exception as e:
-            logger.debug(f'Could not fetch default branch for {full_name}: {e}')
+        if fetch_default_branch:
+            try:
+                default_branch_url = (
+                    f'{self._repo_api_base(project_key, repo_slug)}/default-branch'
+                )
+                default_branch_data, _ = await self._make_request(default_branch_url)
+                main_branch = default_branch_data.get('displayId') or None
+            except Exception as e:
+                logger.debug(f'Could not fetch default branch for {full_name}: {e}')

        return Repository(
            id=str(repo.get('id', '')),
@@ -275,7 +281,7 @@ class BitbucketDCMixinBase(BaseGitService, HTTPClient):
        owner, repo = self._extract_owner_and_repo(repository)
        url = self._repo_api_base(owner, repo)
        data, _ = await self._make_request(url)
-        return await self._parse_repository(data)
+        return await self._parse_repository(data, fetch_default_branch=True)

    async def _get_cursorrules_url(self, repository: str) -> str:
        """Get the URL for checking .cursorrules file."""
@@ -115,6 +115,7 @@ REASONING_EFFORT_PATTERNS: list[str] = [
    'o4-mini-2025-04-16',
    'gemini-2.5-flash',
    'gemini-2.5-pro',
+    'gemini-3.1-pro*',
    'gpt-5*',
    # DeepSeek reasoning family
    'deepseek-r1-0528*',
@@ -139,6 +140,7 @@ PROMPT_CACHE_PATTERNS: list[str] = [
    'claude-3-opus-20240229',
    'claude-sonnet-4*',
    'claude-opus-4*',
+    'gemini-3.1-pro*',
    # Kimi series - verified via litellm config
    'kimi-k2.5',
    # GLM series - verified via litellm config
@@ -22,6 +22,7 @@ OPENHANDS_MODELS = [
    'openhands/gpt-5.2',
    'openhands/minimax-m2.5',
    'openhands/gemini-3-pro-preview',
+    'openhands/gemini-3.1-pro-preview',
    'openhands/gemini-3-flash-preview',
    'openhands/deepseek-chat',
    'openhands/devstral-medium-2512',
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.3.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.

 [[package]]
 name = "agent-client-protocol"
@@ -3468,7 +3468,7 @@ files = [
 [package.dependencies]
 googleapis-common-protos = ">=1.5.5"
 grpcio = ">=1.71.2"
-protobuf = ">=5.26.1,<6.0.dev0"
+protobuf = ">=5.26.1,<6.0dev"

 [[package]]
 name = "grpclib"
@@ -3836,7 +3836,7 @@ pfzy = ">=0.3.1,<0.4.0"
 prompt-toolkit = ">=3.0.1,<4.0.0"

 [package.extras]
-docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17b43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"]
+docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17-beta.43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"]

 [[package]]
 name = "installer"
@@ -4287,7 +4287,7 @@ fqdn = {version = "*", optional = true, markers = "extra == \"format-nongpl\""}
 idna = {version = "*", optional = true, markers = "extra == \"format-nongpl\""}
 isoduration = {version = "*", optional = true, markers = "extra == \"format-nongpl\""}
 jsonpointer = {version = ">1.13", optional = true, markers = "extra == \"format-nongpl\""}
-jsonschema-specifications = ">=2023.3.6"
+jsonschema-specifications = ">=2023.03.6"
 referencing = ">=0.28.4"
 rfc3339-validator = {version = "*", optional = true, markers = "extra == \"format-nongpl\""}
 rfc3986-validator = {version = ">0.1.0", optional = true, markers = "extra == \"format-nongpl\""}
@@ -4752,7 +4752,7 @@ files = [
 ]

 [package.dependencies]
-certifi = ">=14.5.14"
+certifi = ">=14.05.14"
 durationpy = ">=0.7"
 google-auth = ">=1.0.1"
 oauthlib = ">=3.2.2"
@@ -6942,7 +6942,6 @@ files = [
    {file = "pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523"},
    {file = "pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f"},
 ]
-markers = {runtime = "sys_platform != \"win32\" and sys_platform != \"emscripten\""}

 [package.dependencies]
 ptyprocess = ">=0.5"
@@ -6960,7 +6959,7 @@ files = [
 ]

 [package.extras]
-docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17b43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"]
+docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17-beta.43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"]

 [[package]]
 name = "pg8000"
@@ -7517,7 +7516,6 @@ files = [
    {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"},
    {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"},
 ]
-markers = {runtime = "sys_platform != \"win32\" and sys_platform != \"emscripten\" or os_name != \"nt\""}

 [[package]]
 name = "pure-eval"
@@ -7546,6 +7544,18 @@ files = [
    {file = "puremagic-1.30.tar.gz", hash = "sha256:f9ff7ac157d54e9cf3bff1addfd97233548e75e685282d84ae11e7ffee1614c9"},
 ]

+[[package]]
+name = "py"
+version = "1.11.0"
+description = "library with cross-python path, ini-parsing, io, code, log facilities"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+groups = ["test"]
+files = [
+    {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
+    {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
+]
+
 [[package]]
 name = "py-key-value-aio"
 version = "0.4.4"
@@ -11567,14 +11577,14 @@ diagrams = ["jinja2", "railroad-diagrams"]

 [[package]]
 name = "pypdf"
-version = "6.7.5"
+version = "6.8.0"
 description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files"
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "pypdf-6.7.5-py3-none-any.whl", hash = "sha256:07ba7f1d6e6d9aa2a17f5452e320a84718d4ce863367f7ede2fd72280349ab13"},
-    {file = "pypdf-6.7.5.tar.gz", hash = "sha256:40bb2e2e872078655f12b9b89e2f900888bb505e88a82150b64f9f34fa25651d"},
+    {file = "pypdf-6.8.0-py3-none-any.whl", hash = "sha256:2a025080a8dd73f48123c89c57174a5ff3806c71763ee4e49572dc90454943c7"},
+    {file = "pypdf-6.8.0.tar.gz", hash = "sha256:cb7eaeaa4133ce76f762184069a854e03f4d9a08568f0e0623f7ea810407833b"},
 ]

 [package.extras]
@@ -11690,6 +11700,22 @@ pytest = ">=7"
 [package.extras]
 testing = ["process-tests", "pytest-xdist", "virtualenv"]

+[[package]]
+name = "pytest-forked"
+version = "1.6.0"
+description = "run tests in isolated forked subprocesses"
+optional = false
+python-versions = ">=3.7"
+groups = ["test"]
+files = [
+    {file = "pytest-forked-1.6.0.tar.gz", hash = "sha256:4dafd46a9a600f65d822b8f605133ecf5b3e1941ebb3588e943b4e3eb71a5a3f"},
+    {file = "pytest_forked-1.6.0-py3-none-any.whl", hash = "sha256:810958f66a91afb1a1e2ae83089d8dc1cd2437ac96b12963042fbb9fb4d16af0"},
+]
+
+[package.dependencies]
+py = "*"
+pytest = ">=3.10"
+
 [[package]]
 name = "pytest-playwright"
 version = "0.7.2"
@@ -12875,10 +12901,10 @@ files = [
 ]

 [package.dependencies]
-botocore = ">=1.37.4,<2.0a0"
+botocore = ">=1.37.4,<2.0a.0"

 [package.extras]
-crt = ["botocore[crt] (>=1.37.4,<2.0a0)"]
+crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"]

 [[package]]
 name = "scantree"
@@ -14814,7 +14840,7 @@ files = [
 ]

 [package.extras]
-cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and python_version < \"3.14\"", "cffi (>=2.0.0b0) ; platform_python_implementation != \"PyPy\" and python_version >= \"3.14\""]
+cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and python_version < \"3.14\"", "cffi (>=2.0.0b) ; platform_python_implementation != \"PyPy\" and python_version >= \"3.14\""]

 [extras]
 third-party-runtimes = ["daytona", "e2b-code-interpreter", "modal", "runloop-api-client"]
@@ -14822,4 +14848,4 @@ third-party-runtimes = ["daytona", "e2b-code-interpreter", "modal", "runloop-api
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12,<3.14"
-content-hash = "54d8d1b20ca7d88287c479f43f7bbe0402c9202cbdb24c9d091b2c23245d6c47"
+content-hash = "7319bfec87aed5ed2803ad7cb947f875e83fa62216b1662a87b9b84078dc03e4"
@@ -130,6 +130,7 @@ test = [
  "pytest",
  "pytest-asyncio",
  "pytest-cov",
+  "pytest-forked",
  "pytest-playwright>=0.7",
  "pytest-timeout>=2.4",
  "pytest-xdist",
@@ -143,7 +144,7 @@ runtime = [

 [tool.poetry]
 name = "openhands-ai"
-version = "1.4.0"
+version = "1.5.0"
 description = "OpenHands: Code Less, Make More"
 authors = [ "OpenHands" ]
 license = "MIT"
@@ -280,6 +281,7 @@ optional = true
 pytest = "*"
 pytest-cov = "*"
 pytest-asyncio = "*"
+pytest-forked = "*"
 pytest-xdist = "*"
 pytest-playwright = "^0.7.0"
 pytest-timeout = "^2.4.0"
@@ -1035,7 +1035,7 @@ class TestLoadAndMergeAllSkills:

            # Act
            result = await service.load_and_merge_all_skills(
-                sandbox, 'owner/repo', '/workspace', 'http://localhost:8000'
+                sandbox, 'owner/repo', '/workspace/repo', 'http://localhost:8000'
            )

            # Assert
@@ -1073,7 +1073,7 @@ class TestLoadAndMergeAllSkills:
            # Act - pass empty string to simulate no agent server URL
            # This should still call load_skills_from_agent_server but it will fail
            result = await service.load_and_merge_all_skills(
-                sandbox, 'owner/repo', '/workspace', ''
+                sandbox, 'owner/repo', '/workspace/repo', ''
            )

            # Assert - should return empty list when agent_server_url is empty
@@ -1089,13 +1089,13 @@ class TestLoadAndMergeAllSkills:
    @patch(
        'openhands.app_server.app_conversation.app_conversation_service_base.build_sandbox_config'
    )
-    async def test_uses_working_dir_when_no_repository(
+    async def test_uses_project_dir_when_no_repository(
        self,
        mock_build_sandbox_config,
        mock_build_org_config,
        mock_load_skills,
    ):
-        """Test uses working_dir as project_dir when no repository is selected."""
+        """Test uses project_dir directly when no repository is selected."""
        # Arrange
        mock_user_context = Mock(spec=UserContext)
        with patch.object(AppConversationServiceBase, '__abstractmethods__', set()):
@@ -1164,7 +1164,7 @@ class TestLoadAndMergeAllSkills:

            # Act
            result = await service.load_and_merge_all_skills(
-                sandbox, 'owner/repo', '/workspace', 'http://localhost:8000'
+                sandbox, 'owner/repo', '/workspace/repo', 'http://localhost:8000'
            )

            # Assert
@@ -1199,6 +1199,9 @@ class TestLiveStatusAppConversationService:
        self.service._configure_llm_and_mcp.assert_called_once_with(
            self.mock_user, 'gpt-4'
        )
+        # When selected_repository='test/repo', project_dir is resolved
+        # to '/test/dir/repo' via get_project_dir.  All downstream calls
+        # (agent context, workspace, skills) must use this path.
        self.service._create_agent_with_context.assert_called_once_with(
            mock_llm,
            AgentType.DEFAULT,
@@ -1207,7 +1210,7 @@ class TestLiveStatusAppConversationService:
            self.mock_user.condenser_max_size,
            secrets=mock_secrets,
            git_provider=ProviderType.GITHUB,
-            working_dir='/test/dir',
+            working_dir='/test/dir/repo',
        )
        self.service._finalize_conversation_request.assert_called_once()

@@ -1989,6 +1992,111 @@ class TestLiveStatusAppConversationService:
        assert stdio_server['command'] == 'npx'
        assert stdio_server['env'] == {'TOKEN': 'value'}

+    # ------------------------------------------------------------------ #
+    #  Regression tests: workspace.working_dir == project_dir             #
+    # ------------------------------------------------------------------ #
+
+    def test_get_project_dir_with_repo(self):
+        """get_project_dir appends repo name to working_dir.
+
+        Only the part of the 'owner/repo' string after the slash is appended;
+        the owner/organization prefix does not appear in the resulting path.
+        """
+        from openhands.app_server.app_conversation.app_conversation_service_base import (
+            get_project_dir,
+        )
+
+        assert (
+            get_project_dir('/workspace/project', 'OpenHands/software-agent-sdk')
+            == '/workspace/project/software-agent-sdk'
+        )
+        # Same expectation with a short working_dir and a hyphenated repo name.
+        assert get_project_dir('/w', 'org/repo-name') == '/w/repo-name'
+
+    def test_get_project_dir_without_repo(self):
+        """get_project_dir returns working_dir unchanged when no repo selected.
+
+        Both None and the empty string are treated as "no repository".
+        """
+        from openhands.app_server.app_conversation.app_conversation_service_base import (
+            get_project_dir,
+        )
+
+        assert get_project_dir('/workspace/project', None) == '/workspace/project'
+        assert get_project_dir('/workspace/project', '') == '/workspace/project'
+
+    @pytest.mark.asyncio
+    async def test_build_request_workspace_uses_project_dir(self):
+        """workspace.working_dir in StartConversationRequest must equal project_dir.
+
+        This is the root cause of the V1 hook-stop bug: if workspace.working_dir
+        points to the sandbox mount root (/workspace/project) instead of the
+        cloned repo (/workspace/project/<repo>), the agent's CWD is wrong and
+        .openhands/hooks/on_stop.sh is not found.
+        """
+        self.mock_user_context.get_user_info.return_value = self.mock_user
+
+        mock_secrets = {'GITHUB_TOKEN': Mock()}
+        mock_llm = Mock(spec=LLM)
+        mock_agent = Mock(spec=Agent)
+
+        # Replace the service's helper methods with mocks so that only the
+        # working_dir handed to the finalize step is under test.
+        self.service._setup_secrets_for_git_providers = AsyncMock(
+            return_value=mock_secrets
+        )
+        self.service._configure_llm_and_mcp = AsyncMock(return_value=(mock_llm, {}))
+        self.service._create_agent_with_context = Mock(return_value=mock_agent)
+
+        # Filled in by capture_finalize with the workspace's working_dir.
+        captured = {}
+
+        async def capture_finalize(
+            agent, conversation_id, user, workspace, *args, **kwargs
+        ):
+            # Record the path on the workspace argument for the assertion below.
+            captured['workspace_working_dir'] = workspace.working_dir
+            return Mock(spec=StartConversationRequest)
+
+        self.service._finalize_conversation_request = AsyncMock(
+            side_effect=capture_finalize
+        )
+
+        # working_dir is the sandbox mount; a repository is selected, so the
+        # repo name is expected to be appended to it.
+        await self.service._build_start_conversation_request_for_user(
+            sandbox=self.mock_sandbox,
+            initial_message=None,
+            system_message_suffix=None,
+            git_provider=None,
+            working_dir='/workspace/project',
+            selected_repository='OpenHands/software-agent-sdk',
+        )
+
+        assert (
+            captured['workspace_working_dir'] == '/workspace/project/software-agent-sdk'
+        ), 'workspace.working_dir must point to the repo root, not the sandbox mount'
+
+    @pytest.mark.asyncio
+    async def test_build_request_no_repo_workspace_unchanged(self):
+        """Without selected_repository, workspace.working_dir == sandbox working_dir."""
+        self.mock_user_context.get_user_info.return_value = self.mock_user
+
+        # Replace the service's helper methods with mocks so that only the
+        # working_dir handed to the finalize step is under test.
+        self.service._setup_secrets_for_git_providers = AsyncMock(return_value={})
+        self.service._configure_llm_and_mcp = AsyncMock(
+            return_value=(Mock(spec=LLM), {})
+        )
+        self.service._create_agent_with_context = Mock(return_value=Mock(spec=Agent))
+
+        # Filled in by capture_finalize with the workspace's working_dir.
+        captured = {}
+
+        async def capture_finalize(
+            agent, conversation_id, user, workspace, *args, **kwargs
+        ):
+            # Record the path on the workspace argument for the assertion below.
+            captured['workspace_working_dir'] = workspace.working_dir
+            return Mock(spec=StartConversationRequest)
+
+        self.service._finalize_conversation_request = AsyncMock(
+            side_effect=capture_finalize
+        )
+
+        # No repository selected: the path must pass through unchanged.
+        await self.service._build_start_conversation_request_for_user(
+            sandbox=self.mock_sandbox,
+            initial_message=None,
+            system_message_suffix=None,
+            git_provider=None,
+            working_dir='/workspace/project',
+            selected_repository=None,
+        )
+
+        assert captured['workspace_working_dir'] == '/workspace/project'
+

 class TestPluginHandling:
    """Test cases for plugin-related functionality in LiveStatusAppConversationService."""
@@ -112,21 +112,15 @@ async def test_search_repositories_slash_query():
    query = 'PROJ/myrepo'

    mock_repo = _repo_dict('PROJ', slug='myrepo', name='My Repository')
-    mock_default_branch = {'displayId': 'main'}

    with patch.object(
        svc,
        '_fetch_paginated_data',
        new=AsyncMock(return_value=[mock_repo]),
    ) as mock_fetch:
-        with patch.object(
-            svc,
-            '_make_request',
-            new=AsyncMock(return_value=(mock_default_branch, {})),
-        ):
-            repos = await svc.search_repositories(
-                query, 25, 'name', 'asc', False, AppMode.SAAS
-            )
+        repos = await svc.search_repositories(
+            query, 25, 'name', 'asc', False, AppMode.SAAS
+        )

    mock_fetch.assert_called_once_with(
        'https://host.example.com/rest/api/1.0/projects/PROJ/repos',
@@ -135,6 +129,7 @@ async def test_search_repositories_slash_query():
    )
    assert len(repos) == 1
    assert repos[0].full_name == 'PROJ/myrepo'
+    assert repos[0].main_branch is None


@pytest.mark.asyncio
@@ -143,24 +138,19 @@ async def test_search_repositories_slash_query_filters_by_name():
    svc = make_service()
    matching = _repo_dict('PROJ', slug='proj-alpha', name='My Repository')
    non_matching = _repo_dict('PROJ', slug='proj-beta', name='Other Repo')
-    mock_default_branch = {'displayId': 'main'}

    with patch.object(
        svc,
        '_fetch_paginated_data',
        new=AsyncMock(return_value=[matching, non_matching]),
    ):
-        with patch.object(
-            svc,
-            '_make_request',
-            new=AsyncMock(return_value=(mock_default_branch, {})),
-        ):
-            repos = await svc.search_repositories(
-                'PROJ/my repository', 25, 'name', 'asc', False, AppMode.SAAS
-            )
+        repos = await svc.search_repositories(
+            'PROJ/my repository', 25, 'name', 'asc', False, AppMode.SAAS
+        )

    assert len(repos) == 1
    assert repos[0].full_name == 'PROJ/proj-alpha'
+    assert repos[0].main_branch is None


@pytest.mark.asyncio
@@ -169,24 +159,19 @@ async def test_search_repositories_slash_query_filters_by_slug():
    svc = make_service()
    matching = _repo_dict('PROJ', slug='my-repo', name='My Repository')
    non_matching = _repo_dict('PROJ', slug='other-repo', name='Other Repository')
-    mock_default_branch = {'displayId': 'main'}

    with patch.object(
        svc,
        '_fetch_paginated_data',
        new=AsyncMock(return_value=[matching, non_matching]),
    ):
-        with patch.object(
-            svc,
-            '_make_request',
-            new=AsyncMock(return_value=(mock_default_branch, {})),
-        ):
-            repos = await svc.search_repositories(
-                'PROJ/my-repo', 25, 'name', 'asc', False, AppMode.SAAS
-            )
+        repos = await svc.search_repositories(
+            'PROJ/my-repo', 25, 'name', 'asc', False, AppMode.SAAS
+        )

    assert len(repos) == 1
    assert repos[0].full_name == 'PROJ/my-repo'
+    assert repos[0].main_branch is None


 # ── get_paginated_repos ───────────────────────────────────────────────────────
@@ -199,18 +184,18 @@ async def test_get_paginated_repos_parses_values():
        'values': [_repo_dict()],
        'isLastPage': True,
    }
-    mock_default_branch = {'displayId': 'main'}

    with patch.object(
        svc,
        '_make_request',
-        side_effect=[(mock_response, {}), (mock_default_branch, {})],
+        return_value=(mock_response, {}),
    ):
        repos = await svc.get_paginated_repos(1, 25, 'name', 'PROJ')

    assert len(repos) == 1
    assert repos[0].full_name == 'PROJ/myrepo'
    assert repos[0].link_header == ''
+    assert repos[0].main_branch is None


@pytest.mark.asyncio
@@ -221,17 +206,17 @@ async def test_get_paginated_repos_has_next_page():
        'isLastPage': False,
        'nextPageStart': 25,
    }
-    mock_default_branch = {'displayId': 'main'}

    with patch.object(
        svc,
        '_make_request',
-        side_effect=[(mock_response, {}), (mock_default_branch, {})],
+        return_value=(mock_response, {}),
    ):
        repos = await svc.get_paginated_repos(1, 25, 'name', 'PROJ')

    assert len(repos) == 1
    assert 'rel="next"' in repos[0].link_header
+    assert repos[0].main_branch is None


@pytest.mark.asyncio
@@ -241,17 +226,17 @@ async def test_get_paginated_repos_last_page():
        'values': [_repo_dict()],
        'isLastPage': True,
    }
-    mock_default_branch = {'displayId': 'main'}

    with patch.object(
        svc,
        '_make_request',
-        side_effect=[(mock_response, {}), (mock_default_branch, {})],
+        return_value=(mock_response, {}),
    ):
        repos = await svc.get_paginated_repos(1, 25, 'name', 'PROJ')

    assert len(repos) == 1
    assert repos[0].link_header == ''
+    assert repos[0].main_branch is None


@pytest.mark.asyncio
@@ -265,17 +250,17 @@ async def test_get_paginated_repos_filters_by_slug():
        ],
        'isLastPage': True,
    }
-    mock_default_branch = {'displayId': 'main'}

    with patch.object(
        svc,
        '_make_request',
-        side_effect=[(mock_response, {}), (mock_default_branch, {})],
+        return_value=(mock_response, {}),
    ):
        repos = await svc.get_paginated_repos(1, 25, 'name', 'PROJ', query='my-repo')

    assert len(repos) == 1
    assert repos[0].full_name == 'PROJ/my-repo'
+    assert repos[0].main_branch is None


@pytest.mark.asyncio
@@ -289,12 +274,11 @@ async def test_get_paginated_repos_filters_by_name():
        ],
        'isLastPage': True,
    }
-    mock_default_branch = {'displayId': 'main'}

    with patch.object(
        svc,
        '_make_request',
-        side_effect=[(mock_response, {}), (mock_default_branch, {})],
+        return_value=(mock_response, {}),
    ):
        repos = await svc.get_paginated_repos(
            1, 25, 'name', 'PROJ', query='my repository'
@@ -302,6 +286,7 @@ async def test_get_paginated_repos_filters_by_name():

    assert len(repos) == 1
    assert repos[0].full_name == 'PROJ/proj-alpha'
+    assert repos[0].main_branch is None


 # ── get_all_repositories ──────────────────────────────────────────────────────
@@ -320,14 +305,14 @@ async def test_get_all_repositories_iterates_projects():
            return [_repo_dict('PROJ2', 'repo2')]
        return []

-    mock_default_branch = {'displayId': 'main'}
    with patch.object(svc, '_fetch_paginated_data', side_effect=fake_fetch):
-        with patch.object(svc, '_make_request', return_value=(mock_default_branch, {})):
-            repos = await svc.get_all_repositories('name', AppMode.SAAS)
+        repos = await svc.get_all_repositories('name', AppMode.SAAS)

    full_names = {r.full_name for r in repos}
    assert 'PROJ1/repo1' in full_names
    assert 'PROJ2/repo2' in full_names
+    for repo in repos:
+        assert repo.main_branch is None


 # ── get_installations ─────────────────────────────────────────────────────────
@@ -352,4 +337,4 @@ async def test_get_installations_returns_project_keys():
 async def _make_parsed_repo(svc, repo_dict):
    """Helper to create a parsed Repository from a repo dict (with mocked default branch)."""
    with patch.object(svc, '_make_request', return_value=({'displayId': 'main'}, {})):
-        return await svc._parse_repository(repo_dict)
+        return await svc._parse_repository(repo_dict, fetch_default_branch=True)
@@ -7383,11 +7383,11 @@ wheels = [

 [[package]]
 name = "pypdf"
-version = "6.7.5"
+version = "6.8.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/f6/52/37cc0aa9e9d1bf7729a737a0d83f8b3f851c8eb137373d9f71eafb0a3405/pypdf-6.7.5.tar.gz", hash = "sha256:40bb2e2e872078655f12b9b89e2f900888bb505e88a82150b64f9f34fa25651d", size = 5304278, upload-time = "2026-03-02T09:05:21.464Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/b4/a3/e705b0805212b663a4c27b861c8a603dba0f8b4bb281f96f8e746576a50d/pypdf-6.8.0.tar.gz", hash = "sha256:cb7eaeaa4133ce76f762184069a854e03f4d9a08568f0e0623f7ea810407833b", size = 5307831, upload-time = "2026-03-09T13:37:40.591Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/05/89/336673efd0a88956562658aba4f0bbef7cb92a6fbcbcaf94926dbc82b408/pypdf-6.7.5-py3-none-any.whl", hash = "sha256:07ba7f1d6e6d9aa2a17f5452e320a84718d4ce863367f7ede2fd72280349ab13", size = 331421, upload-time = "2026-03-02T09:05:19.722Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/ec/4ccf3bb86b1afe5d7176e1c8abcdbf22b53dd682ec2eda50e1caadcf6846/pypdf-6.8.0-py3-none-any.whl", hash = "sha256:2a025080a8dd73f48123c89c57174a5ff3806c71763ee4e49572dc90454943c7", size = 332177, upload-time = "2026-03-09T13:37:38.774Z" },
 ]

 [[package]]
Author	SHA1	Message	Date
openhands	05270dfe2a	Add GitHub resolver integration tests with mock server This adds integration tests for the GitHub resolver feature: - Mock GitHub Server (mocks/github-mock-server.ts): - Simulates GitHub REST API endpoints - Handles webhook signature verification - Records webhook events and outgoing responses - Provides test control endpoints for assertions - Webhook Payload Templates (mocks/github-webhook-payloads.ts): - Issue labeled events - Issue comment events - PR review comment events - Mock GitHub Client (mocks/mock-github-client.ts): - Client utilities for triggering webhooks - Helpers for waiting on resolver responses - GitHub Resolver Test Spec (tests/github-resolver.spec.ts): - Mock Server Mode: Tests full webhook flow with mock server - Live Environment Mode: Tests against staging/production - Error handling tests for invalid signatures and malformed data - Tests run against the existing authenticated session - Updated package.json with new scripts: - npm run test:github-resolver - npm run mock:github - Updated README with comprehensive documentation Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-12 14:46:06 +00:00
openhands	b9bd04e1cb	Add ESLint and Prettier lint checks for integration tests - Add .eslintrc with airbnb-base + TypeScript + Prettier config - Add .prettierrc.json matching frontend configuration - Add lint dependencies to package.json - Add typecheck, lint, and lint:fix scripts - Auto-format all TypeScript files with Prettier - Fix unused imports and parameters Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 21:31:18 +00:00
tofarr	afc499933b	Lint fix	2026-03-11 15:26:32 -06:00
tofarr	8f0e372133	Marked tests critical	2026-03-11 15:23:13 -06:00
tofarr	25540c6b4e	Fixed delete test	2026-03-11 15:19:52 -06:00
openhands	49627d44ca	Fix API key test: add wait after deletion and visibility check - Add 1 second wait after key deletion for page to settle - Add explicit visibility check before clicking Create API Key button Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 21:08:37 +00:00
tofarr	c18c9e2118	Moved API test to end	2026-03-11 15:04:13 -06:00
openhands	fb45bb2c92	Add API key creation and validation test Test flow: - Navigate to API Keys page via user menu - Verify Refresh API Key button is visible (indicates credits available) - Delete existing 'Integration Test Key' if present - Create new API key named 'Integration Test Key' - Capture the key from the modal - Test the key by calling GET /api/v1/sandboxes/search with X-Access-Token header - Verify response contains at least 1 sandbox Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 20:16:48 +00:00
tofarr	e65e9103d8	Better button detection	2026-03-11 13:57:35 -06:00
openhands	29f5bfdf0c	Fix Stripe checkout: wait for Pay button to be attached (not visible) Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 19:42:49 +00:00
openhands	8c90ddcea2	Fix Stripe checkout: wait for Pay button instead of networkidle The networkidle wait was timing out because the 'Pay with Link' feature loads slowly. Since we don't use that feature, we now wait for the Pay button to be visible instead, which indicates the form is ready to fill. Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 19:39:04 +00:00
openhands	ba26ad5147	Add Stripe billing test for purchasing $10 credits Test flow: - Navigate to billing page via user menu - Capture initial balance - Enter $10 and click Add Credit - Fill Stripe checkout form (test card 5105105105105100) - Submit payment and return to billing page - Verify balance increased by exactly $10 Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 19:23:10 +00:00
openhands	33c6f63589	Update Tavily test to navigate to existing conversation Instead of starting a new conversation, the Tavily search test now navigates to the first running conversation from the recent conversations list. This is faster and tests the navigation flow. Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 18:59:33 +00:00
Tim O'Farrell	63365e2793	Merge branch 'main' into add-playwright-smoke-tests	2026-03-11 12:56:00 -06:00
openhands	b45cf6d23f	Refactor smoke tests to use waitForMessageContaining - Remove TEST_PROMPT environment variable (use specific prompts per test) - Add waitForMessageContaining() method to ConversationPage that polls for a message containing expected text instead of just getting last message - Update 'start conversation' test to use 'Reverse the word hello' prompt and wait for 'olleh' in response - Update Tavily test to wait for 'Micheál Martin' in response - This fixes flaky tests where LLM outputs additional messages after the expected response Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 18:50:31 +00:00
mamoodi	4eb6e4da09	Release 1.5.0 (#13336)	2026-03-11 14:50:13 -04:00
Tim O'Farrell	80dc8b0fbf	Merge branch 'main' into add-playwright-smoke-tests	2026-03-11 12:28:23 -06:00
openhands	ab3b5dc947	Add Tavily search test for Irish Prime Minister Add test that: - Starts a new conversation - Prompts agent to use Tavily search to find the PM of Ireland - Verifies response contains 'Micheál Martin' - Uses 180s timeout to allow for search operation Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 18:03:47 +00:00
openhands	43bc9a8a1a	Replace error banner test with navigate to conversation test - Remove 'should not display error banner on successful interaction' test - Add 'should be able to navigate to a running conversation' test that: - Goes to home page - Clicks first conversation in recent conversations list - Waits for conversation status to show 'Waiting for task' - Add clickFirstConversation() method to HomePage Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 17:37:39 +00:00
openhands	7902806a92	Add verification that coin flip response contains heads or tails Verify the agent's response to 'Flip a coin!' contains either 'heads' or 'tails' (case insensitive) to ensure the agent actually processed the request correctly. Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 17:18:25 +00:00
dependabot[bot]	7e66304746	chore(deps): bump pypdf from 6.7.5 to 6.8.0 (#13348) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 12:09:09 -05:00
openhands	610856cdd4	Merge conversation start and prompt tests into single test Combined 'should be able to start a conversation and interact with agent' and 'should be able to send a prompt and receive response without errors' into one test since sending a prompt depends on having started a conversation. This avoids duplicate setup (navigating to home, starting conversation, waiting for ready) and tests the complete user flow in one go. Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 15:09:17 +00:00
Graham Neubig	a8b12e8eb8	Remove Common Room sync scripts (#13347) Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 10:48:37 -04:00
openhands	500005ec4d	Fix openUserMenu to wait for async state before interacting The account settings menu is conditionally rendered based on async state (config loaded, user authenticated, providers loaded). The previous implementation would fail because: 1. The menu DOM element doesn't exist until async conditions are met 2. Even if avatar is visible, the menu might not be rendered yet Changes: - Wait for user avatar to be visible first - Wait for menu to be 'attached' to DOM (ensures async state loaded) - Hover over user-actions container (parent with group class) to trigger the CSS group-hover visibility - Then verify menu is visible Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 13:11:16 +00:00
tofarr	ac0674287c	Enable user menu by click rather than hover	2026-03-11 06:50:56 -06:00
openhands	df6c5f4de3	Fix TypeScript errors in smoke tests - Add default value for buttonId parameter in startNewConversation() - Fix TEST_REPO_URL undefined handling with proper conditional checks - Restore openUserMenu to use hover instead of click Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 12:46:28 +00:00
openhands	6f1df78b78	Update waitForConversationReady to check for 'Waiting for task' text - Change default timeout from 90s to 30s - Replace input enabled check with text-based search for 'Waiting for task' - Using text search since data-testid is not yet deployed to staging Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 12:44:49 +00:00
tofarr	f8b456962c	WIP	2026-03-11 06:39:37 -06:00
Xingyao Wang	53bb82fe2e	fix: use project_dir consistently for workspace.working_dir, setup.sh, and git hooks (#13329) Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 15:26:34 +08:00
openhands	213fc9d752	Add agent status wait for 'Waiting for task' in smoke tests - Add data-testid='agent-status-text' to agent-status.tsx span element - Add wait for agent status to show 'Waiting for task' before proceeding in smoke.spec.ts conversation test Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-11 02:02:09 +00:00
Tim O'Farrell	8b78397ec4	Merge branch 'main' into add-playwright-smoke-tests	2026-03-10 19:53:54 -06:00
tofarr	adc6ec77ae	Revert back to click rather than hover	2026-03-10 19:53:10 -06:00
tofarr	c1cd21e94d	Merge branch 'add-playwright-smoke-tests' of https://github.com/OpenHands/OpenHands into add-playwright-smoke-tests	2026-03-10 19:51:41 -06:00
tofarr	36bf86db36	General updates - starting a conversation	2026-03-10 19:51:03 -06:00
openhands	72ab6aed88	Fix user menu to use hover instead of click in Playwright tests The user settings menu appears on hover in non-mobile mode due to CSS group-hover classes, not on click. Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-10 19:42:56 +00:00
Tim O'Farrell	db40eb1e94	Using the web_url where it is configured rather than the request.url (#13319) Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-10 13:11:33 -06:00
openhands	7426b913a1	Handle Keycloak session redirect and TOS acceptance in Playwright tests - Update authenticateWithGitHub to detect redirects to home page or /accept-tos when user is already logged in to Keycloak - Add handleTOSAcceptance function to check TOS checkbox and submit - Add post-authentication TOS handling for users completing GitHub OAuth Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-10 18:36:10 +00:00
Hiep Le	debbaae385	fix(backend): inherit organization llm settings for new members (#13330)	2026-03-11 01:28:46 +07:00
Juan Michelini	5e5950b091	Add Gemini-3.1-Pro-Preview model support to frontend (#13253) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Ray Myers <ray.myers@gmail.com>	2026-03-10 16:18:13 +00:00
John-Mason P. Shackelford	c7ff560465	Fix getGitPath to handle nested GitLab group paths (#13006) Co-authored-by: openhands <openhands@all-hands.dev>	2026-03-10 11:12:08 -05:00
Joe Laverty	3432bbbb88	fix: Remove N+1 request from Bitbucket Data Center integration (#13281)	2026-03-10 11:08:30 -05:00
Hiep Le	fc24be2627	fix(frontend): preserve login_method param to enable session re-authentication (#13310)	2026-03-10 22:52:40 +07:00
Hiep Le	bc72b38d6e	fix(backend): propagate LLM settings to all org members when admin saves settings (#13326)	2026-03-10 22:52:01 +07:00
Dream	145f1266e6	feat(frontend): create a separate UI tab for monitoring tasks (#13065) Co-authored-by: hieptl <hieptl.developer@gmail.com>	2026-03-10 22:31:38 +07:00
tofarr	4b32786588	Updated test	2026-03-10 09:26:12 -06:00
tofarr	c4929925fb	Added prompt	2026-03-09 20:50:22 -06:00
openhands	158799b45e	fix: make storageState conditional on auth file existence The Playwright config was failing with ENOENT when auth.json didn't exist because storageState was unconditionally set to the auth file path. Now the config checks if fixtures/auth.json exists before setting storageState, allowing the setup project to run first and create the auth file.	2026-03-10 02:49:04 +00:00
Tim O'Farrell	4f0f0d0b98	Merge branch 'main' into add-playwright-smoke-tests	2026-03-09 17:44:20 -06:00
openhands	07c9c3016c	chore: Disable automatic workflow triggers, manual only for now Keep only workflow_dispatch trigger active. Automatic triggers (push, PR, schedule) and dependent jobs are commented out and can be re-enabled later. Co-authored-by: openhands <openhands@all-hands.dev>	2026-02-10 09:50:17 +00:00
openhands	0f912e09cc	feat: Add Playwright-based smoke tests for integration testing Add a comprehensive integration test framework using Playwright to enable automated smoke testing across different environments (staging, production, feature branches). Features: - Multi-environment support with configurable BASE_URL - GitHub OAuth and Keycloak authentication handling - Page Object Model architecture for maintainable tests - Authentication state persistence for faster test runs - GitHub Actions workflow for CI/CD integration Tests cover: - User authentication flow - Home screen accessibility - Repository selection - Conversation creation and agent interaction - Error-free prompt/response cycle - Health checks (SSL, console errors) Co-authored-by: openhands <openhands@all-hands.dev>	2026-02-10 09:47:52 +00:00