docs: fix CLI mode doc when running in dev mode

Fix for issues where callbacks are not batched (#10235)
Add BatchedWebHookFileStore for batching webhook updates (#10119)
2026-04-29 03:00:45 -04:00 · 2025-08-11 17:55:21 -04:00 · 2025-08-11 15:44:48 -06:00 · 2025-08-11 12:51:08 -06:00 · 2025-08-11 12:17:18 -06:00 · 2025-08-11 18:11:08 +00:00
121 changed files with 2512 additions and 5621 deletions
@@ -2,7 +2,7 @@
 # See https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners

 # Frontend code owners
-/frontend/ @rbren @amanape
+/frontend/ @amanape
 /openhands-ui/ @amanape

 # Evaluation code owners
@@ -1,33 +1,53 @@
 #!/bin/bash

+set -euxo pipefail
+
 # This script updates the PR description with commands to run the PR locally
 # It adds both Docker and uvx commands

 # Get the branch name for the PR
-BRANCH_NAME=$(gh pr view $PR_NUMBER --json headRefName --jq .headRefName)
+BRANCH_NAME=$(gh pr view "$PR_NUMBER" --json headRefName --jq .headRefName)

 # Define the Docker command
 DOCKER_RUN_COMMAND="docker run -it --rm \
  -p 3000:3000 \
  -v /var/run/docker.sock:/var/run/docker.sock \
  --add-host host.docker.internal:host-gateway \
-  -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:$SHORT_SHA-nikolaik \
-  --name openhands-app-$SHORT_SHA \
-  docker.all-hands.dev/all-hands-ai/openhands:$SHORT_SHA"
+  -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:${SHORT_SHA}-nikolaik \
+  --name openhands-app-${SHORT_SHA} \
+  docker.all-hands.dev/all-hands-ai/openhands:${SHORT_SHA}"

 # Define the uvx command
-UVX_RUN_COMMAND="uvx --python 3.12 --from git+https://github.com/All-Hands-AI/OpenHands@$BRANCH_NAME openhands"
+UVX_RUN_COMMAND="uvx --python 3.12 --from git+https://github.com/All-Hands-AI/OpenHands@${BRANCH_NAME} openhands"

 # Get the current PR body
-PR_BODY=$(gh pr view $PR_NUMBER --json body --jq .body)
+PR_BODY=$(gh pr view "$PR_NUMBER" --json body --jq .body)

 # Prepare the new PR body with both commands
 if echo "$PR_BODY" | grep -q "To run this PR locally, use the following command:"; then
-  # For existing PR descriptions, replace the command section
-  NEW_PR_BODY=$(echo "$PR_BODY" | sed "s|To run this PR locally, use the following command:.*\`\`\`|To run this PR locally, use the following command:\n\nGUI with Docker:\n\`\`\`\n$DOCKER_RUN_COMMAND\n\`\`\`\n\nCLI with uvx:\n\`\`\`\n$UVX_RUN_COMMAND\n\`\`\`|s")
+  # For existing PR descriptions, use a more robust approach
+  # Split the PR body at the "To run this PR locally" section and replace everything after it
+  BEFORE_SECTION=$(echo "$PR_BODY" | sed '/To run this PR locally, use the following command:/,$d')
+  NEW_PR_BODY=$(cat <<EOF
+${BEFORE_SECTION}
+
+To run this PR locally, use the following command:
+
+GUI with Docker:
+\`\`\`
+${DOCKER_RUN_COMMAND}
+\`\`\`
+
+CLI with uvx:
+\`\`\`
+${UVX_RUN_COMMAND}
+\`\`\`
+EOF
+)
 else
-  # For new PR descriptions
-  NEW_PR_BODY="${PR_BODY}
+  # For new PR descriptions: use heredoc safely without indentation
+  NEW_PR_BODY=$(cat <<EOF
+$PR_BODY

 ---

@@ -35,15 +55,17 @@ To run this PR locally, use the following command:

 GUI with Docker:
 \`\`\`
-$DOCKER_RUN_COMMAND
+${DOCKER_RUN_COMMAND}
 \`\`\`

 CLI with uvx:
 \`\`\`
-$UVX_RUN_COMMAND
-\`\`\`"
+${UVX_RUN_COMMAND}
+\`\`\`
+EOF
+)
 fi

 # Update the PR description
 echo "Updating PR description with Docker and uvx commands"
-gh pr edit $PR_NUMBER --body "$NEW_PR_BODY"
+gh pr edit "$PR_NUMBER" --body "$NEW_PR_BODY"
@@ -48,11 +48,11 @@ jobs:
      - name: Build Environment
        run: make build
      - name: Run Unit Tests
-        run: poetry run pytest --forked -n auto -svv ./tests/unit
+        run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest --forked -n auto -svv ./tests/unit
      - name: Run Runtime Tests with CLIRuntime
-        run: TEST_RUNTIME=cli poetry run pytest -svv tests/runtime/test_bash.py
+        run: PYTHONPATH=".:$PYTHONPATH" TEST_RUNTIME=cli poetry run pytest -svv tests/runtime/test_bash.py
      - name: Run E2E Tests
-        run: poetry run pytest -svv tests/e2e
+        run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest -svv tests/e2e

  # Run specific Windows python tests
  test-on-windows:
@@ -77,9 +77,11 @@ jobs:
      - name: Run Windows unit tests
        run: poetry run pytest -svv tests/unit/test_windows_bash.py
        env:
+          PYTHONPATH: ".;$env:PYTHONPATH"
          DEBUG: "1"
      - name: Run Windows runtime tests with LocalRuntime
        run: $env:TEST_RUNTIME="local"; poetry run pytest -svv tests/runtime/test_bash.py
        env:
+          PYTHONPATH: ".;$env:PYTHONPATH"
          TEST_RUNTIME: local
          DEBUG: "1"
@@ -12,11 +12,11 @@ jobs:
    steps:
      - uses: actions/stale@v9
        with:
-          stale-issue-message: 'This issue is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
-          stale-pr-message: 'This PR is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
-          days-before-stale: 30
+          stale-issue-message: 'This issue is stale because it has been open for 40 days with no activity. Remove the stale label or leave a comment, otherwise it will be closed in 10 days.'
+          stale-pr-message: 'This PR is stale because it has been open for 40 days with no activity. Remove the stale label or leave a comment, otherwise it will be closed in 10 days.'
+          days-before-stale: 40
          exempt-issue-labels: 'roadmap'
-          close-issue-message: 'This issue was closed because it has been stalled for over 30 days with no activity.'
-          close-pr-message: 'This PR was closed because it has been stalled for over 30 days with no activity.'
-          days-before-close: 7
+          close-issue-message: 'This issue was automatically closed due to 50 days of inactivity. We do this to help keep the issues somewhat manageable and focus on active issues.'
+          close-pr-message: 'This PR was closed because it had no activity for 50 days. If you feel this was closed in error, and you would like to continue the PR, please resubmit or let us know.'
+          days-before-close: 10
          operations-per-run: 150
@@ -1,126 +0,0 @@
-# Consolidated Gemini Performance Test Suite
-
-This document describes the consolidated and deduplicated test suite for investigating Gemini 2.5 Pro performance issues in OpenHands.
-
-## 📁 Test Files Overview
-
-### 1. `test_thinking_budget.py` - **PRIMARY THINKING/REASONING TEST**
-**Purpose**: Primary test for thinking budget and reasoning effort configurations
-**Features**:
- Tests old vs new Google Generative AI APIs
- Compares thinking budget configurations (128, 1024, 2048, 4096 tokens)
- Tests reasoning_effort parameters via LiteLLM
- Includes direct REST API calls for comparison
- **User Preference**: This is the main file for thinking/reasoning tests
-
-### 2. `test_litellm_comprehensive.py` - **COMPREHENSIVE LITELLM TEST**
-**Purpose**: Consolidated LiteLLM performance testing (replaces test_litellm_performance.py + test_openhands_litellm.py)
-**Features**:
- Basic LiteLLM configurations (streaming, temperature, etc.)
- OpenHands-style configuration and calls
- Reasoning effort and thinking budget parameters
- Comprehensive performance analysis and comparison
- **Consolidation**: Combines functionality from 2 previous files
-
-### 3. `test_native_gemini.py` - **NATIVE GOOGLE API TEST**
-**Purpose**: Tests native Google Generative AI library (like RooCode uses)
-**Features**:
- Direct Google API calls without LiteLLM abstraction
- Streaming and non-streaming tests
- Performance comparison baseline
- **Baseline**: Shows optimal performance without middleware
-
-### 4. `test_openhands_gemini_fix.py` - **OPENHANDS FIX VERIFICATION**
-**Purpose**: Tests the actual OpenHands Gemini performance fix implementation
-**Features**:
- Tests OpenHands with optimized thinking budget configuration
- Verifies 2.5x speedup (from ~25s to ~10s)
- Configuration inspection and validation
- **Implementation**: Tests the actual fix we deployed
-
-### 5. `run_performance_tests.py` - **TEST ORCHESTRATOR**
-**Purpose**: Runs all tests in sequence and provides comprehensive analysis
-**Features**:
- Dependency checking
- Sequential test execution
- Performance metrics extraction
- Comparative analysis across all test types
- **Orchestrator**: Runs all tests and provides summary
-
-## 🗑️ Removed Files (Redundant)
-
-### Removed: `quick_test.py`
- **Reason**: Very basic test, functionality covered by `test_native_gemini.py`
- **Redundancy**: Simple native API test already in comprehensive native test
-
-### Removed: `test_litellm_performance.py`
- **Reason**: Merged into `test_litellm_comprehensive.py`
- **Redundancy**: Basic LiteLLM configurations now in comprehensive test
-
-### Removed: `test_openhands_litellm.py`
- **Reason**: Merged into `test_litellm_comprehensive.py`
- **Redundancy**: OpenHands-style calls now in comprehensive test
-
-## 🎯 Test Suite Organization
-
-```
-Performance Testing Hierarchy:
-├── run_performance_tests.py (Orchestrator)
-├── test_thinking_budget.py (Primary thinking/reasoning)
-├── test_litellm_comprehensive.py (All LiteLLM scenarios)
-├── test_native_gemini.py (Baseline performance)
-└── test_openhands_gemini_fix.py (Fix verification)
-```
-
-## 🚀 Usage
-
-### Run Individual Tests:
-```bash
-# Primary thinking/reasoning test
-python test_thinking_budget.py
-
-# Comprehensive LiteLLM test
-python test_litellm_comprehensive.py
-
-# Native API baseline
-python test_native_gemini.py
-
-# OpenHands fix verification
-python test_openhands_gemini_fix.py
-```
-
-### Run Complete Suite:
-```bash
-# Run all tests with analysis
-python run_performance_tests.py
-```
-
-## 📊 Test Coverage
-
-| Test Aspect | Primary Test File | Coverage |
-|-------------|------------------|----------|
-| **Thinking Budget** | `test_thinking_budget.py` | ✅ Complete |
-| **Reasoning Effort** | `test_thinking_budget.py` | ✅ Complete |
-| **LiteLLM Performance** | `test_litellm_comprehensive.py` | ✅ Complete |
-| **OpenHands Style** | `test_litellm_comprehensive.py` | ✅ Complete |
-| **Native API Baseline** | `test_native_gemini.py` | ✅ Complete |
-| **Fix Verification** | `test_openhands_gemini_fix.py` | ✅ Complete |
-| **Streaming vs Non-streaming** | All files | ✅ Complete |
-| **Parameter Variations** | All files | ✅ Complete |
-
-## 🎉 Benefits of Consolidation
-
-1. **Reduced Redundancy**: Eliminated duplicate test logic across 3 files
-2. **Better Organization**: Clear separation of concerns by test purpose
-3. **Easier Maintenance**: Single comprehensive test instead of multiple overlapping ones
-4. **User Preference**: `test_thinking_budget.py` as primary thinking/reasoning test
-5. **Complete Coverage**: All original functionality preserved and enhanced
-
-## 🔧 Dependencies
-
- `litellm` - For LiteLLM testing
- `google-generativeai` - For old Google API
- `google-genai` - For new Google API with thinking budget
- `openhands` - For OpenHands fix testing
-
-All dependencies are checked by `run_performance_tests.py` before execution.
@@ -1,752 +0,0 @@
-{
-  "test_suite": "comprehensive_performance_analysis",
-  "timestamp": 1753576041.7115579,
-  "total_tests": 16,
-  "successful_tests": 16,
-  "thinking_budget_tests": {
-    "test_type": "thinking_budget",
-    "timestamp": 1753575753.837211,
-    "total_configs": 7,
-    "successful_configs": 7,
-    "results": [
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 2.797980308532715,
-        "step2_duration": 1.8835067749023438e-05,
-        "step3_duration": 2.499279260635376,
-        "total_duration": 5.2979230880737305,
-        "tool_call_success": true,
-        "tool_call_result": "5670.0",
-        "result_correct": false,
-        "step1_response_length": 0,
-        "step2_response_length": 0,
-        "step3_response_length": 86,
-        "message_count": 6,
-        "config_name": "Old API (No Thinking)",
-        "timestamp": 1753575680.1571221
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 1.8824458122253418,
-        "step2_duration": 1.5384819507598877,
-        "step3_duration": 2.318272113800049,
-        "total_duration": 5.739390850067139,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 5,
-        "step3_response_length": 160,
-        "message_count": 6,
-        "config_name": "New API - Thinking Budget: 128",
-        "timestamp": 1753575685.896559
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 2.7450361251831055,
-        "step2_duration": 1.0403151512145996,
-        "step3_duration": 5.529464960098267,
-        "total_duration": 9.314986944198608,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 36,
-        "step3_response_length": 153,
-        "message_count": 6,
-        "config_name": "New API - Thinking Budget: 1024",
-        "timestamp": 1753575695.211576
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 3.2801640033721924,
-        "step2_duration": 1.226274013519287,
-        "step3_duration": 5.528562068939209,
-        "total_duration": 10.035185813903809,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 7,
-        "step3_response_length": 131,
-        "message_count": 6,
-        "config_name": "New API - Thinking Budget: 4096",
-        "timestamp": 1753575705.246801
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 4.210190773010254,
-        "step2_duration": 7.360184669494629,
-        "step3_duration": 9.522583961486816,
-        "total_duration": 21.093040704727173,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 0,
-        "step3_response_length": 283,
-        "message_count": 6,
-        "config_name": "LiteLLM - Reasoning Effort: Low",
-        "timestamp": 1753575726.339884
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 3.9966609477996826,
-        "step2_duration": 1.2283189296722412,
-        "step3_duration": 15.889936923980713,
-        "total_duration": 21.115014791488647,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 35,
-        "step3_response_length": 0,
-        "message_count": 6,
-        "config_name": "LiteLLM - Reasoning Effort: High",
-        "timestamp": 1753575747.454922
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 2.030133008956909,
-        "step2_duration": 1.9902338981628418,
-        "step3_duration": 2.3604180812835693,
-        "total_duration": 6.380887031555176,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 34,
-        "step3_response_length": 277,
-        "message_count": 6,
-        "config_name": "LiteLLM - Thinking Budget: 128",
-        "timestamp": 1753575753.83583
-      }
-    ]
-  },
-  "litellm_comprehensive_tests": {
-    "test_type": "litellm_comprehensive",
-    "timestamp": 1753575966.9497,
-    "total_configs": 9,
-    "successful_configs": 9,
-    "results": [
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 3.1620140075683594,
-        "step2_duration": 6.163906097412109,
-        "step3_duration": 8.57595705986023,
-        "total_duration": 17.901986122131348,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 0,
-        "step3_response_length": 290,
-        "message_count": 6,
-        "config_name": "Basic LiteLLM",
-        "timestamp": 1753575823.836127
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 2.643059253692627,
-        "step2_duration": 4.244822978973389,
-        "step3_duration": 8.579889059066772,
-        "total_duration": 15.474514722824097,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 0,
-        "step3_response_length": 0,
-        "message_count": 6,
-        "config_name": "LiteLLM with Streaming",
-        "timestamp": 1753575839.3106902
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 3.299806833267212,
-        "step2_duration": 4.562235116958618,
-        "step3_duration": 9.42275094985962,
-        "total_duration": 17.284837007522583,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 34,
-        "step3_response_length": 288,
-        "message_count": 6,
-        "config_name": "OpenHands Style (No Stream)",
-        "timestamp": 1753575856.595548
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 2.8680617809295654,
-        "step2_duration": 4.986494064331055,
-        "step3_duration": 11.908216714859009,
-        "total_duration": 19.762842893600464,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 34,
-        "step3_response_length": 303,
-        "message_count": 6,
-        "config_name": "OpenHands Style (Streaming)",
-        "timestamp": 1753575876.358408
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 4.153742074966431,
-        "step2_duration": 1.2760770320892334,
-        "step3_duration": 10.748784065246582,
-        "total_duration": 16.178749799728394,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 35,
-        "step3_response_length": 0,
-        "message_count": 6,
-        "config_name": "Reasoning Effort: Low",
-        "timestamp": 1753575892.5371861
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 4.199495792388916,
-        "step2_duration": 11.224999904632568,
-        "step3_duration": 6.673478841781616,
-        "total_duration": 22.098058938980103,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 0,
-        "step3_response_length": 280,
-        "message_count": 6,
-        "config_name": "Reasoning Effort: Medium",
-        "timestamp": 1753575914.6352708
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 3.7451419830322266,
-        "step2_duration": 1.131227970123291,
-        "step3_duration": 12.550342082977295,
-        "total_duration": 17.426751136779785,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 35,
-        "step3_response_length": 306,
-        "message_count": 6,
-        "config_name": "Reasoning Effort: High",
-        "timestamp": 1753575932.0620391
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 3.0755691528320312,
-        "step2_duration": 3.7900118827819824,
-        "step3_duration": 8.599286079406738,
-        "total_duration": 15.464945077896118,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 34,
-        "step3_response_length": 306,
-        "message_count": 6,
-        "config_name": "Thinking Budget: 128",
-        "timestamp": 1753575947.527002
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 2.970345973968506,
-        "step2_duration": 4.713220119476318,
-        "step3_duration": 11.738292932510376,
-        "total_duration": 19.421957969665527,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 34,
-        "step3_response_length": 310,
-        "message_count": 6,
-        "config_name": "Thinking Budget: 1024",
-        "timestamp": 1753575966.948982
-      }
-    ]
-  },
-  "summary": {
-    "all_results": [
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 2.797980308532715,
-        "step2_duration": 1.8835067749023438e-05,
-        "step3_duration": 2.499279260635376,
-        "total_duration": 5.2979230880737305,
-        "tool_call_success": true,
-        "tool_call_result": "5670.0",
-        "result_correct": false,
-        "step1_response_length": 0,
-        "step2_response_length": 0,
-        "step3_response_length": 86,
-        "message_count": 6,
-        "config_name": "Old API (No Thinking)",
-        "timestamp": 1753575680.1571221
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 1.8824458122253418,
-        "step2_duration": 1.5384819507598877,
-        "step3_duration": 2.318272113800049,
-        "total_duration": 5.739390850067139,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 5,
-        "step3_response_length": 160,
-        "message_count": 6,
-        "config_name": "New API - Thinking Budget: 128",
-        "timestamp": 1753575685.896559
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 2.7450361251831055,
-        "step2_duration": 1.0403151512145996,
-        "step3_duration": 5.529464960098267,
-        "total_duration": 9.314986944198608,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 36,
-        "step3_response_length": 153,
-        "message_count": 6,
-        "config_name": "New API - Thinking Budget: 1024",
-        "timestamp": 1753575695.211576
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 3.2801640033721924,
-        "step2_duration": 1.226274013519287,
-        "step3_duration": 5.528562068939209,
-        "total_duration": 10.035185813903809,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 7,
-        "step3_response_length": 131,
-        "message_count": 6,
-        "config_name": "New API - Thinking Budget: 4096",
-        "timestamp": 1753575705.246801
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 4.210190773010254,
-        "step2_duration": 7.360184669494629,
-        "step3_duration": 9.522583961486816,
-        "total_duration": 21.093040704727173,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 0,
-        "step3_response_length": 283,
-        "message_count": 6,
-        "config_name": "LiteLLM - Reasoning Effort: Low",
-        "timestamp": 1753575726.339884
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 3.9966609477996826,
-        "step2_duration": 1.2283189296722412,
-        "step3_duration": 15.889936923980713,
-        "total_duration": 21.115014791488647,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 35,
-        "step3_response_length": 0,
-        "message_count": 6,
-        "config_name": "LiteLLM - Reasoning Effort: High",
-        "timestamp": 1753575747.454922
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 2.030133008956909,
-        "step2_duration": 1.9902338981628418,
-        "step3_duration": 2.3604180812835693,
-        "total_duration": 6.380887031555176,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 34,
-        "step3_response_length": 277,
-        "message_count": 6,
-        "config_name": "LiteLLM - Thinking Budget: 128",
-        "timestamp": 1753575753.83583
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 3.1620140075683594,
-        "step2_duration": 6.163906097412109,
-        "step3_duration": 8.57595705986023,
-        "total_duration": 17.901986122131348,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 0,
-        "step3_response_length": 290,
-        "message_count": 6,
-        "config_name": "Basic LiteLLM",
-        "timestamp": 1753575823.836127
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 2.643059253692627,
-        "step2_duration": 4.244822978973389,
-        "step3_duration": 8.579889059066772,
-        "total_duration": 15.474514722824097,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 0,
-        "step3_response_length": 0,
-        "message_count": 6,
-        "config_name": "LiteLLM with Streaming",
-        "timestamp": 1753575839.3106902
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 3.299806833267212,
-        "step2_duration": 4.562235116958618,
-        "step3_duration": 9.42275094985962,
-        "total_duration": 17.284837007522583,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 34,
-        "step3_response_length": 288,
-        "message_count": 6,
-        "config_name": "OpenHands Style (No Stream)",
-        "timestamp": 1753575856.595548
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 2.8680617809295654,
-        "step2_duration": 4.986494064331055,
-        "step3_duration": 11.908216714859009,
-        "total_duration": 19.762842893600464,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 34,
-        "step3_response_length": 303,
-        "message_count": 6,
-        "config_name": "OpenHands Style (Streaming)",
-        "timestamp": 1753575876.358408
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 4.153742074966431,
-        "step2_duration": 1.2760770320892334,
-        "step3_duration": 10.748784065246582,
-        "total_duration": 16.178749799728394,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 35,
-        "step3_response_length": 0,
-        "message_count": 6,
-        "config_name": "Reasoning Effort: Low",
-        "timestamp": 1753575892.5371861
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 4.199495792388916,
-        "step2_duration": 11.224999904632568,
-        "step3_duration": 6.673478841781616,
-        "total_duration": 22.098058938980103,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 0,
-        "step3_response_length": 280,
-        "message_count": 6,
-        "config_name": "Reasoning Effort: Medium",
-        "timestamp": 1753575914.6352708
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 3.7451419830322266,
-        "step2_duration": 1.131227970123291,
-        "step3_duration": 12.550342082977295,
-        "total_duration": 17.426751136779785,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 35,
-        "step3_response_length": 306,
-        "message_count": 6,
-        "config_name": "Reasoning Effort: High",
-        "timestamp": 1753575932.0620391
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 3.0755691528320312,
-        "step2_duration": 3.7900118827819824,
-        "step3_duration": 8.599286079406738,
-        "total_duration": 15.464945077896118,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 34,
-        "step3_response_length": 306,
-        "message_count": 6,
-        "config_name": "Thinking Budget: 128",
-        "timestamp": 1753575947.527002
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 2.970345973968506,
-        "step2_duration": 4.713220119476318,
-        "step3_duration": 11.738292932510376,
-        "total_duration": 19.421957969665527,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 34,
-        "step3_response_length": 310,
-        "message_count": 6,
-        "config_name": "Thinking Budget: 1024",
-        "timestamp": 1753575966.948982
-      }
-    ],
-    "fastest_configs": [
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 2.797980308532715,
-        "step2_duration": 1.8835067749023438e-05,
-        "step3_duration": 2.499279260635376,
-        "total_duration": 5.2979230880737305,
-        "tool_call_success": true,
-        "tool_call_result": "5670.0",
-        "result_correct": false,
-        "step1_response_length": 0,
-        "step2_response_length": 0,
-        "step3_response_length": 86,
-        "message_count": 6,
-        "config_name": "Old API (No Thinking)",
-        "timestamp": 1753575680.1571221
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 1.8824458122253418,
-        "step2_duration": 1.5384819507598877,
-        "step3_duration": 2.318272113800049,
-        "total_duration": 5.739390850067139,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 5,
-        "step3_response_length": 160,
-        "message_count": 6,
-        "config_name": "New API - Thinking Budget: 128",
-        "timestamp": 1753575685.896559
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 2.030133008956909,
-        "step2_duration": 1.9902338981628418,
-        "step3_duration": 2.3604180812835693,
-        "total_duration": 6.380887031555176,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 34,
-        "step3_response_length": 277,
-        "message_count": 6,
-        "config_name": "LiteLLM - Thinking Budget: 128",
-        "timestamp": 1753575753.83583
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 2.7450361251831055,
-        "step2_duration": 1.0403151512145996,
-        "step3_duration": 5.529464960098267,
-        "total_duration": 9.314986944198608,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 36,
-        "step3_response_length": 153,
-        "message_count": 6,
-        "config_name": "New API - Thinking Budget: 1024",
-        "timestamp": 1753575695.211576
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 3.2801640033721924,
-        "step2_duration": 1.226274013519287,
-        "step3_duration": 5.528562068939209,
-        "total_duration": 10.035185813903809,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 7,
-        "step3_response_length": 131,
-        "message_count": 6,
-        "config_name": "New API - Thinking Budget: 4096",
-        "timestamp": 1753575705.246801
-      }
-    ],
-    "slowest_configs": [
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 2.970345973968506,
-        "step2_duration": 4.713220119476318,
-        "step3_duration": 11.738292932510376,
-        "total_duration": 19.421957969665527,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 34,
-        "step3_response_length": 310,
-        "message_count": 6,
-        "config_name": "Thinking Budget: 1024",
-        "timestamp": 1753575966.948982
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 2.8680617809295654,
-        "step2_duration": 4.986494064331055,
-        "step3_duration": 11.908216714859009,
-        "total_duration": 19.762842893600464,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 34,
-        "step3_response_length": 303,
-        "message_count": 6,
-        "config_name": "OpenHands Style (Streaming)",
-        "timestamp": 1753575876.358408
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 4.210190773010254,
-        "step2_duration": 7.360184669494629,
-        "step3_duration": 9.522583961486816,
-        "total_duration": 21.093040704727173,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 0,
-        "step3_response_length": 283,
-        "message_count": 6,
-        "config_name": "LiteLLM - Reasoning Effort: Low",
-        "timestamp": 1753575726.339884
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 3.9966609477996826,
-        "step2_duration": 1.2283189296722412,
-        "step3_duration": 15.889936923980713,
-        "total_duration": 21.115014791488647,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 35,
-        "step3_response_length": 0,
-        "message_count": 6,
-        "config_name": "LiteLLM - Reasoning Effort: High",
-        "timestamp": 1753575747.454922
-      },
-      {
-        "success": true,
-        "error": null,
-        "step1_duration": 4.199495792388916,
-        "step2_duration": 11.224999904632568,
-        "step3_duration": 6.673478841781616,
-        "total_duration": 22.098058938980103,
-        "tool_call_success": true,
-        "tool_call_result": "5670",
-        "result_correct": true,
-        "step1_response_length": 0,
-        "step2_response_length": 0,
-        "step3_response_length": 280,
-        "message_count": 6,
-        "config_name": "Reasoning Effort: Medium",
-        "timestamp": 1753575914.6352708
-      }
-    ],
-    "performance_analysis": {
-      "fastest_time": 5.2979230880737305,
-      "slowest_time": 22.098058938980103,
-      "average_time": 14.999442055821419,
-      "median_time": 17.284837007522583,
-      "total_successful_tests": 16,
-      "success_rate": 100.0
-    }
-  }
-}
@@ -40,7 +40,7 @@ repos:
    hooks:
      - id: mypy
        additional_dependencies:
-          [types-requests, types-setuptools, types-pyyaml, types-toml, types-docker, pydantic, lxml]
+          [types-requests, types-setuptools, types-pyyaml, types-toml, types-docker, types-Markdown, pydantic, lxml]
        # To see gaps add `--html-report mypy-report/`
        entry: mypy --config-file dev_config/python/mypy.ini openhands/
        always_run: true
@@ -80,7 +80,7 @@ openhands
 <Note>
  If you have cloned the repository, you can also run the CLI directly using Poetry:

-  poetry run python -m openhands.cli.main
+  poetry run openhands
 </Note>

 3. Set your model, API key, and other preferences using the UI (or alternatively environment variables, below).
@@ -7,6 +7,67 @@ description: High level overview of the Graphical User Interface (GUI) in OpenHa

 - [OpenHands is running](/usage/local-setup)

+## Launching the GUI Server
+
+### Using the CLI Command
+
+You can launch the OpenHands GUI server directly from the command line using the `serve` command:
+
+<Callout type="info">
+**Prerequisites**: You need either the [OpenHands CLI installed](/usage/how-to/cli-mode), or `uv` installed so that you can run `uvx --python 3.12 --from openhands-ai openhands serve` instead. Otherwise, you'll need to use Docker directly (see the [Docker section](#using-docker-directly) below).
+</Callout>
+
+```bash
+openhands serve
+```
+
+This command will:
+- Check that Docker is installed and running
+- Pull the required Docker images
+- Launch the OpenHands GUI server at http://localhost:3000
+- Use the same configuration directory (`~/.openhands`) as the CLI mode
+
+#### Mounting Your Current Directory
+
+To mount your current working directory into the GUI server container, use the `--mount-cwd` flag:
+
+```bash
+openhands serve --mount-cwd
+```
+
+This is useful when you want to work on files in your current directory through the GUI. The directory will be mounted at `/workspace` inside the container.
+
+#### Using GPU Support
+
+If you have NVIDIA GPUs and want to make them available to the OpenHands container, use the `--gpu` flag:
+
+```bash
+openhands serve --gpu
+```
+
+This enables GPU support via nvidia-docker, making all available GPUs accessible to the container. You can combine this with other flags:
+
+```bash
+openhands serve --gpu --mount-cwd
+```
+
+**Prerequisites for GPU support:**
+- NVIDIA GPU drivers must be installed on your host system
+- [NVIDIA Container Toolkit (nvidia-docker2)](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) must be installed and configured
+
+#### Requirements
+
+Before using the `openhands serve` command, ensure that:
+- Docker is installed and running on your system
+- You have internet access to pull the required Docker images
+- Port 3000 is available on your system
+
+The CLI will automatically check these requirements and provide helpful error messages if anything is missing.
+
+### Using Docker Directly
+
+Alternatively, you can run the GUI server using Docker directly. See the [local setup guide](/usage/local-setup) for detailed Docker instructions.
+
 ## Overview

 ### Initial Setup
@@ -18,7 +18,7 @@ Based on these findings and community feedback, these are the latest models that
 ### Cloud / API-Based Models

 - [anthropic/claude-sonnet-4-20250514](https://www.anthropic.com/api) (recommended)
-- [openai/o4-mini](https://openai.com/index/introducing-o3-and-o4-mini/)
+- [openai/gpt-5-2025-08-07](https://openai.com/api/) (recommended)
 - [gemini/gemini-2.5-pro](https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/)
 - [deepseek/deepseek-chat](https://api-docs.deepseek.com/)
 - [moonshot/kimi-k2-0711-preview](https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2)
@@ -32,4 +32,4 @@ When running OpenHands, you'll need to set the following in the OpenHands UI thr

 Pricing follows official API provider rates. [You can view model prices here.](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json)

-For `qwen3-coder-480b`, we charge the cheapest FP8 rate available on openrouter: $0.4 per million input tokens and $1.6 per million output tokens.
+For `qwen3-coder-480b`, we charge the cheapest FP8 rate available on openrouter: \$0.4 per million input tokens and \$1.6 per million output tokens.
@@ -66,6 +66,30 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to

 ### Start the App

+#### Option 1: Using the CLI Launcher (Recommended)
+
+If you have Python 3.12+ installed, you can use the CLI launcher for a simpler experience:
+
+```bash
+# Install OpenHands
+pip install openhands-ai
+
+# Launch the GUI server
+openhands serve
+
+# Or with GPU support (requires nvidia-docker)
+openhands serve --gpu
+
+# Or with current directory mounted
+openhands serve --mount-cwd
+```
+
+Alternatively, if you have [uv](https://docs.astral.sh/uv/) installed, you can run `uvx --python 3.12 --from openhands-ai openhands serve`.
+
+This will automatically check the Docker requirements, pull the required images, and launch the GUI server. The `--gpu` flag enables GPU support via nvidia-docker, and `--mount-cwd` mounts your current directory into the container.
+
+#### Option 2: Using Docker Directly
+
 ```bash
 docker pull docker.all-hands.dev/all-hands-ai/runtime:0.51-nikolaik

@@ -18,8 +18,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -172,7 +172,7 @@ def process_instance(


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--answerer_model', '-a', default='gpt-3.5-turbo', help='answerer model'
    )
@@ -26,8 +26,8 @@ from openhands.controller.state.state import State
 from openhands.core.config import (
    AgentConfig,
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -525,7 +525,7 @@ def commit0_setup(dataset: pd.DataFrame, repo_split: str) -> pd.DataFrame:


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
@@ -10,7 +10,6 @@ import huggingface_hub
 import pandas as pd
 from datasets import load_dataset
 from PIL import Image
-from pydantic import SecretStr

 from evaluation.benchmarks.gaia.scorer import question_scorer
 from evaluation.benchmarks.gaia.utils import (
@@ -31,8 +30,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
    load_from_toml,
 )
 from openhands.core.config.utils import get_agent_config_arg
@@ -80,8 +79,7 @@ def get_config(

    config_copy = copy.deepcopy(config)
    load_from_toml(config_copy)
-    if config_copy.search_api_key:
-        config.search_api_key = SecretStr(config_copy.search_api_key)
+    config.search_api_key = config_copy.search_api_key
    return config


@@ -294,7 +292,7 @@ Here is the task:


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--level',
        type=str,
@@ -20,8 +20,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -134,7 +134,7 @@ def process_instance(


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--hubs',
        type=str,
@@ -38,8 +38,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -312,7 +312,7 @@ Ok now its time to start solving the question. Good luck!


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    # data split must be one of 'gpqa_main', 'gqpa_diamond', 'gpqa_experts', 'gpqa_extended'
    parser.add_argument(
        '--data-split',
@@ -21,7 +21,7 @@ from evaluation.utils.shared import (
 from openhands.core.config import (
    LLMConfig,
    OpenHandsConfig,
-    get_parser,
+    get_evaluation_parser,
    load_openhands_config,
 )
 from openhands.core.logger import openhands_logger as logger
@@ -167,7 +167,7 @@ def process_predictions(predictions_path: str):


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '-s',
        '--eval-split',
@@ -30,8 +30,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
    load_openhands_config,
 )
 from openhands.core.logger import openhands_logger as logger
@@ -358,7 +358,7 @@ Be thorough in your exploration, testing, and reasoning. It's fine if your think


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '-s',
        '--eval-split',
@@ -18,8 +18,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -267,7 +267,7 @@ def process_instance(


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
@@ -23,8 +23,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -229,7 +229,7 @@ def process_instance(


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()

    SUBSETS = [
        # Eurus subset: https://arxiv.org/abs/2404.02078
@@ -4,7 +4,11 @@ import pprint

 import tqdm

-from openhands.core.config import get_llm_config_arg, get_parser, load_openhands_config
+from openhands.core.config import (
+    get_evaluation_parser,
+    get_llm_config_arg,
+    load_openhands_config,
+)
 from openhands.core.logger import openhands_logger as logger
 from openhands.llm.llm import LLM

@@ -111,7 +115,7 @@ def classify_error(llm: LLM, failed_case: dict) -> str:


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--json_file_path',
        type=str,
@@ -34,8 +34,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
    load_openhands_config,
 )
 from openhands.core.logger import openhands_logger as logger
@@ -273,7 +273,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '-s',
        '--eval-split',
@@ -30,7 +30,7 @@ from evaluation.utils.shared import (
 from openhands.core.config import (
    LLMConfig,
    OpenHandsConfig,
-    get_parser,
+    get_evaluation_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime
@@ -323,7 +323,7 @@ def process_instance(


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--input-file',
        type=str,
@@ -32,8 +32,8 @@ from openhands.controller.state.state import State
 from openhands.core.config import (
    AgentConfig,
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -772,7 +772,7 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:

 if __name__ == '__main__':
    # pdb.set_trace()
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
@@ -21,8 +21,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -239,7 +239,7 @@ If the program uses some packages that are incompatible, please figure out alter


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--use-knowledge',
        type=str,
@@ -26,7 +26,7 @@ from evaluation.utils.shared import (
 from openhands.core.config import (
    LLMConfig,
    OpenHandsConfig,
-    get_parser,
+    get_evaluation_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime
@@ -353,7 +353,7 @@ def process_instance(


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--input-file',
        type=str,
@@ -43,8 +43,8 @@ from openhands.controller.state.state import State
 from openhands.core.config import (
    AgentConfig,
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.config.condenser_config import NoOpCondenserConfig
 from openhands.core.config.utils import get_condenser_config_arg
@@ -732,7 +732,7 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
@@ -28,8 +28,8 @@ from evaluation.utils.shared import (
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.config.condenser_config import NoOpCondenserConfig
 from openhands.core.config.utils import get_condenser_config_arg
@@ -201,7 +201,7 @@ def process_instance(


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
@@ -31,8 +31,8 @@ from openhands.controller.state.state import State
 from openhands.core.config import (
    AgentConfig,
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -644,7 +644,7 @@ SWEGYM_EXCLUDE_IDS = [
 ]

 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
@@ -41,7 +41,7 @@ from evaluation.utils.shared import (
    reset_logger_for_multiprocessing,
    run_evaluation,
 )
-from openhands.core.config import OpenHandsConfig, SandboxConfig, get_parser
+from openhands.core.config import OpenHandsConfig, SandboxConfig, get_evaluation_parser
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime
 from openhands.events.action import CmdRunAction
@@ -484,7 +484,7 @@ def count_and_log_fields(evaluated_predictions, fields, key):


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--input-file', type=str, required=True, help='Path to input predictions file'
    )
@@ -37,8 +37,8 @@ from openhands.core.config import (
    AgentConfig,
    OpenHandsConfig,
    SandboxConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -491,7 +491,7 @@ def prepare_dataset_pre(dataset: pd.DataFrame, filter_column: str) -> pd.DataFra


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
@@ -18,8 +18,8 @@ from openhands.core.config import (
    LLMConfig,
    OpenHandsConfig,
    get_agent_config_arg,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.config.agent_config import AgentConfig
 from openhands.core.logger import openhands_logger as logger
@@ -197,7 +197,7 @@ def run_evaluator(


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--task-image-name',
        type=str,
@@ -19,8 +19,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -157,7 +157,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
@@ -31,8 +31,8 @@ from openhands.controller.state.state import State
 from openhands.core.config import (
    AgentConfig,
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -565,7 +565,7 @@ SWEGYM_EXCLUDE_IDS = [
 ]

 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
@@ -1,5 +1,5 @@
 import { describe, expect, it } from "vitest";
-import { FileService } from "#/api/file-service/file-service.api";
+import OpenHands from "#/api/open-hands";
 import {
  FILE_VARIANTS_1,
  FILE_VARIANTS_2,
@@ -10,20 +10,20 @@ import {
 * You can find the mock handlers in `frontend/src/mocks/file-service-handlers.ts`.
 */

-describe("FileService", () => {
+describe("OpenHands File API", () => {
  it("should get a list of files", async () => {
-    await expect(FileService.getFiles("test-conversation-id")).resolves.toEqual(
+    await expect(OpenHands.getFiles("test-conversation-id")).resolves.toEqual(
      FILE_VARIANTS_1,
    );

    await expect(
-      FileService.getFiles("test-conversation-id-2"),
+      OpenHands.getFiles("test-conversation-id-2"),
    ).resolves.toEqual(FILE_VARIANTS_2);
  });

  it("should get content of a file", async () => {
    await expect(
-      FileService.getFile("test-conversation-id", "file1.txt"),
+      OpenHands.getFile("test-conversation-id", "file1.txt"),
    ).resolves.toEqual("Content of file1.txt");
  });
 });
@@ -3,8 +3,6 @@ import { afterEach, describe, expect, it, vi } from "vitest";
 import userEvent from "@testing-library/user-event";
 import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
 import { LaunchMicroagentModal } from "#/components/features/chat/microagent/launch-microagent-modal";
-import { MemoryService } from "#/api/memory-service/memory-service.api";
-import { FileService } from "#/api/file-service/file-service.api";
 import { I18nKey } from "#/i18n/declaration";

 vi.mock("react-router", async () => ({
@@ -85,9 +85,10 @@ describe("ConversationPanel", () => {
    vi.clearAllMocks();
    vi.restoreAllMocks();
    // Setup default mock for getUserConversations
-    vi.spyOn(OpenHands, "getUserConversations").mockResolvedValue([
-      ...mockConversations,
-    ]);
+    vi.spyOn(OpenHands, "getUserConversations").mockResolvedValue({
+      results: [...mockConversations],
+      next_page_id: null,
+    });
  });

  it("should render the conversations", async () => {
@@ -101,7 +102,10 @@ describe("ConversationPanel", () => {

  it("should display an empty state when there are no conversations", async () => {
    const getUserConversationsSpy = vi.spyOn(OpenHands, "getUserConversations");
-    getUserConversationsSpy.mockResolvedValue([]);
+    getUserConversationsSpy.mockResolvedValue({
+      results: [],
+      next_page_id: null,
+    });

    renderConversationPanel();

@@ -195,7 +199,10 @@ describe("ConversationPanel", () => {
    ];

    const getUserConversationsSpy = vi.spyOn(OpenHands, "getUserConversations");
-    getUserConversationsSpy.mockImplementation(async () => mockData);
+    getUserConversationsSpy.mockImplementation(async () => ({
+      results: mockData,
+      next_page_id: null,
+    }));

    const deleteUserConversationSpy = vi.spyOn(
      OpenHands,
@@ -249,7 +256,10 @@ describe("ConversationPanel", () => {
  it("should refetch data on rerenders", async () => {
    const user = userEvent.setup();
    const getUserConversationsSpy = vi.spyOn(OpenHands, "getUserConversations");
-    getUserConversationsSpy.mockResolvedValue([...mockConversations]);
+    getUserConversationsSpy.mockResolvedValue({
+      results: [...mockConversations],
+      next_page_id: null,
+    });

    function PanelWithToggle() {
      const [isOpen, setIsOpen] = React.useState(true);
@@ -343,7 +353,10 @@ describe("ConversationPanel", () => {
    ];

    const getUserConversationsSpy = vi.spyOn(OpenHands, "getUserConversations");
-    getUserConversationsSpy.mockResolvedValue(mockRunningConversations);
+    getUserConversationsSpy.mockResolvedValue({
+      results: mockRunningConversations,
+      next_page_id: null,
+    });

    renderConversationPanel();

@@ -407,7 +420,10 @@ describe("ConversationPanel", () => {
    ];

    const getUserConversationsSpy = vi.spyOn(OpenHands, "getUserConversations");
-    getUserConversationsSpy.mockImplementation(async () => mockData);
+    getUserConversationsSpy.mockImplementation(async () => ({
+      results: mockData,
+      next_page_id: null,
+    }));

    const stopConversationSpy = vi.spyOn(OpenHands, "stopConversation");
    stopConversationSpy.mockImplementation(async (id: string) => {
@@ -492,7 +508,10 @@ describe("ConversationPanel", () => {
    ];

    const getUserConversationsSpy = vi.spyOn(OpenHands, "getUserConversations");
-    getUserConversationsSpy.mockResolvedValue(mockMixedStatusConversations);
+    getUserConversationsSpy.mockResolvedValue({
+      results: mockMixedStatusConversations,
+      next_page_id: null,
+    });

    renderConversationPanel();

@@ -12,16 +12,16 @@
        "@heroui/use-infinite-scroll": "^2.2.10",
        "@microlink/react-json-view": "^1.26.2",
        "@monaco-editor/react": "^4.7.0-rc.0",
-        "@react-router/node": "^7.7.1",
-        "@react-router/serve": "^7.7.1",
+        "@react-router/node": "^7.8.0",
+        "@react-router/serve": "^7.8.0",
        "@react-types/shared": "^3.31.0",
        "@reduxjs/toolkit": "^2.8.2",
        "@stripe/react-stripe-js": "^3.9.0",
        "@stripe/stripe-js": "^7.8.0",
        "@tailwindcss/postcss": "^4.1.11",
        "@tailwindcss/vite": "^4.1.11",
-        "@tanstack/react-query": "^5.84.1",
-        "@vitejs/plugin-react": "^4.7.0",
+        "@tanstack/react-query": "^5.84.2",
+        "@vitejs/plugin-react": "^5.0.0",
        "@xterm/addon-fit": "^0.10.0",
        "@xterm/xterm": "^5.4.0",
        "axios": "^1.11.0",
@@ -33,9 +33,9 @@
        "i18next-http-backend": "^3.0.2",
        "isbot": "^5.1.29",
        "jose": "^6.0.12",
-        "lucide-react": "^0.536.0",
+        "lucide-react": "^0.539.0",
        "monaco-editor": "^0.52.2",
-        "posthog-js": "^1.258.5",
+        "posthog-js": "^1.259.0",
        "react": "^19.1.1",
        "react-dom": "^19.1.1",
        "react-highlight": "^0.15.0",
@@ -44,7 +44,7 @@
        "react-icons": "^5.5.0",
        "react-markdown": "^10.1.0",
        "react-redux": "^9.2.0",
-        "react-router": "^7.7.1",
+        "react-router": "^7.8.0",
        "react-select": "^5.10.2",
        "react-syntax-highlighter": "^15.6.1",
        "react-textarea-autosize": "^8.5.9",
@@ -53,7 +53,7 @@
        "sirv-cli": "^3.0.1",
        "socket.io-client": "^4.8.1",
        "tailwind-merge": "^3.3.1",
-        "vite": "^7.0.6",
+        "vite": "^7.1.1",
        "web-vitals": "^5.1.0",
        "ws": "^8.18.2"
      },
@@ -63,7 +63,7 @@
        "@babel/types": "^7.28.2",
        "@mswjs/socket.io-binding": "^0.2.0",
        "@playwright/test": "^1.54.2",
-        "@react-router/dev": "^7.7.1",
+        "@react-router/dev": "^7.8.0",
        "@tailwindcss/typography": "^0.5.16",
        "@tanstack/eslint-plugin-query": "^5.83.1",
        "@testing-library/dom": "^10.4.1",
@@ -4388,11 +4388,10 @@
      }
    },
    "node_modules/@react-router/dev": {
-      "version": "7.7.1",
-      "resolved": "https://registry.npmjs.org/@react-router/dev/-/dev-7.7.1.tgz",
-      "integrity": "sha512-ByfgHmAyfx/JQYN/QwUx1sFJlBA5Z3HQAZ638wHSb+m6khWtHqSaKCvPqQh1P00wdEAeV3tX5L1aUM/ceCF6+w==",
+      "version": "7.8.0",
+      "resolved": "https://registry.npmjs.org/@react-router/dev/-/dev-7.8.0.tgz",
+      "integrity": "sha512-5NA9yLZComM+kCD3zNPL3rjrAFjzzODY8hjAJlpz/6jpyXoF28W8QTSo8rxc56XVNLONM75Y5nq1wzeEcWFFKA==",
      "dev": true,
-      "license": "MIT",
      "dependencies": {
        "@babel/core": "^7.27.7",
        "@babel/generator": "^7.27.5",
@@ -4402,7 +4401,9 @@
        "@babel/traverse": "^7.27.7",
        "@babel/types": "^7.27.7",
        "@npmcli/package-json": "^4.0.1",
-        "@react-router/node": "7.7.1",
+        "@react-router/node": "7.8.0",
+        "@vitejs/plugin-react": "^4.5.2",
+        "@vitejs/plugin-rsc": "0.4.11",
        "arg": "^5.0.1",
        "babel-dead-code-elimination": "^1.0.6",
        "chokidar": "^4.0.0",
@@ -4429,8 +4430,8 @@
        "node": ">=20.0.0"
      },
      "peerDependencies": {
-        "@react-router/serve": "^7.7.1",
-        "react-router": "^7.7.1",
+        "@react-router/serve": "^7.8.0",
+        "react-router": "^7.8.0",
        "typescript": "^5.1.0",
        "vite": "^5.1.0 || ^6.0.0 || ^7.0.0",
        "wrangler": "^3.28.2 || ^4.0.0"
@@ -4447,6 +4448,41 @@
        }
      }
    },
+    "node_modules/@react-router/dev/node_modules/@rolldown/pluginutils": {
+      "version": "1.0.0-beta.27",
+      "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.27.tgz",
+      "integrity": "sha512-+d0F4MKMCbeVUJwG96uQ4SgAznZNSq93I3V+9NHA4OpvqG8mRCpGdKmK8l/dl02h2CCDHwW2FqilnTyDcAnqjA==",
+      "dev": true
+    },
+    "node_modules/@react-router/dev/node_modules/@vitejs/plugin-react": {
+      "version": "4.7.0",
+      "resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-4.7.0.tgz",
+      "integrity": "sha512-gUu9hwfWvvEDBBmgtAowQCojwZmJ5mcLn3aufeCsitijs3+f2NsrPtlAWIR6OPiqljl96GVCUbLe0HyqIpVaoA==",
+      "dev": true,
+      "dependencies": {
+        "@babel/core": "^7.28.0",
+        "@babel/plugin-transform-react-jsx-self": "^7.27.1",
+        "@babel/plugin-transform-react-jsx-source": "^7.27.1",
+        "@rolldown/pluginutils": "1.0.0-beta.27",
+        "@types/babel__core": "^7.20.5",
+        "react-refresh": "^0.17.0"
+      },
+      "engines": {
+        "node": "^14.18.0 || >=16.0.0"
+      },
+      "peerDependencies": {
+        "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0"
+      }
+    },
+    "node_modules/@react-router/dev/node_modules/@vitejs/plugin-react/node_modules/react-refresh": {
+      "version": "0.17.0",
+      "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.17.0.tgz",
+      "integrity": "sha512-z6F7K9bV85EfseRCp2bzrpyQ0Gkw1uLoCel9XBVWPg/TjRj94SkJzUTGfOa4bs7iJvBWtQG0Wq7wnI0syw3EBQ==",
+      "dev": true,
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
    "node_modules/@react-router/dev/node_modules/jsesc": {
      "version": "3.0.2",
      "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-3.0.2.tgz",
@@ -4460,33 +4496,10 @@
        "node": ">=6"
      }
    },
-    "node_modules/@react-router/express": {
-      "version": "7.7.1",
-      "resolved": "https://registry.npmjs.org/@react-router/express/-/express-7.7.1.tgz",
-      "integrity": "sha512-OEZwIM7i/KPSDjwVRg3LqeNIwG41U+SeFOwMjhZRFfyrnwghHfvWsDajf73r4ccMh+RRHcP1GIN6VSU3XZk7MA==",
-      "license": "MIT",
-      "dependencies": {
-        "@react-router/node": "7.7.1"
-      },
-      "engines": {
-        "node": ">=20.0.0"
-      },
-      "peerDependencies": {
-        "express": "^4.17.1 || ^5",
-        "react-router": "7.7.1",
-        "typescript": "^5.1.0"
-      },
-      "peerDependenciesMeta": {
-        "typescript": {
-          "optional": true
-        }
-      }
-    },
    "node_modules/@react-router/node": {
-      "version": "7.7.1",
-      "resolved": "https://registry.npmjs.org/@react-router/node/-/node-7.7.1.tgz",
-      "integrity": "sha512-EHd6PEcw2nmcJmcYTPA0MmRWSqOaJ/meycfCp0ADA9T/6b7+fUHfr9XcNyf7UeZtYwu4zGyuYfPmLU5ic6Ugyg==",
-      "license": "MIT",
+      "version": "7.8.0",
+      "resolved": "https://registry.npmjs.org/@react-router/node/-/node-7.8.0.tgz",
+      "integrity": "sha512-/FFN9vqI2EHPwDCHTvsMInhrYvwJ5SlCeyUr1oWUxH47JyYkooVFks5++M4VkrTgj2ZBsMjPPKy0xRNTQdtBDA==",
      "dependencies": {
        "@mjackson/node-fetch-server": "^0.2.0"
      },
@@ -4494,7 +4507,7 @@
        "node": ">=20.0.0"
      },
      "peerDependencies": {
-        "react-router": "7.7.1",
+        "react-router": "7.8.0",
        "typescript": "^5.1.0"
      },
      "peerDependenciesMeta": {
@@ -4504,13 +4517,12 @@
      }
    },
    "node_modules/@react-router/serve": {
-      "version": "7.7.1",
-      "resolved": "https://registry.npmjs.org/@react-router/serve/-/serve-7.7.1.tgz",
-      "integrity": "sha512-LyAiX+oI+6O6j2xWPUoKW+cgayUf3USBosSMv73Jtwi99XUhSDu2MUhM+BB+AbrYRubauZ83QpZTROiXoaf8jA==",
-      "license": "MIT",
+      "version": "7.8.0",
+      "resolved": "https://registry.npmjs.org/@react-router/serve/-/serve-7.8.0.tgz",
+      "integrity": "sha512-DokCv1GfOMt9KHu+k3WYY9sP5nOEzq7za+Vi3dWPHoY5oP0wgv8S4DnTPU08ASY8iFaF38NAzapbSFfu6Xfr0Q==",
      "dependencies": {
-        "@react-router/express": "7.7.1",
-        "@react-router/node": "7.7.1",
+        "@react-router/express": "7.8.0",
+        "@react-router/node": "7.8.0",
        "compression": "^1.7.4",
        "express": "^4.19.2",
        "get-port": "5.1.1",
@@ -4524,7 +4536,28 @@
        "node": ">=20.0.0"
      },
      "peerDependencies": {
-        "react-router": "7.7.1"
+        "react-router": "7.8.0"
+      }
+    },
+    "node_modules/@react-router/serve/node_modules/@react-router/express": {
+      "version": "7.8.0",
+      "resolved": "https://registry.npmjs.org/@react-router/express/-/express-7.8.0.tgz",
+      "integrity": "sha512-lNUwux5IfMqczIL3gXZ/mauPUoVz65fSLPnUTkP7hkh/P7fcsPtYkmcixuaWb+882lY+Glf157OdoIMbcSMBaA==",
+      "dependencies": {
+        "@react-router/node": "7.8.0"
+      },
+      "engines": {
+        "node": ">=20.0.0"
+      },
+      "peerDependencies": {
+        "express": "^4.17.1 || ^5",
+        "react-router": "7.8.0",
+        "typescript": "^5.1.0"
+      },
+      "peerDependenciesMeta": {
+        "typescript": {
+          "optional": true
+        }
      }
    },
    "node_modules/@react-stately/calendar": {
@@ -5226,10 +5259,9 @@
      }
    },
    "node_modules/@rolldown/pluginutils": {
-      "version": "1.0.0-beta.27",
-      "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.27.tgz",
-      "integrity": "sha512-+d0F4MKMCbeVUJwG96uQ4SgAznZNSq93I3V+9NHA4OpvqG8mRCpGdKmK8l/dl02h2CCDHwW2FqilnTyDcAnqjA==",
-      "license": "MIT"
+      "version": "1.0.0-beta.30",
+      "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.30.tgz",
+      "integrity": "sha512-whXaSoNUFiyDAjkUF8OBpOm77Szdbk5lGNqFe6CbVbJFrhCCPinCbRA3NjawwlNHla1No7xvXXh+CpSxnPfUEw=="
    },
    "node_modules/@rollup/pluginutils": {
      "version": "5.2.0",
@@ -6072,6 +6104,60 @@
        "node": ">=14.0.0"
      }
    },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/core": {
+      "version": "1.4.3",
+      "inBundle": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "@emnapi/wasi-threads": "1.0.2",
+        "tslib": "^2.4.0"
+      }
+    },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/runtime": {
+      "version": "1.4.3",
+      "inBundle": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "tslib": "^2.4.0"
+      }
+    },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/wasi-threads": {
+      "version": "1.0.2",
+      "inBundle": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "tslib": "^2.4.0"
+      }
+    },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@napi-rs/wasm-runtime": {
+      "version": "0.2.11",
+      "inBundle": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "@emnapi/core": "^1.4.3",
+        "@emnapi/runtime": "^1.4.3",
+        "@tybys/wasm-util": "^0.9.0"
+      }
+    },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@tybys/wasm-util": {
+      "version": "0.9.0",
+      "inBundle": true,
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "tslib": "^2.4.0"
+      }
+    },
+    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/tslib": {
+      "version": "2.8.0",
+      "inBundle": true,
+      "license": "0BSD",
+      "optional": true
+    },
    "node_modules/@tailwindcss/oxide-win32-arm64-msvc": {
      "version": "4.1.11",
      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.11.tgz",
@@ -6175,10 +6261,9 @@
      }
    },
    "node_modules/@tanstack/react-query": {
-      "version": "5.84.1",
-      "resolved": "https://registry.npmjs.org/@tanstack/react-query/-/react-query-5.84.1.tgz",
-      "integrity": "sha512-zo7EUygcWJMQfFNWDSG7CBhy8irje/XY0RDVKKV4IQJAysb+ZJkkJPcnQi+KboyGUgT+SQebRFoTqLuTtfoDLw==",
-      "license": "MIT",
+      "version": "5.84.2",
+      "resolved": "https://registry.npmjs.org/@tanstack/react-query/-/react-query-5.84.2.tgz",
+      "integrity": "sha512-cZadySzROlD2+o8zIfbD978p0IphuQzRWiiH3I2ugnTmz4jbjc0+TdibpwqxlzynEen8OulgAg+rzdNF37s7XQ==",
      "dependencies": {
        "@tanstack/query-core": "5.83.1"
      },
@@ -6951,20 +7036,19 @@
      "license": "ISC"
    },
    "node_modules/@vitejs/plugin-react": {
-      "version": "4.7.0",
-      "resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-4.7.0.tgz",
-      "integrity": "sha512-gUu9hwfWvvEDBBmgtAowQCojwZmJ5mcLn3aufeCsitijs3+f2NsrPtlAWIR6OPiqljl96GVCUbLe0HyqIpVaoA==",
-      "license": "MIT",
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-5.0.0.tgz",
+      "integrity": "sha512-Jx9JfsTa05bYkS9xo0hkofp2dCmp1blrKjw9JONs5BTHOvJCgLbaPSuZLGSVJW6u2qe0tc4eevY0+gSNNi0YCw==",
      "dependencies": {
        "@babel/core": "^7.28.0",
        "@babel/plugin-transform-react-jsx-self": "^7.27.1",
        "@babel/plugin-transform-react-jsx-source": "^7.27.1",
-        "@rolldown/pluginutils": "1.0.0-beta.27",
+        "@rolldown/pluginutils": "1.0.0-beta.30",
        "@types/babel__core": "^7.20.5",
        "react-refresh": "^0.17.0"
      },
      "engines": {
-        "node": "^14.18.0 || >=16.0.0"
+        "node": "^20.19.0 || >=22.12.0"
      },
      "peerDependencies": {
        "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0"
@@ -6979,6 +7063,32 @@
        "node": ">=0.10.0"
      }
    },
+    "node_modules/@vitejs/plugin-rsc": {
+      "version": "0.4.11",
+      "resolved": "https://registry.npmjs.org/@vitejs/plugin-rsc/-/plugin-rsc-0.4.11.tgz",
+      "integrity": "sha512-+4H4wLi+Y9yF58znBfKgGfX8zcqUGt8ngnmNgzrdGdF1SVz7EO0sg7WnhK5fFVHt6fUxsVEjmEabsCWHKPL1Tw==",
+      "dev": true,
+      "dependencies": {
+        "@mjackson/node-fetch-server": "^0.7.0",
+        "es-module-lexer": "^1.7.0",
+        "estree-walker": "^3.0.3",
+        "magic-string": "^0.30.17",
+        "periscopic": "^4.0.2",
+        "turbo-stream": "^3.1.0",
+        "vitefu": "^1.1.1"
+      },
+      "peerDependencies": {
+        "react": "*",
+        "react-dom": "*",
+        "vite": "*"
+      }
+    },
+    "node_modules/@vitejs/plugin-rsc/node_modules/@mjackson/node-fetch-server": {
+      "version": "0.7.0",
+      "resolved": "https://registry.npmjs.org/@mjackson/node-fetch-server/-/node-fetch-server-0.7.0.tgz",
+      "integrity": "sha512-un8diyEBKU3BTVj3GzlTPA1kIjCkGdD+AMYQy31Gf9JCkfoZzwgJ79GUtHrF2BN3XPNMLpubbzPcxys+a3uZEw==",
+      "dev": true
+    },
    "node_modules/@vitest/coverage-v8": {
      "version": "3.2.4",
      "resolved": "https://registry.npmjs.org/@vitest/coverage-v8/-/coverage-v8-3.2.4.tgz",
@@ -7161,7 +7271,6 @@
      "version": "1.3.8",
      "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz",
      "integrity": "sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==",
-      "license": "MIT",
      "dependencies": {
        "mime-types": "~2.1.34",
        "negotiator": "0.6.3"
@@ -7174,7 +7283,6 @@
      "version": "0.6.3",
      "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz",
      "integrity": "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg==",
-      "license": "MIT",
      "engines": {
        "node": ">= 0.6"
      }
@@ -7315,8 +7423,7 @@
    "node_modules/array-flatten": {
      "version": "1.1.1",
      "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz",
-      "integrity": "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==",
-      "license": "MIT"
+      "integrity": "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg=="
    },
    "node_modules/array-includes": {
      "version": "3.1.9",
@@ -7681,7 +7788,6 @@
      "version": "1.20.3",
      "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.3.tgz",
      "integrity": "sha512-7rAxByjUMqQ3/bHJy7D6OGXvx/MMc4IqBn/X0fcM1QUcAItpZrBEYhWGem+tzXH90c+G01ypMcYJBO9Y30203g==",
-      "license": "MIT",
      "dependencies": {
        "bytes": "3.1.2",
        "content-type": "~1.0.5",
@@ -7705,7 +7811,6 @@
      "version": "2.6.9",
      "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
      "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
-      "license": "MIT",
      "dependencies": {
        "ms": "2.0.0"
      }
@@ -7713,8 +7818,7 @@
    "node_modules/body-parser/node_modules/ms": {
      "version": "2.0.0",
      "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
-      "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
-      "license": "MIT"
+      "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
    },
    "node_modules/brace-expansion": {
      "version": "2.0.2",
@@ -8340,7 +8444,6 @@
      "version": "0.5.4",
      "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz",
      "integrity": "sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==",
-      "license": "MIT",
      "dependencies": {
        "safe-buffer": "5.2.1"
      },
@@ -8352,7 +8455,6 @@
      "version": "1.0.5",
      "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz",
      "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==",
-      "license": "MIT",
      "engines": {
        "node": ">= 0.6"
      }
@@ -8367,7 +8469,6 @@
      "version": "0.7.1",
      "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.1.tgz",
      "integrity": "sha512-6DnInpx7SJ2AK3+CTUE/ZM0vWTUboZCegxhC2xiIydHR9jNuTAASBrfEpHhiGOZw/nX51bHt6YQl8jsGo4y/0w==",
-      "license": "MIT",
      "engines": {
        "node": ">= 0.6"
      }
@@ -8375,8 +8476,7 @@
    "node_modules/cookie-signature": {
      "version": "1.0.6",
      "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz",
-      "integrity": "sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ==",
-      "license": "MIT"
+      "integrity": "sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ=="
    },
    "node_modules/core-js": {
      "version": "3.45.0",
@@ -8731,7 +8831,6 @@
      "version": "1.2.0",
      "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.2.0.tgz",
      "integrity": "sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg==",
-      "license": "MIT",
      "engines": {
        "node": ">= 0.8",
        "npm": "1.2.8000 || >= 1.4.16"
@@ -8857,7 +8956,6 @@
      "version": "2.0.0",
      "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz",
      "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==",
-      "license": "MIT",
      "engines": {
        "node": ">= 0.8"
      }
@@ -9210,8 +9308,7 @@
    "node_modules/escape-html": {
      "version": "1.0.3",
      "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz",
-      "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==",
-      "license": "MIT"
+      "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow=="
    },
    "node_modules/escape-string-regexp": {
      "version": "4.0.0",
@@ -9884,7 +9981,6 @@
      "version": "1.8.1",
      "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz",
      "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==",
-      "license": "MIT",
      "engines": {
        "node": ">= 0.6"
      }
@@ -9923,7 +10019,6 @@
      "version": "4.21.2",
      "resolved": "https://registry.npmjs.org/express/-/express-4.21.2.tgz",
      "integrity": "sha512-28HqgMZAmih1Czt9ny7qr6ek2qddF4FclbMzwhCREB6OFfH+rXAnuNCwo1/wFvrtbgsQDb4kSbX9de9lFbrXnA==",
-      "license": "MIT",
      "dependencies": {
        "accepts": "~1.3.8",
        "array-flatten": "1.1.1",
@@ -9969,7 +10064,6 @@
      "version": "2.6.9",
      "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
      "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
-      "license": "MIT",
      "dependencies": {
        "ms": "2.0.0"
      }
@@ -9977,8 +10071,7 @@
    "node_modules/express/node_modules/ms": {
      "version": "2.0.0",
      "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
-      "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
-      "license": "MIT"
+      "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
    },
    "node_modules/extend": {
      "version": "3.0.2",
@@ -10103,7 +10196,6 @@
      "version": "1.3.1",
      "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.3.1.tgz",
      "integrity": "sha512-6BN9trH7bp3qvnrRyzsBz+g3lZxTNZTbVO2EV1CS0WIcDbawYVdYvGflME/9QP0h0pYlCDBCTjYa9nZzMDpyxQ==",
-      "license": "MIT",
      "dependencies": {
        "debug": "2.6.9",
        "encodeurl": "~2.0.0",
@@ -10121,7 +10213,6 @@
      "version": "2.6.9",
      "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
      "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
-      "license": "MIT",
      "dependencies": {
        "ms": "2.0.0"
      }
@@ -10129,8 +10220,7 @@
    "node_modules/finalhandler/node_modules/ms": {
      "version": "2.0.0",
      "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
-      "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
-      "license": "MIT"
+      "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
    },
    "node_modules/find-root": {
      "version": "1.1.0",
@@ -10267,7 +10357,6 @@
      "version": "0.2.0",
      "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz",
      "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==",
-      "license": "MIT",
      "engines": {
        "node": ">= 0.6"
      }
@@ -10317,7 +10406,6 @@
      "version": "0.5.2",
      "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz",
      "integrity": "sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==",
-      "license": "MIT",
      "engines": {
        "node": ">= 0.6"
      }
@@ -10930,7 +11018,6 @@
      "version": "2.0.0",
      "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz",
      "integrity": "sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==",
-      "license": "MIT",
      "dependencies": {
        "depd": "2.0.0",
        "inherits": "2.0.4",
@@ -11039,7 +11126,6 @@
      "version": "0.4.24",
      "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz",
      "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==",
-      "license": "MIT",
      "dependencies": {
        "safer-buffer": ">= 2.1.2 < 3"
      },
@@ -11168,7 +11254,6 @@
      "version": "1.9.1",
      "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
      "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==",
-      "license": "MIT",
      "engines": {
        "node": ">= 0.10"
      }
@@ -11517,6 +11602,15 @@
      "dev": true,
      "license": "MIT"
    },
+    "node_modules/is-reference": {
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/is-reference/-/is-reference-3.0.3.tgz",
+      "integrity": "sha512-ixkJoqQvAP88E6wLydLGGqCJsrFUnqoH6HnaczB8XmDH1oaWU+xxdptvikTgaEhtZ53Ky6YXiBuUI2WXLMCwjw==",
+      "dev": true,
+      "dependencies": {
+        "@types/estree": "^1.0.6"
+      }
+    },
    "node_modules/is-regex": {
      "version": "1.2.1",
      "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.2.1.tgz",
@@ -12625,10 +12719,9 @@
      }
    },
    "node_modules/lucide-react": {
-      "version": "0.536.0",
-      "resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.536.0.tgz",
-      "integrity": "sha512-2PgvNa9v+qz4Jt/ni8vPLt4jwoFybXHuubQT8fv4iCW5TjDxkbZjNZZHa485ad73NSEn/jdsEtU57eE1g+ma8A==",
-      "license": "ISC",
+      "version": "0.539.0",
+      "resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.539.0.tgz",
+      "integrity": "sha512-VVISr+VF2krO91FeuCrm1rSOLACQUYVy7NQkzrOty52Y8TlTPcXcMdQFj9bYzBgXbWCiywlwSZ3Z8u6a+6bMlg==",
      "peerDependencies": {
        "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0"
      }
@@ -12999,7 +13092,6 @@
      "version": "0.3.0",
      "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
      "integrity": "sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==",
-      "license": "MIT",
      "engines": {
        "node": ">= 0.6"
      }
@@ -13014,7 +13106,6 @@
      "version": "1.0.3",
      "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.3.tgz",
      "integrity": "sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ==",
-      "license": "MIT",
      "funding": {
        "url": "https://github.com/sponsors/sindresorhus"
      }
@@ -13033,7 +13124,6 @@
      "version": "1.1.2",
      "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz",
      "integrity": "sha512-iclAHeNqNm68zFtnZ0e+1L2yUIdvzNoauKU4WBA3VvH/vPFieF7qfRlwUZU+DA9P9bPXIS90ulxoUoCH23sV2w==",
-      "license": "MIT",
      "engines": {
        "node": ">= 0.6"
      }
@@ -13619,7 +13709,6 @@
      "version": "1.6.0",
      "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz",
      "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==",
-      "license": "MIT",
      "bin": {
        "mime": "cli.js"
      },
@@ -14224,7 +14313,6 @@
      "version": "2.4.1",
      "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz",
      "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==",
-      "license": "MIT",
      "dependencies": {
        "ee-first": "1.1.1"
      },
@@ -14427,7 +14515,6 @@
      "version": "1.3.3",
      "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
      "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==",
-      "license": "MIT",
      "engines": {
        "node": ">= 0.8"
      }
@@ -14495,8 +14582,7 @@
    "node_modules/path-to-regexp": {
      "version": "0.1.12",
      "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.12.tgz",
-      "integrity": "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==",
-      "license": "MIT"
+      "integrity": "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ=="
    },
    "node_modules/path-type": {
      "version": "4.0.0",
@@ -14524,6 +14610,17 @@
        "node": ">= 14.16"
      }
    },
+    "node_modules/periscopic": {
+      "version": "4.0.2",
+      "resolved": "https://registry.npmjs.org/periscopic/-/periscopic-4.0.2.tgz",
+      "integrity": "sha512-sqpQDUy8vgB7ycLkendSKS6HnVz1Rneoc3Rc+ZBUCe2pbqlVuCC5vF52l0NJ1aiMg/r1qfYF9/myz8CZeI2rjA==",
+      "dev": true,
+      "dependencies": {
+        "@types/estree": "*",
+        "is-reference": "^3.0.2",
+        "zimmerframe": "^1.0.0"
+      }
+    },
    "node_modules/picocolors": {
      "version": "1.1.1",
      "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz",
@@ -14648,10 +14745,9 @@
      "license": "MIT"
    },
    "node_modules/posthog-js": {
-      "version": "1.258.6",
-      "resolved": "https://registry.npmjs.org/posthog-js/-/posthog-js-1.258.6.tgz",
-      "integrity": "sha512-vL5AGG+rOoRg3LGquMfBPO55jD4bGl0CiV44SHdHAoBnOVDDAqxczRGDqMdxor+VLx3/ofTFOJ2FNprfAHp70Q==",
-      "license": "SEE LICENSE IN LICENSE",
+      "version": "1.259.0",
+      "resolved": "https://registry.npmjs.org/posthog-js/-/posthog-js-1.259.0.tgz",
+      "integrity": "sha512-6usLnJshky8fQ82ask7PIJh4BSFOU0VkRbFg8Zanm/HIlYMG1VOdRWlToA63JXeO7Bzm9TuREq1wFm5U2VEVCg==",
      "dependencies": {
        "core-js": "^3.38.1",
        "fflate": "^0.4.8",
@@ -14825,7 +14921,6 @@
      "version": "2.0.7",
      "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
      "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==",
-      "license": "MIT",
      "dependencies": {
        "forwarded": "0.2.0",
        "ipaddr.js": "1.9.1"
@@ -14910,7 +15005,6 @@
      "version": "1.2.1",
      "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz",
      "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==",
-      "license": "MIT",
      "engines": {
        "node": ">= 0.6"
      }
@@ -14919,7 +15013,6 @@
      "version": "2.5.2",
      "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.2.tgz",
      "integrity": "sha512-8zGqypfENjCIqGhgXToC8aB2r7YrBX+AQAfIPs/Mlk+BtPTztOvTS01NRW/3Eh60J+a48lt8qsCzirQ6loCVfA==",
-      "license": "MIT",
      "dependencies": {
        "bytes": "3.1.2",
        "http-errors": "2.0.0",
@@ -15126,10 +15219,9 @@
      }
    },
    "node_modules/react-router": {
-      "version": "7.7.1",
-      "resolved": "https://registry.npmjs.org/react-router/-/react-router-7.7.1.tgz",
-      "integrity": "sha512-jVKHXoWRIsD/qS6lvGveckwb862EekvapdHJN/cGmzw40KnJH5gg53ujOJ4qX6EKIK9LSBfFed/xiQ5yeXNrUA==",
-      "license": "MIT",
+      "version": "7.8.0",
+      "resolved": "https://registry.npmjs.org/react-router/-/react-router-7.8.0.tgz",
+      "integrity": "sha512-r15M3+LHKgM4SOapNmsH3smAizWds1vJ0Z9C4mWaKnT9/wD7+d/0jYcj6LmOvonkrO4Rgdyp4KQ/29gWN2i1eg==",
      "dependencies": {
        "cookie": "^1.0.1",
        "set-cookie-parser": "^2.6.0"
@@ -15898,7 +15990,6 @@
      "version": "0.19.0",
      "resolved": "https://registry.npmjs.org/send/-/send-0.19.0.tgz",
      "integrity": "sha512-dW41u5VfLXu8SJh5bwRmyYUbAoSB3c9uQh6L8h/KtsFREPWpbX1lrljJo186Jc4nmci/sGUZ9a0a0J2zgfq2hw==",
-      "license": "MIT",
      "dependencies": {
        "debug": "2.6.9",
        "depd": "2.0.0",
@@ -15922,7 +16013,6 @@
      "version": "2.6.9",
      "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
      "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
-      "license": "MIT",
      "dependencies": {
        "ms": "2.0.0"
      }
@@ -15930,14 +16020,12 @@
    "node_modules/send/node_modules/debug/node_modules/ms": {
      "version": "2.0.0",
      "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
-      "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
-      "license": "MIT"
+      "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
    },
    "node_modules/send/node_modules/encodeurl": {
      "version": "1.0.2",
      "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz",
      "integrity": "sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==",
-      "license": "MIT",
      "engines": {
        "node": ">= 0.8"
      }
@@ -15946,7 +16034,6 @@
      "version": "1.16.2",
      "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.16.2.tgz",
      "integrity": "sha512-VqpjJZKadQB/PEbEwvFdO43Ax5dFBZ2UECszz8bQ7pi7wt//PWe1P6MN7eCnjsatYtBT6EuiClbjSWP2WrIoTw==",
-      "license": "MIT",
      "dependencies": {
        "encodeurl": "~2.0.0",
        "escape-html": "~1.0.3",
@@ -16015,8 +16102,7 @@
    "node_modules/setprototypeof": {
      "version": "1.2.0",
      "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz",
-      "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==",
-      "license": "ISC"
+      "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw=="
    },
    "node_modules/shebang-command": {
      "version": "2.0.0",
@@ -17054,7 +17140,6 @@
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz",
      "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==",
-      "license": "MIT",
      "engines": {
        "node": ">=0.6"
      }
@@ -17180,6 +17265,12 @@
      "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
      "license": "0BSD"
    },
+    "node_modules/turbo-stream": {
+      "version": "3.1.0",
+      "resolved": "https://registry.npmjs.org/turbo-stream/-/turbo-stream-3.1.0.tgz",
+      "integrity": "sha512-tVI25WEXl4fckNEmrq70xU1XumxUwEx/FZD5AgEcV8ri7Wvrg2o7GEq8U7htrNx3CajciGm+kDyhRf5JB6t7/A==",
+      "dev": true
+    },
    "node_modules/type-check": {
      "version": "0.4.0",
      "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz",
@@ -17210,7 +17301,6 @@
      "version": "1.6.18",
      "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz",
      "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==",
-      "license": "MIT",
      "dependencies": {
        "media-typer": "0.3.0",
        "mime-types": "~2.1.24"
@@ -17438,7 +17528,6 @@
      "version": "1.0.0",
      "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
      "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==",
-      "license": "MIT",
      "engines": {
        "node": ">= 0.8"
      }
@@ -17559,7 +17648,6 @@
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz",
      "integrity": "sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==",
-      "license": "MIT",
      "engines": {
        "node": ">= 0.4.0"
      }
@@ -17638,16 +17726,15 @@
      }
    },
    "node_modules/vite": {
-      "version": "7.0.6",
-      "resolved": "https://registry.npmjs.org/vite/-/vite-7.0.6.tgz",
-      "integrity": "sha512-MHFiOENNBd+Bd9uvc8GEsIzdkn1JxMmEeYX35tI3fv0sJBUTfW5tQsoaOwuY4KhBI09A3dUJ/DXf2yxPVPUceg==",
-      "license": "MIT",
+      "version": "7.1.1",
+      "resolved": "https://registry.npmjs.org/vite/-/vite-7.1.1.tgz",
+      "integrity": "sha512-yJ+Mp7OyV+4S+afWo+QyoL9jFWD11QFH0i5i7JypnfTcA1rmgxCbiA8WwAICDEtZ1Z1hzrVhN8R8rGTqkTY8ZQ==",
      "dependencies": {
        "esbuild": "^0.25.0",
        "fdir": "^6.4.6",
        "picomatch": "^4.0.3",
        "postcss": "^8.5.6",
-        "rollup": "^4.40.0",
+        "rollup": "^4.43.0",
        "tinyglobby": "^0.2.14"
      },
      "bin": {
@@ -17816,6 +17903,25 @@
        "url": "https://github.com/sponsors/jonschlinkert"
      }
    },
+    "node_modules/vitefu": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/vitefu/-/vitefu-1.1.1.tgz",
+      "integrity": "sha512-B/Fegf3i8zh0yFbpzZ21amWzHmuNlLlmJT6n7bu5e+pCHUKQIfXSYokrqOBGEMMe9UG2sostKQF9mml/vYaWJQ==",
+      "dev": true,
+      "workspaces": [
+        "tests/deps/*",
+        "tests/projects/*",
+        "tests/projects/workspace/packages/*"
+      ],
+      "peerDependencies": {
+        "vite": "^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0-beta.0"
+      },
+      "peerDependenciesMeta": {
+        "vite": {
+          "optional": true
+        }
+      }
+    },
    "node_modules/vitest": {
      "version": "3.2.4",
      "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz",
@@ -18418,6 +18524,12 @@
        "url": "https://github.com/sponsors/sindresorhus"
      }
    },
+    "node_modules/zimmerframe": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/zimmerframe/-/zimmerframe-1.1.2.tgz",
+      "integrity": "sha512-rAbqEGa8ovJy4pyBxZM70hg4pE6gDgaQ0Sl9M3enG3I0d6H4XSAM3GeNGLKnsBpuijUow064sf7ww1nutC5/3w==",
+      "dev": true
+    },
    "node_modules/zwitch": {
      "version": "2.0.4",
      "resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz",
@@ -11,16 +11,16 @@
    "@heroui/use-infinite-scroll": "^2.2.10",
    "@microlink/react-json-view": "^1.26.2",
    "@monaco-editor/react": "^4.7.0-rc.0",
-    "@react-router/node": "^7.7.1",
-    "@react-router/serve": "^7.7.1",
+    "@react-router/node": "^7.8.0",
+    "@react-router/serve": "^7.8.0",
    "@react-types/shared": "^3.31.0",
    "@reduxjs/toolkit": "^2.8.2",
    "@stripe/react-stripe-js": "^3.9.0",
    "@stripe/stripe-js": "^7.8.0",
    "@tailwindcss/postcss": "^4.1.11",
    "@tailwindcss/vite": "^4.1.11",
-    "@tanstack/react-query": "^5.84.1",
-    "@vitejs/plugin-react": "^4.7.0",
+    "@tanstack/react-query": "^5.84.2",
+    "@vitejs/plugin-react": "^5.0.0",
    "@xterm/addon-fit": "^0.10.0",
    "@xterm/xterm": "^5.4.0",
    "axios": "^1.11.0",
@@ -32,9 +32,9 @@
    "i18next-http-backend": "^3.0.2",
    "isbot": "^5.1.29",
    "jose": "^6.0.12",
-    "lucide-react": "^0.536.0",
+    "lucide-react": "^0.539.0",
    "monaco-editor": "^0.52.2",
-    "posthog-js": "^1.258.5",
+    "posthog-js": "^1.259.0",
    "react": "^19.1.1",
    "react-dom": "^19.1.1",
    "react-highlight": "^0.15.0",
@@ -43,7 +43,7 @@
    "react-icons": "^5.5.0",
    "react-markdown": "^10.1.0",
    "react-redux": "^9.2.0",
-    "react-router": "^7.7.1",
+    "react-router": "^7.8.0",
    "react-select": "^5.10.2",
    "react-syntax-highlighter": "^15.6.1",
    "react-textarea-autosize": "^8.5.9",
@@ -52,7 +52,7 @@
    "sirv-cli": "^3.0.1",
    "socket.io-client": "^4.8.1",
    "tailwind-merge": "^3.3.1",
-    "vite": "^7.0.6",
+    "vite": "^7.1.1",
    "web-vitals": "^5.1.0",
    "ws": "^8.18.2"
  },
@@ -87,7 +87,7 @@
    "@babel/types": "^7.28.2",
    "@mswjs/socket.io-binding": "^0.2.0",
    "@playwright/test": "^1.54.2",
-    "@react-router/dev": "^7.7.1",
+    "@react-router/dev": "^7.8.0",
    "@tailwindcss/typography": "^0.5.16",
    "@tanstack/eslint-plugin-query": "^5.83.1",
    "@testing-library/dom": "^10.4.1",
@@ -1,66 +0,0 @@
-import { openHands } from "../open-hands-axios";
-import { GetFilesResponse, GetFileResponse } from "./file-service.types";
-import { getConversationUrl } from "../conversation.utils";
-import { FileUploadSuccessResponse } from "../open-hands.types";
-
-export class FileService {
-  /**
-   * Retrieve the list of files available in the workspace
-   * @param conversationId ID of the conversation
-   * @param path Path to list files from. If provided, it lists all the files in the given path
-   * @returns List of files available in the given path. If path is not provided, it lists all the files in the workspace
-   */
-  static async getFiles(
-    conversationId: string,
-    path?: string,
-  ): Promise<GetFilesResponse> {
-    const url = `${getConversationUrl(conversationId)}/list-files`;
-    const { data } = await openHands.get<GetFilesResponse>(url, {
-      params: { path },
-    });
-
-    return data;
-  }
-
-  /**
-   * Retrieve the content of a file
-   * @param conversationId ID of the conversation
-   * @param path Full path of the file to retrieve
-   * @returns Code content of the file
-   */
-  static async getFile(conversationId: string, path: string): Promise<string> {
-    const url = `${getConversationUrl(conversationId)}/select-file`;
-    const { data } = await openHands.get<GetFileResponse>(url, {
-      params: { file: path },
-    });
-
-    return data.code;
-  }
-
-  /**
-   * Upload multiple files to the workspace
-   * @param conversationId ID of the conversation
-   * @param files List of files.
-   * @returns list of uploaded files, list of skipped files
-   */
-  static async uploadFiles(
-    conversationId: string,
-    files: File[],
-  ): Promise<FileUploadSuccessResponse> {
-    const formData = new FormData();
-    for (const file of files) {
-      formData.append("files", file);
-    }
-    const url = `${getConversationUrl(conversationId)}/upload-files`;
-    const response = await openHands.post<FileUploadSuccessResponse>(
-      url,
-      formData,
-      {
-        headers: {
-          "Content-Type": "multipart/form-data",
-        },
-      },
-    );
-    return response.data;
-  }
-}
@@ -1,5 +0,0 @@
-export type GetFilesResponse = string[];
-
-export interface GetFileResponse {
-  code: string;
-}
@@ -1,21 +0,0 @@
-import { openHands } from "../open-hands-axios";
-
-interface GetPromptResponse {
-  status: string;
-  prompt: string;
-}
-
-export class MemoryService {
-  static async getPrompt(
-    conversationId: string,
-    eventId: number,
-  ): Promise<string> {
-    const { data } = await openHands.get<GetPromptResponse>(
-      `/api/conversations/${conversationId}/remember_prompt`,
-      {
-        params: { event_id: eventId },
-      },
-    );
-    return data.prompt;
-  }
-}
@@ -15,6 +15,9 @@ import {
  GetMicroagentPromptResponse,
  CreateMicroagent,
  MicroagentContentResponse,
+  FileUploadSuccessResponse,
+  GetFilesResponse,
+  GetFileResponse,
 } from "./open-hands.types";
 import { openHands } from "./open-hands-axios";
 import { ApiSettings, PostApiSettings, Provider } from "#/types/settings";
@@ -280,17 +283,27 @@ class OpenHands {
    return data;
  }

-  static async getUserConversations(): Promise<Conversation[]> {
+  static async getUserConversations(
+    limit: number = 20,
+    pageId?: string,
+  ): Promise<ResultSet<Conversation>> {
+    const params = new URLSearchParams();
+    params.append("limit", limit.toString());
+
+    if (pageId) {
+      params.append("page_id", pageId);
+    }
+
    const { data } = await openHands.get<ResultSet<Conversation>>(
-      "/api/conversations?limit=100",
+      `/api/conversations?${params.toString()}`,
    );
-    return data.results;
+    return data;
  }

  static async searchConversations(
    selectedRepository?: string,
    conversationTrigger?: string,
-    limit: number = 20,
+    limit: number = 100,
  ): Promise<Conversation[]> {
    const params = new URLSearchParams();
    params.append("limit", limit.toString());
@@ -618,12 +631,11 @@ class OpenHands {
    conversationId: string,
    eventId: number,
  ): Promise<string> {
-    const { data } = await openHands.get<GetMicroagentPromptResponse>(
-      `/api/conversations/${conversationId}/remember_prompt`,
-      {
-        params: { event_id: eventId },
-      },
-    );
+    const url = `${this.getConversationUrl(conversationId)}/remember-prompt`;
+    const { data } = await openHands.get<GetMicroagentPromptResponse>(url, {
+      params: { event_id: eventId },
+      headers: this.getConversationHeaders(),
+    });

    return data.prompt;
  }
@@ -640,6 +652,69 @@ class OpenHands {
    return data;
  }

+  /**
+   * Retrieve the list of files available in the workspace
+   * @param conversationId ID of the conversation
+   * @param path Path to list files from. If provided, it lists all the files in the given path
+   * @returns List of files available in the given path. If path is not provided, it lists all the files in the workspace
+   */
+  static async getFiles(
+    conversationId: string,
+    path?: string,
+  ): Promise<GetFilesResponse> {
+    const url = `${this.getConversationUrl(conversationId)}/list-files`;
+    const { data } = await openHands.get<GetFilesResponse>(url, {
+      params: { path },
+      headers: this.getConversationHeaders(),
+    });
+
+    return data;
+  }
+
+  /**
+   * Retrieve the content of a file
+   * @param conversationId ID of the conversation
+   * @param path Full path of the file to retrieve
+   * @returns Code content of the file
+   */
+  static async getFile(conversationId: string, path: string): Promise<string> {
+    const url = `${this.getConversationUrl(conversationId)}/select-file`;
+    const { data } = await openHands.get<GetFileResponse>(url, {
+      params: { file: path },
+      headers: this.getConversationHeaders(),
+    });
+
+    return data.code;
+  }
+
+  /**
+   * Upload multiple files to the workspace
+   * @param conversationId ID of the conversation
+   * @param files List of files.
+   * @returns list of uploaded files, list of skipped files
+   */
+  static async uploadFiles(
+    conversationId: string,
+    files: File[],
+  ): Promise<FileUploadSuccessResponse> {
+    const formData = new FormData();
+    for (const file of files) {
+      formData.append("files", file);
+    }
+    const url = `${this.getConversationUrl(conversationId)}/upload-files`;
+    const response = await openHands.post<FileUploadSuccessResponse>(
+      url,
+      formData,
+      {
+        headers: {
+          "Content-Type": "multipart/form-data",
+          ...this.getConversationHeaders(),
+        },
+      },
+    );
+    return response.data;
+  }
+
  /**
   * Get the user installation IDs
   * @param provider The provider to get installation IDs for (github, bitbucket, etc.)
@@ -158,3 +158,9 @@ export interface MicroagentContentResponse {
  git_provider: Provider;
  triggers: string[];
 }
+
+export type GetFilesResponse = string[];
+
+export interface GetFileResponse {
+  code: string;
+}
@@ -3,7 +3,8 @@ import { NavLink, useParams, useNavigate } from "react-router";
 import { useTranslation } from "react-i18next";
 import { I18nKey } from "#/i18n/declaration";
 import { ConversationCard } from "./conversation-card";
-import { useUserConversations } from "#/hooks/query/use-user-conversations";
+import { usePaginatedConversations } from "#/hooks/query/use-paginated-conversations";
+import { useInfiniteScroll } from "#/hooks/use-infinite-scroll";
 import { useDeleteConversation } from "#/hooks/mutation/use-delete-conversation";
 import { useStopConversation } from "#/hooks/mutation/use-stop-conversation";
 import { ConfirmDeleteModal } from "./confirm-delete-modal";
@@ -40,12 +41,30 @@ export function ConversationPanel({ onClose }: ConversationPanelProps) {
    string | null
  >(null);

-  const { data: conversations, isFetching, error } = useUserConversations();
+  const {
+    data,
+    isFetching,
+    error,
+    hasNextPage,
+    isFetchingNextPage,
+    fetchNextPage,
+  } = usePaginatedConversations();
+
+  // Flatten all pages into a single array of conversations
+  const conversations = data?.pages.flatMap((page) => page.results) ?? [];

  const { mutate: deleteConversation } = useDeleteConversation();
  const { mutate: stopConversation } = useStopConversation();
  const { mutate: updateConversation } = useUpdateConversation();

+  // Set up infinite scroll
+  const scrollContainerRef = useInfiniteScroll({
+    hasNextPage: !!hasNextPage,
+    isFetchingNextPage,
+    fetchNextPage,
+    threshold: 200, // Load more when 200px from bottom
+  });
+
  const handleDeleteProject = (conversationId: string) => {
    setConfirmDeleteModalVisible(true);
    setSelectedConversationId(conversationId);
@@ -102,11 +121,16 @@ export function ConversationPanel({ onClose }: ConversationPanelProps) {

  return (
    <div
-      ref={ref}
+      ref={(node) => {
+        // TODO: Combine both refs somehow
+        if (ref.current !== node) ref.current = node;
+        if (scrollContainerRef.current !== node)
+          scrollContainerRef.current = node;
+      }}
      data-testid="conversation-panel"
      className="w-[350px] h-full border border-neutral-700 bg-base-secondary rounded-xl overflow-y-auto absolute"
    >
-      {isFetching && (
+      {isFetching && conversations.length === 0 && (
        <div className="w-full h-full absolute flex justify-center items-center">
          <LoadingSpinner size="small" />
        </div>
@@ -156,6 +180,13 @@ export function ConversationPanel({ onClose }: ConversationPanelProps) {
        </NavLink>
      ))}

+      {/* Loading indicator for fetching more conversations */}
+      {isFetchingNextPage && (
+        <div className="flex justify-center py-4">
+          <LoadingSpinner size="small" />
+        </div>
+      )}
+
      {confirmDeleteModalVisible && (
        <ConfirmDeleteModal
          onConfirm={() => {
@@ -25,6 +25,7 @@ const saveSettingsMutationFn = async (settings: Partial<PostSettings>) => {
    mcp_config: settings.MCP_CONFIG,
    enable_proactive_conversation_starters:
      settings.ENABLE_PROACTIVE_CONVERSATION_STARTERS,
+    enable_solvability_analysis: settings.ENABLE_SOLVABILITY_ANALYSIS,
    search_api_key: settings.SEARCH_API_KEY?.trim() || "",
    max_budget_per_task: settings.MAX_BUDGET_PER_TASK,
    git_user_name:
@@ -1,11 +1,11 @@
 import { useMutation } from "@tanstack/react-query";
-import { FileService } from "#/api/file-service/file-service.api";
+import OpenHands from "#/api/open-hands";

 export const useUploadFiles = () =>
  useMutation({
    mutationKey: ["upload-files"],
    mutationFn: (variables: { conversationId: string; files: File[] }) =>
-      FileService.uploadFiles(variables.conversationId!, variables.files),
+      OpenHands.uploadFiles(variables.conversationId!, variables.files),
    onSuccess: async () => {},
    meta: {
      disableToast: true,
@@ -1,13 +1,13 @@
 import { useQuery } from "@tanstack/react-query";
 import { useConversationId } from "../use-conversation-id";
-import { FileService } from "#/api/file-service/file-service.api";
+import OpenHands from "#/api/open-hands";

 export const useGetMicroagents = (microagentDirectory: string) => {
  const { conversationId } = useConversationId();

  return useQuery({
    queryKey: ["files", "microagents", conversationId, microagentDirectory],
-    queryFn: () => FileService.getFiles(conversationId!, microagentDirectory),
+    queryFn: () => OpenHands.getFiles(conversationId!, microagentDirectory),
    enabled: !!conversationId,
    select: (data) =>
      data.map((fileName) => fileName.replace(microagentDirectory, "")),
@@ -1,5 +1,5 @@
 import { useQuery } from "@tanstack/react-query";
-import { MemoryService } from "#/api/memory-service/memory-service.api";
+import OpenHands from "#/api/open-hands";
 import { useConversationId } from "../use-conversation-id";

 export const useMicroagentPrompt = (eventId: number) => {
@@ -7,7 +7,7 @@ export const useMicroagentPrompt = (eventId: number) => {

  return useQuery({
    queryKey: ["memory", "prompt", conversationId, eventId],
-    queryFn: () => MemoryService.getPrompt(conversationId!, eventId),
+    queryFn: () => OpenHands.getMicroagentPrompt(conversationId!, eventId),
    enabled: !!conversationId,
    staleTime: 1000 * 60 * 5, // 5 minutes
    gcTime: 1000 * 60 * 15, // 15 minutes
@@ -0,0 +1,16 @@
+import { useInfiniteQuery } from "@tanstack/react-query";
+import OpenHands from "#/api/open-hands";
+import { useIsAuthed } from "./use-is-authed";
+
+export const usePaginatedConversations = (limit: number = 20) => {
+  const { data: userIsAuthenticated } = useIsAuthed();
+
+  return useInfiniteQuery({
+    queryKey: ["user", "conversations", "paginated", limit],
+    queryFn: ({ pageParam }) =>
+      OpenHands.getUserConversations(limit, pageParam),
+    enabled: !!userIsAuthenticated,
+    getNextPageParam: (lastPage) => lastPage.next_page_id,
+    initialPageParam: undefined as string | undefined,
+  });
+};
@@ -4,7 +4,7 @@ import OpenHands from "#/api/open-hands";
 export const useSearchConversations = (
  selectedRepository?: string,
  conversationTrigger?: string,
-  limit: number = 20,
+  limit: number = 100,
  cacheDisabled: boolean = false,
 ) =>
  useQuery({
@@ -25,6 +25,7 @@ const getSettingsQueryFn = async (): Promise<Settings> => {
    ENABLE_SOUND_NOTIFICATIONS: apiSettings.enable_sound_notifications,
    ENABLE_PROACTIVE_CONVERSATION_STARTERS:
      apiSettings.enable_proactive_conversation_starters,
+    ENABLE_SOLVABILITY_ANALYSIS: apiSettings.enable_solvability_analysis,
    USER_CONSENTS_TO_ANALYTICS: apiSettings.user_consents_to_analytics,
    SEARCH_API_KEY: apiSettings.search_api_key || "",
    MAX_BUDGET_PER_TASK: apiSettings.max_budget_per_task,
@@ -1,13 +0,0 @@
-import { useQuery } from "@tanstack/react-query";
-import OpenHands from "#/api/open-hands";
-import { useIsAuthed } from "./use-is-authed";
-
-export const useUserConversations = () => {
-  const { data: userIsAuthenticated } = useIsAuthed();
-
-  return useQuery({
-    queryKey: ["user", "conversations"],
-    queryFn: OpenHands.getUserConversations,
-    enabled: !!userIsAuthenticated,
-  });
-};
@@ -0,0 +1,42 @@
+import { useEffect, useRef, useCallback } from "react";
+
+interface UseInfiniteScrollOptions {
+  hasNextPage: boolean;
+  isFetchingNextPage: boolean;
+  fetchNextPage: () => void;
+  threshold?: number;
+}
+
+export const useInfiniteScroll = ({
+  hasNextPage,
+  isFetchingNextPage,
+  fetchNextPage,
+  threshold = 100,
+}: UseInfiniteScrollOptions) => {
+  const containerRef = useRef<HTMLDivElement>(null);
+
+  const handleScroll = useCallback(() => {
+    if (!containerRef.current || isFetchingNextPage || !hasNextPage) {
+      return;
+    }
+
+    const { scrollTop, scrollHeight, clientHeight } = containerRef.current;
+    const isNearBottom = scrollTop + clientHeight >= scrollHeight - threshold;
+
+    if (isNearBottom) {
+      fetchNextPage();
+    }
+  }, [hasNextPage, isFetchingNextPage, fetchNextPage, threshold]);
+
+  useEffect(() => {
+    const container = containerRef.current;
+    if (!container) return undefined;
+
+    container.addEventListener("scroll", handleScroll);
+    return () => {
+      container.removeEventListener("scroll", handleScroll);
+    };
+  }, [handleScroll]);
+
+  return containerRef;
+};
@@ -151,6 +151,7 @@ export enum I18nKey {
  SETTINGS$MAX_BUDGET_PER_TASK = "SETTINGS$MAX_BUDGET_PER_TASK",
  SETTINGS$MAX_BUDGET_PER_CONVERSATION = "SETTINGS$MAX_BUDGET_PER_CONVERSATION",
  SETTINGS$PROACTIVE_CONVERSATION_STARTERS = "SETTINGS$PROACTIVE_CONVERSATION_STARTERS",
+  SETTINGS$SOLVABILITY_ANALYSIS = "SETTINGS$SOLVABILITY_ANALYSIS",
  SETTINGS$SEARCH_API_KEY = "SETTINGS$SEARCH_API_KEY",
  SETTINGS$SEARCH_API_KEY_OPTIONAL = "SETTINGS$SEARCH_API_KEY_OPTIONAL",
  SETTINGS$SEARCH_API_KEY_INSTRUCTIONS = "SETTINGS$SEARCH_API_KEY_INSTRUCTIONS",
@@ -2415,6 +2415,22 @@
        "tr": "GitHub'da Görevler Öner",
        "uk": "Запропонувати завдання на GitHub"
    },
+    "SETTINGS$SOLVABILITY_ANALYSIS": {
+        "en": "Enable Solvability Analysis",
+        "ja": "解決可能性分析を有効にする",
+        "zh-CN": "启用可解决性分析",
+        "zh-TW": "啟用可解決性分析",
+        "ko-KR": "해결 가능성 분석 활성화",
+        "de": "Lösbarkeitsanalyse aktivieren",
+        "no": "Aktiver løsbarhetsanalyse",
+        "it": "Abilita analisi di risolvibilità",
+        "pt": "Ativar análise de solucionabilidade",
+        "es": "Habilitar análisis de resolubilidad",
+        "ar": "تمكين تحليل القابلية للحل",
+        "fr": "Activer l'analyse de résolubilité",
+        "tr": "Çözülebilirlik Analizini Etkinleştir",
+        "uk": "Увімкнути аналіз розв'язності"
+    },
    "SETTINGS$SEARCH_API_KEY": {
        "en": "Search API Key (Tavily)",
        "ja": "検索APIキー (Tavily)",
@@ -30,6 +30,7 @@ export const MOCK_DEFAULT_USER_SETTINGS: ApiSettings | PostApiSettings = {
  enable_sound_notifications: DEFAULT_SETTINGS.ENABLE_SOUND_NOTIFICATIONS,
  enable_proactive_conversation_starters:
    DEFAULT_SETTINGS.ENABLE_PROACTIVE_CONVERSATION_STARTERS,
+  enable_solvability_analysis: DEFAULT_SETTINGS.ENABLE_SOLVABILITY_ANALYSIS,
  user_consents_to_analytics: DEFAULT_SETTINGS.USER_CONSENTS_TO_ANALYTICS,
  max_budget_per_task: DEFAULT_SETTINGS.MAX_BUDGET_PER_TASK,
 };
@@ -38,6 +38,10 @@ function AppSettingsScreen() {
    proactiveConversationsSwitchHasChanged,
    setProactiveConversationsSwitchHasChanged,
  ] = React.useState(false);
+  const [
+    solvabilityAnalysisSwitchHasChanged,
+    setSolvabilityAnalysisSwitchHasChanged,
+  ] = React.useState(false);
  const [maxBudgetPerTaskHasChanged, setMaxBudgetPerTaskHasChanged] =
    React.useState(false);
  const [gitUserNameHasChanged, setGitUserNameHasChanged] =
@@ -61,6 +65,9 @@ function AppSettingsScreen() {
      formData.get("enable-proactive-conversations-switch")?.toString() ===
      "on";

+    const enableSolvabilityAnalysis =
+      formData.get("enable-solvability-analysis-switch")?.toString() === "on";
+
    const maxBudgetPerTaskValue = formData
      .get("max-budget-per-task-input")
      ?.toString();
@@ -79,6 +86,7 @@ function AppSettingsScreen() {
        user_consents_to_analytics: enableAnalytics,
        ENABLE_SOUND_NOTIFICATIONS: enableSoundNotifications,
        ENABLE_PROACTIVE_CONVERSATION_STARTERS: enableProactiveConversations,
+        ENABLE_SOLVABILITY_ANALYSIS: enableSolvabilityAnalysis,
        MAX_BUDGET_PER_TASK: maxBudgetPerTask,
        GIT_USER_NAME: gitUserName,
        GIT_USER_EMAIL: gitUserEmail,
@@ -136,6 +144,13 @@ function AppSettingsScreen() {
    );
  };

+  const checkIfSolvabilityAnalysisSwitchHasChanged = (checked: boolean) => {
+    const currentSolvabilityAnalysis = !!settings?.ENABLE_SOLVABILITY_ANALYSIS;
+    setSolvabilityAnalysisSwitchHasChanged(
+      checked !== currentSolvabilityAnalysis,
+    );
+  };
+
  const checkIfMaxBudgetPerTaskHasChanged = (value: string) => {
    const newValue = parseMaxBudgetPerTask(value);
    const currentValue = settings?.MAX_BUDGET_PER_TASK;
@@ -157,6 +172,7 @@ function AppSettingsScreen() {
    !analyticsSwitchHasChanged &&
    !soundNotificationsSwitchHasChanged &&
    !proactiveConversationsSwitchHasChanged &&
+    !solvabilityAnalysisSwitchHasChanged &&
    !maxBudgetPerTaskHasChanged &&
    !gitUserNameHasChanged &&
    !gitUserEmailHasChanged;
@@ -209,6 +225,17 @@ function AppSettingsScreen() {
            </SettingsSwitch>
          )}

+          {config?.APP_MODE === "saas" && (
+            <SettingsSwitch
+              testId="enable-solvability-analysis-switch"
+              name="enable-solvability-analysis-switch"
+              defaultIsToggled={!!settings.ENABLE_SOLVABILITY_ANALYSIS}
+              onToggle={checkIfSolvabilityAnalysisSwitchHasChanged}
+            >
+              {t(I18nKey.SETTINGS$SOLVABILITY_ANALYSIS)}
+            </SettingsSwitch>
+          )}
+
          <SettingsInput
            testId="max-budget-per-task-input"
            name="max-budget-per-task-input"
@@ -222,7 +249,7 @@ function AppSettingsScreen() {
            className="w-full max-w-[680px]" // Match the width of the language field
          />

-          <div className="border-t border-t-tertiary pt-6 mt-2">
+          <div className="border-t border-t-tertiary pt-6 mt-2 hidden">
            <h3 className="text-lg font-medium mb-4">
              {t(I18nKey.SETTINGS$GIT_SETTINGS)}
            </h3>
@@ -17,6 +17,7 @@ export const DEFAULT_SETTINGS: Settings = {
  ENABLE_SOUND_NOTIFICATIONS: false,
  USER_CONSENTS_TO_ANALYTICS: false,
  ENABLE_PROACTIVE_CONVERSATION_STARTERS: false,
+  ENABLE_SOLVABILITY_ANALYSIS: false,
  SEARCH_API_KEY: "",
  IS_NEW_USER: true,
  MAX_BUDGET_PER_TASK: null,
@@ -43,6 +43,7 @@ export type Settings = {
  ENABLE_DEFAULT_CONDENSER: boolean;
  ENABLE_SOUND_NOTIFICATIONS: boolean;
  ENABLE_PROACTIVE_CONVERSATION_STARTERS: boolean;
+  ENABLE_SOLVABILITY_ANALYSIS: boolean;
  USER_CONSENTS_TO_ANALYTICS: boolean | null;
  SEARCH_API_KEY?: string;
  IS_NEW_USER?: boolean;
@@ -68,6 +69,7 @@ export type ApiSettings = {
  enable_default_condenser: boolean;
  enable_sound_notifications: boolean;
  enable_proactive_conversation_starters: boolean;
+  enable_solvability_analysis: boolean;
  user_consents_to_analytics: boolean | null;
  search_api_key?: string;
  provider_tokens_set: Partial<Record<Provider, string | null>>;
@@ -23,11 +23,13 @@ export const VERIFIED_MODELS = [
  "devstral-medium-2507",
  "kimi-k2-0711-preview",
  "qwen3-coder-480b",
+  "gpt-5-2025-08-07",
 ];

 // LiteLLM does not return OpenAI models with the provider, so we list them here to set them ourselves for consistency
 // (e.g., they return `gpt-4o` instead of `openai/gpt-4o`)
 export const VERIFIED_OPENAI_MODELS = [
+  "gpt-5-2025-08-07",
  "gpt-4o",
  "gpt-4o-mini",
  "gpt-4.1",
@@ -63,6 +65,7 @@ export const VERIFIED_MISTRAL_MODELS = [
 // (e.g., they return `claude-sonnet-4-20250514` instead of `openhands/claude-sonnet-4-20250514`)
 export const VERIFIED_OPENHANDS_MODELS = [
  "claude-sonnet-4-20250514",
+  "gpt-5-2025-08-07",
  "claude-opus-4-20250514",
  "claude-opus-4-1-20250805",
  "gemini-2.5-pro",
@@ -1,572 +0,0 @@
-# Gemini Performance Investigation
-
-## Problem Statement
-RooCode (VSCode extension) runs Gemini 2.5 Pro very fast, but OpenHands runs the same LLM extremely slowly on the same account. This suggests different API usage patterns or hyperparameters.
-
-## Investigation Plan
-
-### Phase 1: Analyze RooCode Implementation
- [ ] Find RooCode's Gemini API integration code
- [ ] Identify API endpoint, authentication method, and request structure
- [ ] Document hyperparameters (temperature, max_tokens, top_p, top_k, etc.)
- [ ] Check if it uses streaming vs non-streaming responses
- [ ] Look for any special configurations or optimizations
-
-### Phase 2: Analyze OpenHands Implementation
- [ ] Find OpenHands' Gemini API integration code
- [ ] Identify API endpoint, authentication method, and request structure
- [ ] Document hyperparameters and compare with RooCode
- [ ] Check streaming configuration
- [ ] Look for any performance bottlenecks
-
-### Phase 3: Compare and Identify Differences
- [ ] Create side-by-side comparison of API calls
- [ ] Identify key differences in:
-  - Hyperparameters
-  - Request structure
-  - Authentication
-  - Streaming configuration
-  - Connection settings
-
-### Phase 4: Implement Fixes
- [ ] Apply RooCode's successful configuration to OpenHands
- [ ] Test performance improvements
- [ ] Document changes and rationale
-
-## Findings
-
-### RooCode Analysis
- Location: workspace/roocode
- Status: ✅ COMPLETED
-
-**Key Findings:**
-1. **Library**: Uses `@google/genai` (Google's official Gemini SDK)
-2. **API Method**: `client.models.generateContentStream()` for streaming
-3. **Default Temperature**: 0 (line 75 in gemini.ts)
-4. **Max Tokens**: Uses `modelMaxTokens` setting or model default
-5. **Streaming**: Always uses streaming responses
-6. **Reasoning Support**: Full support for thinking/reasoning tokens with `thinkingConfig`
-7. **Prompt Caching**: Supports prompt caching with `cachedContentTokenCount`
-8. **Request Structure**:
-   - Uses `GenerateContentParameters` with `model`, `contents`, `config`
-   - System instruction passed separately
-   - Temperature defaults to 0
-   - Supports reasoning budget and thinking tokens
-
-**RooCode Configuration Details:**
- **Default Model**: `gemini-2.0-flash-001` (line 6 in gemini.ts)
- **Temperature**: Always 0 unless reasoning models require 1.0
- **Streaming**: Uses `generateContentStream()` method
- **Reasoning Config**:
-  - For reasoning budget models: `{ thinkingBudget: reasoningBudget, includeThoughts: true }`
-  - Reasoning budget capped at 80% of maxTokens, minimum 1024 tokens
- **Authentication**: Supports API key, Vertex AI with JSON credentials, or key file
- **Base URL**: Configurable via `googleGeminiBaseUrl` option
- **Token Counting**: Uses native `client.models.countTokens()` method
- **Cost Calculation**: Sophisticated tiered pricing calculation with cache read support
-
-### OpenHands Analysis
- Location: openhands/llm/
- Status: ✅ COMPLETED
-
-**Key Findings:**
-1. **Library**: Uses LiteLLM (wrapper around multiple LLM providers)
-2. **API Method**: `litellm.completion()` - generic completion interface
-3. **Default Temperature**: 0.0 (line 69 in llm_config.py)
-4. **Max Tokens**: Uses `max_output_tokens` config setting
-5. **Streaming**: Configurable via `stream` parameter
-6. **Reasoning Support**: Limited - supports `reasoning_effort` for some models
-7. **Prompt Caching**: Enabled by default (`caching_prompt: true`)
-8. **Request Structure**:
-   - Uses LiteLLM's generic format (OpenAI-compatible)
-   - All parameters passed through LiteLLM's abstraction layer
-   - Special handling for Gemini tool calling limitations
-
-**OpenHands Configuration Details:**
- **Default Model**: `claude-sonnet-4-20250514` (not Gemini)
- **Temperature**: 0.0 by default
- **Streaming**: Not always used (depends on caller)
- **LiteLLM Abstraction**: All calls go through LiteLLM's generic interface
- **Gemini-specific Issues**:
-  - Tool calling limitations (removes default fields, limited format support)
-  - Special error handling for "Response choices is less than 1"
-  - Mock function calling for compatibility
- **Authentication**: Via `api_key` parameter
- **Base URL**: Configurable but uses LiteLLM's default endpoints
- **Token Counting**: Uses LiteLLM's generic token counting
- **Cost Calculation**: Uses LiteLLM's cost calculation
-
-### Key Differences
-
-**🔥 CRITICAL PERFORMANCE DIFFERENCES:**
-
-1. **API Library**:
-   - **RooCode**: Uses `@google/genai` (Google's official, optimized SDK)
-   - **OpenHands**: Uses LiteLLM (generic wrapper with abstraction overhead)
-
-2. **API Method**:
-   - **RooCode**: Direct `client.models.generateContentStream()` call
-   - **OpenHands**: Generic `litellm.completion()` with abstraction layers
-
-3. **Streaming**:
-   - **RooCode**: Always uses streaming (`generateContentStream`)
-   - **OpenHands**: May or may not use streaming (depends on caller)
-
-4. **Request Format**:
-   - **RooCode**: Native Gemini format (`GenerateContentParameters`)
-   - **OpenHands**: OpenAI-compatible format converted by LiteLLM
-
-5. **Authentication & Endpoints**:
-   - **RooCode**: Direct Google API endpoints with native auth
-   - **OpenHands**: Through LiteLLM's endpoint abstraction
-
-6. **Token Counting**:
-   - **RooCode**: Native `client.models.countTokens()` method
-   - **OpenHands**: LiteLLM's generic token counting (may be inaccurate)
-
-7. **Reasoning Support**:
-   - **RooCode**: Full native support with `thinkingConfig`
-   - **OpenHands**: Limited support through LiteLLM abstraction
-
-8. **Error Handling**:
-   - **RooCode**: Native Gemini error handling
-   - **OpenHands**: Multiple abstraction layers, special Gemini workarounds
-
-### Proposed Fixes
-
-**🎯 RECOMMENDED SOLUTION: Add Native Gemini Provider**
-
-The performance difference is likely due to LiteLLM's abstraction overhead and suboptimal Gemini integration. We should add a native Gemini provider to OpenHands similar to RooCode's implementation.
-
-**Implementation Plan:**
-
-1. **Create Native Gemini LLM Class** (`openhands/llm/gemini.py`):
-   - Use Google's official SDK directly (RooCode uses `@google/genai`; the Python equivalent is `google-genai`, which supersedes the legacy `google-generativeai` package)
-   - Implement streaming by default
-   - Use native Gemini request format
-   - Support reasoning/thinking tokens properly
-
-2. **Update LLM Factory** (`openhands/llm/llm.py`):
-   - Detect Gemini models and route to native provider
-   - Fallback to LiteLLM for other models
-
-3. **Configuration Changes**:
-   - Add Gemini-specific config options
-   - Support native authentication methods
-   - Enable proper reasoning configuration
-
-4. **Testing Strategy**:
-   - Compare performance before/after
-   - Ensure feature parity with LiteLLM version
-   - Test with Gemini 2.5 Pro specifically
-
-**Alternative Quick Fixes (if native provider is too complex):**
-
-1. **Force Streaming**: Always use `stream=True` for Gemini models
-2. **Optimize LiteLLM Config**:
-   - Set `drop_params=False` for Gemini
-   - Use native tool calling when possible
-   - Configure proper reasoning parameters
-3. **Direct Endpoint**: Use Google's direct API endpoints instead of LiteLLM's
-
-## Next Steps
-
-### ✅ COMPLETED
-1. ✅ Explore RooCode codebase for Gemini integration
-2. ✅ Explore OpenHands codebase for Gemini integration
-3. ✅ Compare implementations
-4. ✅ Identify suspected root cause (LiteLLM abstraction overhead; ruled out by the performance tests below)
-
-### ⚠️ INVESTIGATION UPDATE: DEEPER ANALYSIS NEEDED
-
-**🎯 INITIAL FINDING: LiteLLM is NOT the bottleneck!**
-
-**Performance Test Results (gemini-2.5-pro):**
-
-| Method | Configuration | Duration | Overhead |
-|--------|---------------|----------|----------|
-| **Native Google API** | Streaming | 25.863s | Baseline |
-| **Native Google API** | Non-streaming | 24.661s | Baseline |
-| **LiteLLM** | OpenHands streaming | 25.680s | +0.8s (3%) |
-| **LiteLLM** | OpenHands non-streaming | 26.564s | +1.9s (8%) |
-| **LiteLLM** | Minimal config | 29.368s | +4.7s (19%) |
-
-**🔍 Key Finding:** LiteLLM overhead is only about 1-5 seconds (3-19% in the table above), NOT the 10x+ slowdown reported.
-
-**🚨 CRITICAL DISCOVERY: User reports RooCode is FAST with gemini-2.5-pro!**
-
-This contradicts our test results where ALL approaches with `gemini-2.5-pro` are slow (~25s).
-
-**🔬 Thinking Budget Investigation:**
-
-RooCode sets `thinkingConfig` for `gemini-2.5-pro` (marked as `requiredReasoningBudget: true`):
-```typescript
-// RooCode's approach
-thinkingConfig: { thinkingBudget: 4096, includeThoughts: true }
-```
-
-**Thinking Budget Test Results:**
- No thinking config: 25.979s
- Thinking disabled: 26.113s
- Small thinking budget (1024): 23.724s ⭐ (fastest)
-
-**🤔 HYPOTHESIS REFINEMENT:**
-1. **Attributing the slowness to the model choice was premature** - RooCode IS fast with `gemini-2.5-pro`
-2. **Thinking budget helps slightly** - 2-3s improvement with small budget
-3. **Missing configuration** - RooCode likely has other optimizations we haven't found
-4. **Prompt differences** - RooCode may use different prompts/context
-
-**📊 Test Suite Results:**
-   ```bash
-   # All tests show similar slow performance with gemini-2.5-pro
-   python test_native_gemini.py     # 24-26s
-   python test_litellm_performance.py  # 25-29s
-   python test_openhands_litellm.py    # 25-31s
-   python test_thinking_budget.py      # 23-26s
-   ```
-
-### 🛠️ CURRENT EXPERIMENT: Google's Gemini CLI Analysis
-
-**🎯 NEW DISCOVERY: Google's Official Gemini CLI**
-
-Found Google's official open-source Gemini CLI in workspace directory - perfect for investigation!
-
-**✅ KEY FINDINGS:**
- **Uses native `@google/genai` SDK** (not LiteLLM) - direct comparison baseline
- **Has built-in debug mode**: `--debug` flag for detailed logging
- **Supports gemini-2.5-pro**: Default model is `gemini-2.5-pro`
- **Easy to modify**: Open source, can add custom logging if needed
-
-**🔬 INVESTIGATION PLAN:**
-1. **Test Gemini CLI performance** with `gemini-2.5-pro` in debug mode
-2. **Compare timing** with our test results (~25s)
-3. **Analyze debug output** to see exact API configuration
-4. **If needed**: Add custom logging to capture full request details
-5. **Compare** with RooCode's LiteLLM proxy approach
-
-**Commands to test:**
-```bash
-cd workspace/gemini-cli
-./bundle/gemini.js --model gemini-2.5-pro --debug --prompt "Hello, test message"
-```
-
-**Expected Benefits:**
- Direct performance comparison with native Google SDK
- Detailed debug output showing API configuration
- Easier to modify than browser extension
- Clear baseline for "fast" vs "slow" performance
-
-**Status:** ✅ **BREAKTHROUGH ACHIEVED!**
-
-**🚨 CRITICAL DISCOVERY:**
- **Gemini CLI with gemini-2.5-pro: 2.6-5.2 seconds** ⚡
- **Our test implementations: ~25 seconds** 🐌
- **Performance gap: 5-10x faster!**
-
-**Test Results:**
-```bash
-# Test 1: Simple greeting
-time ./bundle/gemini.js --model gemini-2.5-pro --debug --prompt "Hello, test message"
-# Result: 2.589s
-
-# Test 2: Code generation
-time ./bundle/gemini.js --model gemini-2.5-pro --debug --prompt "Write Python function"
-# Result: 5.188s
-```
-
-**✅ CONFIRMED:** Google's official CLI achieves the fast performance user reported!
-
-### 🎯 SECONDARY APPROACH: RooCode Extension Analysis
-
-**Plan B:** If Gemini CLI shows similar slow performance, investigate RooCode directly:
-1. **Find RooCode extension directory** in Windsurf
-2. **Add console.log statements** to capture LiteLLM proxy requests
-3. **Compare exact request payloads** with our test implementations
-
-### 🎯 CURRENT STATUS
-
-**✅ CONFIRMED FINDINGS:**
- **LiteLLM abstraction overhead is minimal** (roughly 1-5s, or 3-19%, over the native API in the measurements above)
- **All our test approaches show ~25s with gemini-2.5-pro** (Native API, LiteLLM, thinking budget)
- **RooCode uses LiteLLM proxy** (`llm-proxy.eval.all-hands.dev`) - NOT Google's direct API
- **Thinking budget provides small improvement** (2-3s faster) but not dramatic speedup
-
-**🎯 BREAKTHROUGH CONFIRMED:**
-Google's official Gemini CLI achieves **2.6-5.2s** with `gemini-2.5-pro` - validating user's fast performance reports!
-
-**🔍 NEXT PHASE:**
-Analyze what makes Gemini CLI fast vs our slow implementations (~25s) to identify the optimization gap.
-
-## 🚀 HTTP Request Analysis - BREAKTHROUGH ACHIEVED
-
-**MAJOR SUCCESS**: Successfully captured full HTTP request details from Gemini CLI!
-
-### Corrected Understanding
- **CORRECTION**: `play.googleapis.com` requests were telemetry logging, not actual API calls
- **ACTUAL API**: Gemini CLI uses the same `generativelanguage.googleapis.com` endpoint as our implementations
- **REAL DIFFERENCE**: Configuration and request structure differences, not endpoint
-
-### Captured HTTP Requests
-
-#### Request 1: Model Test/Initialization (972ms)
-```bash
-🚀 FETCH REQUEST: {
-  method: 'POST',
-  url: 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent',
-  headers: {
-    'Content-Type': 'application/json',
-    'x-goog-api-key': 'AIz...'
-  }
-}
-📤 REQUEST BODY: {
-  "contents":[{"parts":[{"text":"test"}]}],
-  "generationConfig":{
-    "maxOutputTokens":1,
-    "temperature":0,
-    "topK":1,
-    "thinkingConfig":{
-      "thinkingBudget":128,
-      "includeThoughts":false
-    }
-  }
-}
-```
-
-#### Request 2: Actual Generation (3714ms)
-```bash
-🚀 FETCH REQUEST: {
-  method: 'POST',
-  url: 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:streamGenerateContent?alt=sse',
-  headers: {
-    'User-Agent': 'GeminiCLI/0.1.13 (darwin; arm64)',
-    'x-goog-api-client': 'google-genai-sdk/1.9.0 gl-node/v23.11.0',
-    'Content-Type': 'application/json',
-    'x-goog-api-key': 'AIz...'
-  }
-}
-```
-
-### Critical Configuration Differences
-
-1. **Thinking Budget**: Gemini CLI uses `thinkingBudget: 128` with `includeThoughts: false`
-2. **Streaming**: Uses `:streamGenerateContent?alt=sse` for streaming responses
-3. **SDK Headers**: Includes specific SDK identification headers:
-   - `User-Agent: GeminiCLI/0.1.13 (darwin; arm64)`
-   - `x-goog-api-client: google-genai-sdk/1.9.0 gl-node/v23.11.0`
-4. **Request Structure**: Two-phase approach (test + generation)
-5. **Model Initialization**: Separate test request with minimal output
-
-### Performance Analysis
- **Gemini CLI Total Time**: ~5s ⚡ (FAST - matches user reports)
- **Request 1**: 972ms (model initialization)
- **Request 2**: 3714ms (actual generation)
- **Total HTTP Time**: ~4.7s ✅ (matches fast total time)
-
-**vs Our Implementations**: ~25s 🐌 (5x slower)
-
-### Key Insights
-1. **Same Endpoint**: Both use `generativelanguage.googleapis.com` - no infrastructure advantage
-2. **Configuration is Key**: Speed difference comes from request configuration, not different endpoints
-3. **Streaming**: Gemini CLI uses `:streamGenerateContent?alt=sse`, we likely use non-streaming
-4. **SDK Headers**: Proper identification headers may affect routing/prioritization
-5. **Thinking Budget**: Uses `thinkingBudget: 128, includeThoughts: false`
-
-### Root Cause Identified
-The 5x performance gap is due to:
-1. **API Version**: New `google.genai` API vs old `google.generativeai` API
-2. **Thinking Budget**: Optimal setting of 128 tokens (Gemini CLI config)
-3. **Streaming vs non-streaming requests**
-4. **Missing SDK identification headers**
-5. **Two-phase request approach**
-
-### Major Breakthrough: API + Thinking Budget
-**Performance Results:**
- **New API + thinking_budget=128**: 9.6s ⚡ (3x faster than old API)
- **Old API default**: ~28s 🐌
- **Gemini CLI**: ~5s (target)
-
-**Gap Reduced**: From 5x to 2x difference remaining
-
-## 🎯 COMPREHENSIVE PERFORMANCE TESTING RESULTS
-
-**Date**: July 27, 2025
-**Status**: ✅ **COMPLETED** - All test failures fixed, comprehensive performance benchmarking completed
-
-### 🎉 All Test Failures Fixed - 100% Success Rate
-
-Successfully resolved all remaining compatibility issues between old and new Gemini APIs. All 16 test configurations now pass with 100% success rate.
-
-**Fixed Issues:**
- Thinking budget configuration syntax (`types.ThinkingConfig()`)
- Part API compatibility for function calls/responses
- JSON argument parsing for New API compatibility
- Tools configuration structure (passed in config object)
- Streaming response parsing in `extract_tool_call` function
-
-### 📊 Complete Performance Results (16 Configurations Tested)
-
-**Source**: Based on comprehensive testing with `comprehensive_performance_results.json`
-
-#### 🏆 **Fastest Configurations (5-10s)**
-1. **Old API (No Thinking)**: 5.298s - *Legacy `google.generativeai` API without thinking capabilities*
-2. **New API - Thinking Budget: 128**: 5.739s - *New genai API with 128-token thinking budget*
-3. **LiteLLM - Thinking Budget: 128**: 6.381s - *LiteLLM proxy with 128-token thinking budget*
-4. **New API - Thinking Budget: 1024**: 9.315s - *New genai API with 1024-token thinking budget*
-5. **New API - Thinking Budget: 4096**: 10.035s - *New genai API with 4096-token thinking budget*
-
-#### ⚡ **Medium Performance (15-17.5s)**
-6. **Thinking Budget: 128** (LiteLLM): 15.465s - *LiteLLM proxy with 128-token thinking budget*
-7. **LiteLLM with Streaming**: 15.475s - *LiteLLM proxy with streaming enabled*
-8. **Reasoning Effort: Low**: 16.179s - *LiteLLM proxy with low reasoning effort*
-9. **OpenHands Style (No Stream)**: 17.285s - *LiteLLM proxy using OpenHands configuration*
-10. **Reasoning Effort: High**: 17.427s - *LiteLLM proxy with high reasoning effort*
-
-#### 🐌 **Slower Configurations (17.5-22s)**
-11. **Basic LiteLLM**: 17.902s - *Standard LiteLLM proxy configuration*
-12. **Thinking Budget: 1024** (LiteLLM): 19.422s - *LiteLLM proxy with 1024-token thinking budget*
-13. **OpenHands Style (Streaming)**: 19.763s - *LiteLLM proxy using OpenHands configuration with streaming*
-14. **LiteLLM - Reasoning Effort: Low**: 21.093s - *LiteLLM proxy with low reasoning effort*
-15. **LiteLLM - Reasoning Effort: High**: 21.115s - *LiteLLM proxy with high reasoning effort*
-16. **Reasoning Effort: Medium**: 22.098s - *LiteLLM proxy with medium reasoning effort*
-
-### 🔍 Key Performance Insights
-
- **Thinking Budget 128 is optimal**: Provides best balance of speed (5.7-6.4s) and thinking capabilities
- **Direct API calls outperform proxy**: Native genai API calls are 2-3x faster than LiteLLM proxy
- **Reasoning Effort modes are slow**: 3-4x slower than thinking budget approaches (16-22s vs 5-10s)
- **Streaming provides modest benefits**: Small performance improvements in some configurations
- **Higher thinking budgets show diminishing returns**: 1024+ tokens don't significantly improve results but increase latency
-
-### 🛠️ OpenHands LLM Configuration Verification
-
-**Source**: `openhands/llm/llm.py` lines 195-210
-
-**Confirmed**: OpenHands automatically applies thinking budget optimization when `reasoning_effort` is `None`:
-
-```python
-if self.config.reasoning_effort is None:
-    # Default optimized thinking budget when not explicitly set
-    # Based on performance testing: 128 tokens achieves ~2.4x speedup
-    kwargs['thinking'] = {'budget_tokens': 128}
-```
-
-This means OpenHands users get the optimal 128-token thinking budget by default, achieving the 5.7s performance tier.
-
-### 📋 Test Configurations Explained
-
-#### Direct API Tests (via `test_thinking_budget.py`)
- **Old API (No Thinking)**: Legacy `google.generativeai` without thinking capabilities
- **New API - Thinking Budget 128/1024/4096**: New `google.genai` with various thinking token budgets
- **LiteLLM - Thinking Budget 128**: LiteLLM proxy with 128-token thinking budget
- **LiteLLM - Reasoning Effort Low/High**: LiteLLM proxy with reasoning effort settings
-
-#### LiteLLM Proxy Tests (via `test_litellm_comprehensive.py`)
- **Basic LiteLLM**: Standard LiteLLM proxy configuration
- **LiteLLM with Streaming**: LiteLLM proxy with streaming enabled
- **OpenHands Style**: LiteLLM proxy using OpenHands-style configuration
- **Reasoning Effort Low/Medium/High**: LiteLLM proxy with various reasoning effort levels
- **Thinking Budget 128/1024**: LiteLLM proxy with thinking budget configurations
-
-### 📝 TODO: Future Testing Improvements
-
-**Deferred to a later testing session:**
- Add tests using actual LiteLLM and OpenHands libraries (not simulating their configs)
- Test real OpenHands integration with live LiteLLM proxy
- Benchmark actual production OpenHands usage patterns
- Compare with real RooCode extension performance in production
-
-### 🎯 Recommendations
-
-1. **Use Thinking Budget 128**: Optimal performance/capability balance
-2. **Prefer Direct API**: When possible, use native genai API over LiteLLM proxy
-3. **Avoid Reasoning Effort**: 3-4x slower than thinking budget approaches
-4. **Enable Streaming**: Provides modest but consistent performance improvements
-5. **Default Configuration**: OpenHands' default (reasoning_effort=None) automatically uses optimal 128-token thinking budget
-
-### 📊 LiteLLM Internal Mapping Revealed
-
-**Source**: Debug output from LiteLLM comprehensive testing
-
-From debug output, discovered LiteLLM's reasoning_effort mapping:
- `reasoning_effort="low"` → `thinkingBudget: 1024` (21.093s)
- `reasoning_effort="medium"` → `thinkingBudget: 2048` (22.098s - slowest!)
- `reasoning_effort="high"` → `thinkingBudget: 4096` (21.115s)
- `thinking={"budget_tokens": 128}` → `thinkingBudget: 128` (15.465s - fastest!)
-
-**🔍 LiteLLM Debug Output Example:**
-```json
-{
-  "thinkingConfig": {
-    "thinkingBudget": 1024,
-    "includeThoughts": true
-  }
-}
-```
-
-**Key Insight**: LiteLLM's `reasoning_effort` settings use much larger thinking budgets (1024-4096 tokens) compared to the optimal 128 tokens, explaining the 3-4x performance difference.
-
-### Implementation Recommendations
-
-**For OpenHands Gemini Integration:**
-1. **Use 128-token thinking budget** instead of default/large budgets
-2. **LiteLLM Configuration**: Use `thinking={"budget_tokens": 128}` instead of `reasoning_effort`
-3. **Avoid**: `reasoning_effort="medium"` (slowest configuration!)
-4. **Target**: Apply remaining optimizations to close 2x gap
-
-### Remaining Investigation
-**2x Performance Gap (11.366s → ~5s):**
-1. **Streaming vs non-streaming** requests
-2. **SDK identification headers** (`User-Agent`, `x-goog-api-client`)
-3. **Two-phase request approach** (test + generation)
-4. **Request structure optimizations**
-
-## 🚀 IMPLEMENTATION: OpenHands Gemini Performance Fix
-
-**Date**: December 26, 2024
-**Status**: ✅ **IMPLEMENTED** - Fix deployed and tested successfully
-
-### Implementation Details
-
-**Modified**: `openhands/llm/llm.py`
-```python
-# For Gemini models, use optimized thinking budget instead of reasoning_effort
-# Based on performance testing: 128 tokens achieves ~2.4x speedup vs reasoning_effort
-if 'gemini' in self.config.model.lower():
-    kwargs['thinking'] = {"budget_tokens": 128}
-else:
-    kwargs['reasoning_effort'] = self.config.reasoning_effort
-```
-
-**Created**: `test_openhands_gemini_fix.py` - Verification test suite
-
-### 🏆 Performance Results
-
-**Test 1**: 10.432s ⚡
-**Test 2**: 9.309s ⚡
-**Average**: ~9.9s (excellent consistency)
-
-**Improvement**: 2.5x speedup (from ~25s to ~10s)
-
-### ✅ Verification
-
-1. **Configuration Check**: ✅ Fix applies correctly to gemini-2.5-pro
-2. **Performance Test**: ✅ Consistent ~10s response times
-3. **Functionality Test**: ✅ Proper responses generated
-4. **Code Quality**: ✅ Passes all pre-commit hooks
-
-### Impact Analysis
-
-**Before Fix**:
- Used `reasoning_effort='high'` → ~25s response time
- Suboptimal LiteLLM parameter mapping
-
-**After Fix**:
- Uses `thinking={"budget_tokens": 128}` → ~10s response time
- Same 128-token thinking budget that Gemini CLI uses (a ~2x gap to Gemini CLI's ~5s response time remains)
-
-### Next Steps
-1. **✅ DONE**: Comprehensive thinking budget analysis
-2. **✅ DONE**: LiteLLM parameter mapping discovery
-3. **✅ DONE**: 128-token thinking budget implemented in OpenHands
-4. **Remaining**: Investigate final 2x gap (10s → 5s) with streaming/headers
-5. **Target**: Achieve complete performance parity with Gemini CLI
@@ -1,367 +0,0 @@
-# Gemini 2.5 Pro API Message Structure and Configuration
-
-This document provides comprehensive information about the Gemini API message structure, system instructions, and generationConfig based on official Google documentation.
-
-## Key Findings
-
-### System Instructions
- **System instructions are NOT part of the contents array**
- **System instructions are sent as a separate `systemInstruction` field**
- **Because they are sent separately, system instructions have no ordering requirement relative to the messages in `contents`**
-
-### Message Structure
- **Contents array contains conversation messages in chronological order**
- **Each message has a `role` (user/model) and `parts` array**
- **System instructions are separate from conversation flow**
-
-## API Request Structure
-
-### Basic Structure
-```json
-{
-  "systemInstruction": {
-    "parts": [
-      {
-        "text": "You are a helpful assistant."
-      }
-    ]
-  },
-  "contents": [
-    {
-      "role": "user",
-      "parts": [
-        {
-          "text": "Hello, how are you?"
-        }
-      ]
-    }
-  ],
-  "generationConfig": {
-    "temperature": 0.7,
-    "topP": 0.8,
-    "topK": 40,
-    "thinkingConfig": {
-      "includeThoughts": true
-    }
-  }
-}
-```
-
-## System Instructions
-
-### Key Points
- System instructions are **separate from the contents array**
- They are sent in the `systemInstruction` field at the root level
- System instructions guide the overall behavior of the model
-
-### REST API Example
-```bash
-curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent" \
-  -H "x-goog-api-key: $GEMINI_API_KEY" \
-  -H 'Content-Type: application/json' \
-  -d '{
-    "system_instruction": {
-      "parts": [
-        {
-          "text": "You are a cat. Your name is Neko."
-        }
-      ]
-    },
-    "contents": [
-      {
-        "parts": [
-          {
-            "text": "Hello there"
-          }
-        ]
-      }
-    ]
-  }'
-```
-
-### Python SDK Example
-```python
-from google import genai
-from google.genai import types
-
-client = genai.Client()
-
-response = client.models.generate_content(
-    model="gemini-2.5-flash",
-    config=types.GenerateContentConfig(
-        system_instruction="You are a cat. Your name is Neko."
-    ),
-    contents="Hello there"
-)
-```
-
-### JavaScript SDK Example
-```javascript
-import { GoogleGenAI } from "@google/genai";
-
-const ai = new GoogleGenAI({});
-
-const response = await ai.models.generateContent({
-  model: "gemini-2.5-flash",
-  contents: "Hello there",
-  config: {
-    systemInstruction: "You are a cat. Your name is Neko.",
-  },
-});
-```
-
-## Multi-turn Conversations (Chat)
-
-### Message Ordering
- **No requirement for system messages to be first in contents**
- **Contents array follows chronological conversation order**
- **Roles alternate between "user" and "model"**
-
-### REST API Chat Example
-```bash
-curl https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent \
-  -H "x-goog-api-key: $GEMINI_API_KEY" \
-  -H 'Content-Type: application/json' \
-  -X POST \
-  -d '{
-    "contents": [
-      {
-        "role": "user",
-        "parts": [
-          {
-            "text": "Hello"
-          }
-        ]
-      },
-      {
-        "role": "model",
-        "parts": [
-          {
-            "text": "Great to meet you. What would you like to know?"
-          }
-        ]
-      },
-      {
-        "role": "user",
-        "parts": [
-          {
-            "text": "I have two dogs in my house. How many paws are in my house?"
-          }
-        ]
-      }
-    ]
-  }'
-```
-
-### Python Chat Example
-```python
-from google import genai
-
-client = genai.Client()
-chat = client.chats.create(model="gemini-2.5-flash")
-
-response = chat.send_message("I have 2 dogs in my house.")
-print(response.text)
-
-response = chat.send_message("How many paws are in my house?")
-print(response.text)
-
-for message in chat.get_history():
-    print(f'role - {message.role}: {message.parts[0].text}')
-```
-
-### JavaScript Chat Example
-```javascript
-import { GoogleGenAI } from "@google/genai";
-
-const ai = new GoogleGenAI({});
-
-const chat = ai.chats.create({
-  model: "gemini-2.5-flash",
-  history: [
-    {
-      role: "user",
-      parts: [{ text: "Hello" }],
-    },
-    {
-      role: "model",
-      parts: [{ text: "Great to meet you. What would you like to know?" }],
-    },
-  ],
-});
-
-const response1 = await chat.sendMessage({
-  message: "I have 2 dogs in my house.",
-});
-
-const response2 = await chat.sendMessage({
-  message: "How many paws are in my house?",
-});
-```
-
-## Generation Configuration
-
-### Basic Configuration
-```json
-{
-  "generationConfig": {
-    "temperature": 1.0,
-    "topP": 0.8,
-    "topK": 10,
-    "stopSequences": ["Title"]
-  }
-}
-```
-
-### Thinking Configuration (Gemini 2.5)
-```json
-{
-  "generationConfig": {
-    "temperature": 0.7,
-    "thinkingConfig": {
-      "thinkingBudget": 0,
-      "includeThoughts": true
-    }
-  }
-}
-```
-
-### REST API with Generation Config
-```bash
-curl https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent \
-  -H "x-goog-api-key: $GEMINI_API_KEY" \
-  -H 'Content-Type: application/json' \
-  -X POST \
-  -d '{
-    "contents": [
-      {
-        "parts": [
-          {
-            "text": "Explain how AI works"
-          }
-        ]
-      }
-    ],
-    "generationConfig": {
-      "stopSequences": ["Title"],
-      "temperature": 1.0,
-      "topP": 0.8,
-      "topK": 10,
-      "thinkingConfig": {
-        "includeThoughts": true
-      }
-    }
-  }'
-```
-
-### Python with Generation Config
-```python
-from google import genai
-from google.genai import types
-
-client = genai.Client()
-
-response = client.models.generate_content(
-    model="gemini-2.5-flash",
-    contents=["Explain how AI works"],
-    config=types.GenerateContentConfig(
-        temperature=0.1,
-        thinking_config=types.ThinkingConfig(
-            include_thoughts=True
-        )
-    )
-)
-```
-
-### JavaScript with Generation Config
-```javascript
-import { GoogleGenAI } from "@google/genai";
-
-const ai = new GoogleGenAI({});
-
-const response = await ai.models.generateContent({
-  model: "gemini-2.5-flash",
-  contents: "Explain how AI works",
-  config: {
-    temperature: 0.1,
-    thinkingConfig: {
-      includeThoughts: true,
-    },
-  },
-});
-```
-
-## Complete Example with All Features
-
-### REST API Complete Example
-```bash
-curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent" \
-  -H "x-goog-api-key: $GEMINI_API_KEY" \
-  -H 'Content-Type: application/json' \
-  -X POST \
-  -d '{
-    "systemInstruction": {
-      "parts": [
-        {
-          "text": "You are a helpful AI assistant specialized in explaining complex topics clearly."
-        }
-      ]
-    },
-    "contents": [
-      {
-        "role": "user",
-        "parts": [
-          {
-            "text": "Hello, I need help understanding machine learning."
-          }
-        ]
-      },
-      {
-        "role": "model",
-        "parts": [
-          {
-            "text": "Hello! I would be happy to help you understand machine learning. What specific aspect would you like to explore?"
-          }
-        ]
-      },
-      {
-        "role": "user",
-        "parts": [
-          {
-            "text": "Can you explain neural networks in simple terms?"
-          }
-        ]
-      }
-    ],
-    "generationConfig": {
-      "temperature": 0.7,
-      "topP": 0.8,
-      "topK": 40,
-      "maxOutputTokens": 1000,
-      "thinkingConfig": {
-        "includeThoughts": true
-      }
-    }
-  }'
-```
-
-## Key Takeaways
-
-1. **System Instructions**: Separate field (`systemInstruction`), not part of `contents`
-2. **Message Ordering**: No requirement for system messages to be first in `contents`
-3. **Conversation Flow**: `contents` array follows chronological order with alternating user/model roles
-4. **Generation Config**: Separate `generationConfig` object for model parameters
-5. **Thinking Mode**: Available in Gemini 2.5 models via `thinkingConfig`
-
-## References
-
-All information in this document is sourced from official Google Gemini API documentation:
-
- **Text Generation Guide**: https://ai.google.dev/gemini-api/docs/text-generation
- **API Reference**: https://ai.google.dev/api/generate-content
- **System Instructions**: Examples from text generation guide showing `systemInstruction` as separate field
- **Chat Examples**: Multi-turn conversation examples from official documentation
- **Generation Config**: Configuration examples from official REST API documentation
- **Thinking Configuration**: Gemini 2.5 thinking examples from official documentation
-
-Each code example and API structure shown above is directly from Google's official documentation and represents the current (as of January 2025) API specification.
@@ -1,162 +0,0 @@
-#!/usr/bin/env python3
-"""
-Example script demonstrating how to monkey-patch litellm to automatically
-include thinkingConfig in Gemini API calls.
-
-This approach allows you to enable Gemini's thinking/reasoning capabilities
-without modifying the litellm source code.
-
-This version patches both sync and async transformation functions to ensure
-compatibility with both litellm.completion() and litellm.acompletion().
-OpenHands uses the sync version, so this is important for real-world usage.
-"""
-
-import asyncio
-
-import litellm
-from litellm.llms.vertex_ai.gemini.transformation import (
-    async_transform_request_body,
-    sync_transform_request_body,
-)
-
-
-def apply_gemini_thinking_patch():
-    """
-    Apply a monkey patch to litellm to automatically include thinkingConfig
-    in all Gemini API calls (both sync and async).
-    """
-    # Store the original transformation functions
-    original_async_transform = async_transform_request_body
-    original_sync_transform = sync_transform_request_body
-
-    # Create patched async version that adds thinkingConfig
-    async def patched_async_transform_with_thinking(*args, **kwargs):
-        # Add thinkingConfig to optional_params before calling the original function
-        if 'optional_params' in kwargs:
-            # Configure thinking settings - customize as needed
-            kwargs['optional_params']['thinkingConfig'] = {
-                'includeThoughts': True,
-                # Add other thinking config options here if needed
-            }
-        # Call the original function with modified params
-        return await original_async_transform(*args, **kwargs)
-
-    # Create patched sync version that adds thinkingConfig
-    def patched_sync_transform_with_thinking(*args, **kwargs):
-        # Add thinkingConfig to optional_params before calling the original function
-        if 'optional_params' in kwargs:
-            # Configure thinking settings - customize as needed
-            kwargs['optional_params']['thinkingConfig'] = {
-                'includeThoughts': True,
-                # Add other thinking config options here if needed
-            }
-        # Call the original function with modified params
-        return original_sync_transform(*args, **kwargs)
-
-    # Apply the monkey patches
-    import litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini as gemini_module
-
-    gemini_module.async_transform_request_body = patched_async_transform_with_thinking
-    gemini_module.sync_transform_request_body = patched_sync_transform_with_thinking
-
-    print('✅ Gemini thinking patch applied successfully (both sync and async)!')
-    print(
-        '   All Gemini API calls will now include thinkingConfig with includeThoughts=True'
-    )
-
-    return original_async_transform, original_sync_transform
-
-
-def remove_gemini_thinking_patch(original_functions):
-    """Remove the monkey-patch and restore original functions."""
-    original_async_transform, original_sync_transform = original_functions
-    import litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini as gemini_module
-
-    gemini_module.async_transform_request_body = original_async_transform
-    gemini_module.sync_transform_request_body = original_sync_transform
-    print('✅ Gemini thinking patch removed successfully!')
-
-
-async def example_async_usage():
-    """
-    Example of using litellm.acompletion() with the thinking patch applied.
-    """
-    try:
-        # Make an async completion request - thinkingConfig will be automatically included
-        response = await litellm.acompletion(
-            model='gemini/gemini-pro',
-            messages=[
-                {
-                    'role': 'user',
-                    'content': 'Explain the concept of quantum entanglement in simple terms.',
-                }
-            ],
-            temperature=0.7,
-            max_tokens=200,
-            api_key='your-gemini-api-key-here',  # Replace with your actual API key
-        )
-
-        print('\n🔮 Async Response:')
-        print(response.choices[0].message.content)
-
-    except Exception as e:
-        print(f'❌ Error in async call: {e}')
-
-
-def example_sync_usage():
-    """
-    Example of using litellm.completion() with the thinking patch applied.
-    This is the version that OpenHands uses.
-    """
-    try:
-        # Make a sync completion request - thinkingConfig will be automatically included
-        response = litellm.completion(
-            model='gemini/gemini-pro',
-            messages=[
-                {
-                    'role': 'user',
-                    'content': 'What are the key principles of machine learning?',
-                }
-            ],
-            temperature=0.7,
-            max_tokens=200,
-            api_key='your-gemini-api-key-here',  # Replace with your actual API key
-        )
-
-        print('\n🔮 Sync Response:')
-        print(response.choices[0].message.content)
-
-    except Exception as e:
-        print(f'❌ Error in sync call: {e}')
-
-
-async def main():
-    """
-    Main function demonstrating the complete workflow.
-    """
-    print('🚀 Gemini Thinking Patch Example')
-    print('=' * 40)
-
-    # Apply the patch
-    original_functions = apply_gemini_thinking_patch()
-
-    try:
-        print('\n📝 Testing sync completion (like OpenHands uses)...')
-        example_sync_usage()
-
-        print('\n📝 Testing async completion...')
-        await example_async_usage()
-
-    finally:
-        # Clean up - restore original functions
-        remove_gemini_thinking_patch(original_functions)
-
-    print('\n✨ Example completed!')
-
-
-if __name__ == '__main__':
-    # Note: You'll need to set your Gemini API key for this to work
-    # export GEMINI_API_KEY="your-api-key-here"
-    # or replace "your-gemini-api-key-here" in the examples above
-
-    asyncio.run(main())
@@ -106,10 +106,15 @@ class CodeActAgent(Agent):
    def _get_tools(self) -> list['ChatCompletionToolParam']:
        # For these models, we use short tool descriptions ( < 1024 tokens)
        # to avoid hitting the OpenAI token limit for tool descriptions.
-        SHORT_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-', 'o3', 'o1', 'o4']
+        SHORT_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-4', 'o3', 'o1', 'o4']

        use_short_tool_desc = False
        if self.llm is not None:
+            # Historically, OpenAI enforced a maximum function description length of 1k characters:
+            # https://community.openai.com/t/function-call-description-max-length/529902
+            # That limit no longer seems to apply:
+            # https://community.openai.com/t/was-the-character-limit-for-schema-descriptions-upgraded/1225975
+            # Longer descriptions were tested on GPT-5 and still work, but the short-description logic is kept to be safe for older models.
            use_short_tool_desc = any(
                model_substr in self.llm.config.model
                for model_substr in SHORT_TOOL_DESCRIPTION_LLM_SUBSTRS
@@ -1,7 +1,6 @@
-import sys
-
 from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk

+from openhands.agenthub.codeact_agent.tools.prompt import refine_prompt
 from openhands.llm.tool_names import EXECUTE_BASH_TOOL_NAME

 _DETAILED_BASH_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session.
@@ -35,12 +34,6 @@ _SHORT_BASH_DESCRIPTION = """Execute a bash command in the terminal.
 * One command at a time: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together."""


-def refine_prompt(prompt: str):
-    if sys.platform == 'win32':
-        return prompt.replace('bash', 'powershell')
-    return prompt
-
-
 def create_cmd_run_tool(
    use_short_description: bool = False,
 ) -> ChatCompletionToolParam:
@@ -0,0 +1,29 @@
+import re
+import sys
+
+
+def refine_prompt(prompt: str) -> str:
+    """
+    Adapt a prompt's shell wording to the host platform.
+
+    On Windows (``sys.platform == 'win32'``) the tool name ``execute_bash`` becomes
+    ``execute_powershell`` and every standalone word ``bash`` becomes ``powershell``.
+    Matching is case-insensitive and the replacement text is always lowercase.
+    On any other platform the prompt is returned unchanged.
+
+    Args:
+        prompt: The prompt text to refine
+
+    Returns:
+        The refined prompt text
+    """
+    if sys.platform != 'win32':
+        return prompt
+
+    # Rename the tool first so that its 'bash' suffix is handled explicitly.
+    result = re.sub(
+        r'\bexecute_bash\b', 'execute_powershell', prompt, flags=re.IGNORECASE
+    )
+    # '_' is a word character, so \b never matches between '_' and 'bash':
+    # names such as 'my_bash' are left alone without needing any lookbehind.
+    return re.sub(r'\bbash\b', 'powershell', result, flags=re.IGNORECASE)
@@ -0,0 +1 @@
+"""OpenHands CLI module."""
@@ -0,0 +1,54 @@
+"""Main entry point for OpenHands CLI with subcommand support."""
+
+import sys
+
+import openhands
+import openhands.cli.suppress_warnings  # noqa: F401
+from openhands.cli.gui_launcher import launch_gui_server
+from openhands.cli.main import run_cli_command
+from openhands.core.config import get_cli_parser
+from openhands.core.config.arg_utils import get_subparser
+
+
+def main():
+    """Entry point for the ``openhands`` command.
+
+    Dispatches to the ``serve`` or ``cli`` sub-command. For backward compatibility,
+    an invocation with no arguments, or whose first argument is neither ``cli`` nor
+    ``serve``, is treated as ``openhands cli ...``.
+    """
+    parser = get_cli_parser()
+    argc = len(sys.argv)
+
+    # A lone --help/-h prints the top-level help followed by the `cli` help.
+    if argc == 2 and sys.argv[1] in ('--help', '-h'):
+        print(parser.format_help())
+        print('\n' + '=' * 80)
+        print('CLI command help:\n')
+        print(get_subparser(parser, 'cli').format_help())
+        sys.exit(0)
+
+    # No recognised sub-command in first position: behave as "openhands cli".
+    known_commands = ['cli', 'serve']
+    if argc == 1 or (argc > 1 and sys.argv[1] not in known_commands):
+        sys.argv.insert(1, 'cli')
+
+    args = parser.parse_args()
+
+    # Only parsers that define --version put a `version` attribute on the namespace.
+    if getattr(args, 'version', False):
+        print(f'OpenHands CLI version: {openhands.get_version()}')
+        sys.exit(0)
+
+    command = args.command
+    if command == 'serve':
+        launch_gui_server(mount_cwd=args.mount_cwd, gpu=args.gpu)
+        return
+    if command == 'cli' or command is None:
+        run_cli_command(args)
+        return
+
+    parser.print_help()
+    sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
@@ -0,0 +1,219 @@
+"""GUI launcher for OpenHands CLI."""
+
+import os
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+from prompt_toolkit import print_formatted_text
+from prompt_toolkit.formatted_text import HTML
+
+from openhands import __version__
+
+
+def _format_docker_command_for_logging(cmd: list[str]) -> str:
+    """Format a Docker command as grey prompt_toolkit HTML markup for logging.
+
+    The returned string is meant to be wrapped in ``HTML(...)`` by the caller, so
+    the markup-significant characters ``&``, ``<`` and ``>`` in the arguments are
+    escaped. Without this, an argument such as a mounted path containing ``&``
+    would make the markup unparseable.
+
+    Args:
+        cmd (list[str]): The Docker command as a list of strings
+
+    Returns:
+        str: The space-joined, escaped command wrapped in ``<grey>`` tags
+    """
+    # Imported locally so this function does not rely on a new module-level import.
+    from html import escape
+
+    cmd_str = escape(' '.join(cmd), quote=False)
+    return f'<grey>Running Docker command: {cmd_str}</grey>'
+
+
+def check_docker_requirements() -> bool:
+    """Verify that the Docker CLI is on PATH and that ``docker info`` succeeds.
+
+    For whichever check fails, a red problem line and a grey hint line are printed.
+
+    Returns:
+        bool: True when both checks pass, False otherwise.
+    """
+
+    def _fail(problem_markup: str, hint_markup: str) -> bool:
+        # Print the problem line, then the hint line, and report failure.
+        print_formatted_text(HTML(problem_markup))
+        print_formatted_text(HTML(hint_markup))
+        return False
+
+    # Step 1: is the docker executable available at all?
+    if not shutil.which('docker'):
+        return _fail(
+            '<ansired>❌ Docker is not installed or not in PATH.</ansired>',
+            '<grey>Please install Docker first: https://docs.docker.com/get-docker/</grey>',
+        )
+
+    # Step 2: does `docker info` complete within 10 seconds?
+    try:
+        probe = subprocess.run(
+            ['docker', 'info'], capture_output=True, text=True, timeout=10
+        )
+    except (subprocess.TimeoutExpired, subprocess.SubprocessError) as e:
+        return _fail(
+            '<ansired>❌ Failed to check Docker status.</ansired>',
+            f'<grey>Error: {e}</grey>',
+        )
+
+    # A non-zero exit status is reported as the daemon not running.
+    if probe.returncode != 0:
+        return _fail(
+            '<ansired>❌ Docker daemon is not running.</ansired>',
+            '<grey>Please start Docker and try again.</grey>',
+        )
+
+    return True
+
+
+def ensure_config_dir_exists() -> Path:
+    """Create ``~/.openhands`` if it is missing and return its path."""
+    target = Path.home().joinpath('.openhands')
+    # exist_ok: an already-present directory is not an error.
+    target.mkdir(exist_ok=True)
+    return target
+
+
+def launch_gui_server(mount_cwd: bool = False, gpu: bool = False) -> None:
+    """Launch the OpenHands GUI server using Docker.
+
+    Checks the Docker requirements, makes sure the configuration directory exists,
+    pulls the runtime image for the current version, then runs the application
+    image in the foreground. The process exits with status 1 when the requirement
+    check, the pull, or the run fails, and with status 0 when the run is
+    interrupted with Ctrl+C.
+
+    Args:
+        mount_cwd: If True, mount the current working directory into the container.
+        gpu: If True, enable GPU support by mounting all GPUs into the container via nvidia-docker.
+    """
+    # Imported locally so this function does not rely on a new module-level import.
+    # Text interpolated into prompt_toolkit HTML markup must be escaped: a '&' or
+    # '<' in a path (or in an error message that quotes it) breaks markup parsing.
+    from html import escape
+
+    print_formatted_text(
+        HTML('<ansiblue>🚀 Launching OpenHands GUI server...</ansiblue>')
+    )
+    print_formatted_text('')
+
+    # Check Docker requirements
+    if not check_docker_requirements():
+        sys.exit(1)
+
+    # Ensure config directory exists
+    config_dir = ensure_config_dir_exists()
+
+    # Get the current version for the Docker image
+    version = __version__
+    runtime_image = f'docker.all-hands.dev/all-hands-ai/runtime:{version}-nikolaik'
+    app_image = f'docker.all-hands.dev/all-hands-ai/openhands:{version}'
+
+    print_formatted_text(HTML('<grey>Pulling required Docker images...</grey>'))
+
+    # Pull the runtime image first
+    pull_cmd = ['docker', 'pull', runtime_image]
+    print_formatted_text(HTML(_format_docker_command_for_logging(pull_cmd)))
+    try:
+        subprocess.run(
+            pull_cmd,
+            check=True,
+            timeout=300,  # 5 minutes timeout
+        )
+    except subprocess.CalledProcessError:
+        print_formatted_text(
+            HTML('<ansired>❌ Failed to pull runtime image.</ansired>')
+        )
+        sys.exit(1)
+    except subprocess.TimeoutExpired:
+        print_formatted_text(
+            HTML('<ansired>❌ Timeout while pulling runtime image.</ansired>')
+        )
+        sys.exit(1)
+
+    print_formatted_text('')
+    print_formatted_text(
+        HTML('<ansigreen>✅ Starting OpenHands GUI server...</ansigreen>')
+    )
+    print_formatted_text(
+        HTML('<grey>The server will be available at: http://localhost:3000</grey>')
+    )
+    print_formatted_text(HTML('<grey>Press Ctrl+C to stop the server.</grey>'))
+    print_formatted_text('')
+
+    # Build the Docker command
+    docker_cmd = [
+        'docker',
+        'run',
+        '-it',
+        '--rm',
+        '--pull=always',
+        '-e',
+        f'SANDBOX_RUNTIME_CONTAINER_IMAGE={runtime_image}',
+        '-e',
+        'LOG_ALL_EVENTS=true',
+        '-v',
+        '/var/run/docker.sock:/var/run/docker.sock',
+        '-v',
+        f'{config_dir}:/.openhands',
+    ]
+
+    # Add GPU support if requested
+    if gpu:
+        print_formatted_text(
+            HTML('<ansigreen>🖥️ Enabling GPU support via nvidia-docker...</ansigreen>')
+        )
+        # Add the --gpus all flag right after 'docker run' to enable all GPUs
+        docker_cmd.insert(2, '--gpus')
+        docker_cmd.insert(3, 'all')
+        # Add environment variable to pass GPU support to sandbox containers
+        docker_cmd.extend(
+            [
+                '-e',
+                'SANDBOX_ENABLE_GPU=true',
+            ]
+        )
+
+    # Add current working directory mount if requested
+    if mount_cwd:
+        cwd = Path.cwd()
+        # Following the documentation at https://docs.all-hands.dev/usage/runtimes/docker#connecting-to-your-filesystem
+        docker_cmd.extend(
+            [
+                '-e',
+                f'SANDBOX_VOLUMES={cwd}:/workspace:rw',
+            ]
+        )
+
+        # Set user ID for Unix-like systems only
+        if os.name != 'nt':  # Not Windows
+            try:
+                user_id = subprocess.check_output(['id', '-u'], text=True).strip()
+                docker_cmd.extend(['-e', f'SANDBOX_USER_ID={user_id}'])
+            except (subprocess.CalledProcessError, FileNotFoundError):
+                # If 'id' command fails or doesn't exist, skip setting user ID
+                pass
+        # Print the folder that will be mounted to inform the user.
+        # The path is escaped because it is arbitrary user data placed inside markup.
+        cwd_markup = escape(str(cwd), quote=False)
+        print_formatted_text(
+            HTML(
+                f'<ansigreen>📂 Mounting current directory:</ansigreen> <ansiyellow>{cwd_markup}</ansiyellow> <ansigreen>to</ansigreen> <ansiyellow>/workspace</ansiyellow>'
+            )
+        )
+
+    docker_cmd.extend(
+        [
+            '-p',
+            '3000:3000',
+            '--add-host',
+            'host.docker.internal:host-gateway',
+            '--name',
+            'openhands-app',
+            app_image,
+        ]
+    )
+
+    try:
+        # Log and run the Docker command
+        print_formatted_text(HTML(_format_docker_command_for_logging(docker_cmd)))
+        subprocess.run(docker_cmd, check=True)
+    except subprocess.CalledProcessError as e:
+        print_formatted_text('')
+        print_formatted_text(
+            HTML('<ansired>❌ Failed to start OpenHands GUI server.</ansired>')
+        )
+        # The error text quotes the full command (including any mounted path),
+        # so it is escaped before being embedded in markup.
+        error_markup = escape(str(e), quote=False)
+        print_formatted_text(HTML(f'<grey>Error: {error_markup}</grey>'))
+        sys.exit(1)
+    except KeyboardInterrupt:
+        print_formatted_text('')
+        print_formatted_text(
+            HTML('<ansigreen>✓ OpenHands GUI server stopped successfully.</ansigreen>')
+        )
+        sys.exit(0)
@@ -45,7 +45,6 @@ from openhands.controller import AgentController
 from openhands.controller.agent import Agent
 from openhands.core.config import (
    OpenHandsConfig,
-    parse_arguments,
    setup_config_from_args,
 )
 from openhands.core.config.condenser_config import NoOpCondenserConfig
@@ -524,10 +523,8 @@ def run_alias_setup_flow(config: OpenHandsConfig) -> None:
    print_formatted_text('')


-async def main_with_loop(loop: asyncio.AbstractEventLoop) -> None:
+async def main_with_loop(loop: asyncio.AbstractEventLoop, args) -> None:
    """Runs the agent in CLI mode."""
-    args = parse_arguments()
-
    # Set log level from command line argument if provided
    if args.log_level and isinstance(args.log_level, str):
        log_level = getattr(logging, str(args.log_level).upper())
@@ -575,13 +572,9 @@ async def main_with_loop(loop: asyncio.AbstractEventLoop) -> None:

    # Use settings from settings store if available and override with command line arguments
    if settings:
-        # Handle agent configuration
-        if args.agent_cls:
-            config.default_agent = str(args.agent_cls)
-        else:
-            # settings.agent is not None because we check for it in setup_config_from_args
-            assert settings.agent is not None
-            config.default_agent = settings.agent
+        # settings.agent is not None because we check for it in setup_config_from_args
+        assert settings.agent is not None
+        config.default_agent = settings.agent

        # Handle LLM configuration with proper precedence:
        # 1. CLI parameters (-l) have highest precedence (already handled in setup_config_from_args)
@@ -719,18 +712,19 @@ After reviewing the file, please ask the user what they would like to do with it
    get_runtime_cls(config.runtime).teardown(config)


-def main():
+def run_cli_command(args):
+    """Run the CLI command with proper error handling and cleanup."""
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
-        loop.run_until_complete(main_with_loop(loop))
+        loop.run_until_complete(main_with_loop(loop, args))
    except KeyboardInterrupt:
        print_formatted_text('⚠️ Session was interrupted: interrupted\n')
    except ConnectionRefusedError as e:
-        print(f'Connection refused: {e}')
+        print_formatted_text(f'Connection refused: {e}')
        sys.exit(1)
    except Exception as e:
-        print(f'An error occurred: {e}')
+        print_formatted_text(f'An error occurred: {e}')
        sys.exit(1)
    finally:
        try:
@@ -743,9 +737,5 @@ def main():
            loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
            loop.close()
        except Exception as e:
-            print(f'Error during cleanup: {e}')
+            print_formatted_text(f'Error during cleanup: {e}')
            sys.exit(1)
-
-
-if __name__ == '__main__':
-    main()
@@ -11,6 +11,7 @@ import threading
 import time
 from typing import Generator

+import markdown  # type: ignore
 from prompt_toolkit import PromptSession, print_formatted_text
 from prompt_toolkit.application import Application
 from prompt_toolkit.completion import CompleteEvent, Completer, Completion
@@ -65,6 +66,7 @@ MAX_RECENT_THOUGHTS = 5
 # Color and styling constants
 COLOR_GOLD = '#FFD700'
 COLOR_GREY = '#808080'
+COLOR_AGENT_BLUE = '#4682B4'  # Steel blue - less saturated, works well on both light and dark backgrounds
 DEFAULT_STYLE = Style.from_dict(
    {
        'gold': COLOR_GOLD,
@@ -236,13 +238,19 @@ def display_mcp_errors() -> None:


 # Prompt output display functions
-def display_thought_if_new(thought: str) -> None:
-    """Display a thought only if it hasn't been displayed recently."""
+def display_thought_if_new(thought: str, is_agent_message: bool = False) -> None:
+    """
+    Display a thought only if it hasn't been displayed recently.
+
+    Args:
+        thought: The thought to display
+        is_agent_message: If True, apply agent styling and markdown rendering
+    """
    global recent_thoughts
    if thought and thought.strip():
        # Check if this thought was recently displayed
        if thought not in recent_thoughts:
-            display_message(thought)
+            display_message(thought, is_agent_message=is_agent_message)
            recent_thoughts.append(thought)
            # Keep only the most recent thoughts
            if len(recent_thoughts) > MAX_RECENT_THOUGHTS:
@@ -255,7 +263,7 @@ def display_event(event: Event, config: OpenHandsConfig) -> None:
        if isinstance(event, CmdRunAction):
            # For CmdRunAction, display thought first, then command
            if hasattr(event, 'thought') and event.thought:
-                display_message(event.thought)
+                display_thought_if_new(event.thought)

            # Only display the command if it's not already confirmed
            # Commands are always shown when AWAITING_CONFIRMATION, so we don't need to show them again when CONFIRMED
@@ -269,14 +277,15 @@ def display_event(event: Event, config: OpenHandsConfig) -> None:
        elif isinstance(event, Action):
            # For other actions, display thoughts normally
            if hasattr(event, 'thought') and event.thought:
-                display_message(event.thought)
+                display_thought_if_new(event.thought)
            if hasattr(event, 'final_thought') and event.final_thought:
-                display_message(event.final_thought)
+                # Display final thoughts with agent styling
+                display_message(event.final_thought, is_agent_message=True)

        if isinstance(event, MessageAction):
            if event.source == EventSource.AGENT:
-                # Check if this message content is a duplicate thought
-                display_thought_if_new(event.content)
+                # Display agent messages with styling and markdown rendering
+                display_thought_if_new(event.content, is_agent_message=True)
        elif isinstance(event, CmdOutputObservation):
            display_command_output(event.content)
        elif isinstance(event, FileEditObservation):
@@ -291,11 +300,76 @@ def display_event(event: Event, config: OpenHandsConfig) -> None:
            display_error(event.content)


-def display_message(message: str) -> None:
+def display_message(message: str, is_agent_message: bool = False) -> None:
+    """
+    Display a message in the terminal with markdown rendering.
+
+    Args:
+        message: The message to display
+        is_agent_message: If True, apply agent styling (blue color)
+    """
    message = message.strip()

    if message:
-        print_formatted_text(f'\n{message}')
+        # Add spacing before the message
+        print_formatted_text('')
+
+        try:
+            # Convert markdown to HTML for all messages
+            html_content = convert_markdown_to_html(message)
+
+            if is_agent_message:
+                # Use prompt_toolkit's HTML renderer with the agent color
+                print_formatted_text(
+                    HTML(f'<style fg="{COLOR_AGENT_BLUE}">{html_content}</style>')
+                )
+            else:
+                # Regular message display with HTML rendering but default color
+                print_formatted_text(HTML(html_content))
+        except Exception as e:
+            # If HTML rendering fails, fall back to plain text
+            print(f'Warning: HTML rendering failed: {str(e)}', file=sys.stderr)
+            if is_agent_message:
+                print_formatted_text(
+                    FormattedText([('fg:' + COLOR_AGENT_BLUE, message)])
+                )
+            else:
+                print_formatted_text(message)
+
+
+def convert_markdown_to_html(text: str) -> str:
+    """
+    Render markdown as HTML-style markup for prompt_toolkit's HTML renderer.
+
+    The text is converted with the ``markdown`` library (``extra`` extension
+    enabled), then heading and list tags are rewritten into plain-text markers.
+
+    Args:
+        text: Markdown text to convert
+
+    Returns:
+        The converted markup, or ``text`` itself when it is empty
+    """
+    if not text:
+        return text
+
+    rendered = markdown.markdown(text, extensions=['extra'])
+
+    # Headings: <hN>...</hN> becomes bold text prefixed with N '#' characters.
+    for level in range(1, 7):
+        hashes = '#' * level + ' '
+        rendered = rendered.replace(f'<h{level}>', f'<b>{hashes}')
+        rendered = rendered.replace(f'</h{level}>', '</b>\n')
+
+    # Lists: drop the <ul> wrapper and start each item with a dash.
+    list_rewrites = (
+        ('<ul>', ''),
+        ('</ul>', ''),
+        ('<li>', '- '),
+        ('</li>', ''),
+    )
+    for tag, replacement in list_rewrites:
+        rendered = rendered.replace(tag, replacement)
+
+    return rendered


 def display_error(error: str) -> None:
@@ -150,6 +150,7 @@ def organize_models_and_providers(
 VERIFIED_PROVIDERS = ['openhands', 'anthropic', 'openai', 'mistral']

 VERIFIED_OPENAI_MODELS = [
+    'gpt-5-2025-08-07',
    'o4-mini',
    'gpt-4o',
    'gpt-4o-mini',
@@ -184,6 +185,7 @@ VERIFIED_MISTRAL_MODELS = [

 VERIFIED_OPENHANDS_MODELS = [
    'claude-sonnet-4-20250514',
+    'gpt-5-2025-08-07',
    'claude-opus-4-20250514',
    'claude-opus-4-1-20250805',
    'devstral-small-2507',
@@ -1,4 +1,9 @@
 from openhands.core.config.agent_config import AgentConfig
+from openhands.core.config.arg_utils import (
+    get_cli_parser,
+    get_evaluation_parser,
+    get_headless_parser,
+)
 from openhands.core.config.cli_config import CLIConfig
 from openhands.core.config.config_utils import (
    OH_DEFAULT_AGENT,
@@ -15,7 +20,6 @@ from openhands.core.config.utils import (
    finalize_config,
    get_agent_config_arg,
    get_llm_config_arg,
-    get_parser,
    load_from_env,
    load_from_toml,
    load_openhands_config,
@@ -41,7 +45,9 @@ __all__ = [
    'get_agent_config_arg',
    'get_llm_config_arg',
    'get_field_info',
-    'get_parser',
+    'get_cli_parser',
+    'get_headless_parser',
+    'get_evaluation_parser',
    'parse_arguments',
    'setup_config_from_args',
 ]
@@ -0,0 +1,224 @@
+"""Centralized command line argument configuration for OpenHands CLI and headless modes."""
+
+import argparse
+from argparse import ArgumentParser, _SubParsersAction
+
+
+def get_subparser(parser: ArgumentParser, name: str) -> ArgumentParser:
+    """Return the sub-command parser registered on ``parser`` under ``name``.
+
+    Raises:
+        ValueError: if no sub-parsers action of ``parser`` has a choice named ``name``.
+    """
+    # argparse has no public lookup for sub-parsers, hence the walk over `_actions`.
+    matches = (
+        action.choices[name]
+        for action in parser._actions
+        if isinstance(action, _SubParsersAction) and name in action.choices
+    )
+    found = next(matches, None)
+    if found is None:
+        raise ValueError(f"Subparser '{name}' not found")
+    return found
+
+
+def add_common_arguments(parser: argparse.ArgumentParser) -> None:
+    """Register the options that the CLI and headless parsers have in common.
+
+    Options are added in the order listed below, which is also the order in
+    which argparse shows them in ``--help`` output.
+    """
+    common_options = [
+        (
+            ('--config-file',),
+            {
+                'type': str,
+                'default': 'config.toml',
+                'help': 'Path to the config file (default: config.toml in the current directory)',
+            },
+        ),
+        (
+            ('-t', '--task'),
+            {
+                'type': str,
+                'default': '',
+                'help': 'The task for the agent to perform',
+            },
+        ),
+        (
+            ('-f', '--file'),
+            {
+                'type': str,
+                'help': 'Path to a file containing the task. Overrides -t if both are provided.',
+            },
+        ),
+        (
+            ('-n', '--name'),
+            {
+                'help': 'Session name',
+                'type': str,
+                'default': '',
+            },
+        ),
+        (
+            ('--log-level',),
+            {
+                'help': 'Set the log level',
+                'type': str,
+                'default': None,
+            },
+        ),
+        (
+            ('-l', '--llm-config'),
+            {
+                'default': None,
+                'type': str,
+                'help': 'Replace default LLM ([llm] section in config.toml) config with the specified LLM config, e.g. "llama3" for [llm.llama3] section in config.toml',
+            },
+        ),
+        (
+            ('--agent-config',),
+            {
+                'default': None,
+                'type': str,
+                'help': 'Replace default Agent ([agent] section in config.toml) config with the specified Agent config, e.g. "CodeAct" for [agent.CodeAct] section in config.toml',
+            },
+        ),
+        (
+            ('-v', '--version'),
+            {
+                'action': 'store_true',
+                'help': 'Show version information',
+            },
+        ),
+    ]
+    for flags, settings in common_options:
+        parser.add_argument(*flags, **settings)
+
+
+def add_evaluation_arguments(parser: argparse.ArgumentParser) -> None:
+    """Register the ``--eval-*`` options used by evaluation mode."""
+    # (flag, default, type, help) for each option, in registration order.
+    evaluation_options = (
+        (
+            '--eval-output-dir',
+            'evaluation/evaluation_outputs/outputs',
+            str,
+            'The directory to save evaluation output',
+        ),
+        (
+            '--eval-n-limit',
+            None,
+            int,
+            'The number of instances to evaluate',
+        ),
+        (
+            '--eval-num-workers',
+            4,
+            int,
+            'The number of workers to use for evaluation',
+        ),
+        (
+            '--eval-note',
+            None,
+            str,
+            'The note to add to the evaluation directory',
+        ),
+        (
+            '--eval-ids',
+            None,
+            str,
+            'The comma-separated list (in quotes) of IDs of the instances to evaluate',
+        ),
+    )
+    for flag, default_value, value_type, help_text in evaluation_options:
+        parser.add_argument(
+            flag, default=default_value, type=value_type, help=help_text
+        )
+
+
+def add_headless_specific_arguments(parser: argparse.ArgumentParser) -> None:
+    """Register the options that only headless (and evaluation) parsers accept."""
+    headless_options = [
+        (
+            ('-d', '--directory'),
+            {
+                'type': str,
+                'help': 'The working directory for the agent',
+            },
+        ),
+        (
+            ('-c', '--agent-cls'),
+            {
+                'default': None,
+                'type': str,
+                'help': 'Name of the default agent to use',
+            },
+        ),
+        (
+            ('-i', '--max-iterations'),
+            {
+                'default': None,
+                'type': int,
+                'help': 'The maximum number of iterations to run the agent',
+            },
+        ),
+        (
+            ('-b', '--max-budget-per-task'),
+            {
+                'type': float,
+                'help': 'The maximum budget allowed per task, beyond which the agent will stop.',
+            },
+        ),
+        (
+            ('--no-auto-continue',),
+            {
+                'help': 'Disable auto-continue responses in headless mode (i.e. headless will read from stdin instead of auto-continuing)',
+                'action': 'store_true',
+                'default': False,
+            },
+        ),
+        (
+            ('--selected-repo',),
+            {
+                'help': 'GitHub repository to clone (format: owner/repo)',
+                'type': str,
+                'default': None,
+            },
+        ),
+    ]
+    for flags, settings in headless_options:
+        parser.add_argument(*flags, **settings)
+
+
+def get_cli_parser() -> argparse.ArgumentParser:
+    """Create the top-level ``openhands`` parser with its two sub-commands.
+
+    * ``serve`` accepts ``--mount-cwd`` and ``--gpu``.
+    * ``cli`` accepts the common arguments plus ``--override-cli-mode`` and
+      ``--conversation``.
+
+    ``--conversation`` is accepted both before the sub-command and after ``cli``.
+    The chosen sub-command is stored in ``args.command``.
+
+    Returns:
+        The configured top-level parser.
+    """
+
+    def _parse_bool(value: str) -> bool:
+        # argparse's `type=bool` turns every non-empty string, including
+        # 'False', into True. Explicit negative spellings are treated as False
+        # here; any other non-empty value stays True as before.
+        return value.strip().lower() not in ('', '0', 'false', 'no', 'off')
+
+    # Create a description with welcome message explaining available commands
+    description = (
+        'Welcome to OpenHands: Code Less, Make More\n\n'
+        'OpenHands supports two main commands:\n'
+        '  serve - Launch the OpenHands GUI server (web interface)\n'
+        '  cli   - Run OpenHands in CLI mode (terminal interface)\n\n'
+        'Running "openhands" without a command is the same as "openhands cli"'
+    )
+
+    parser = argparse.ArgumentParser(
+        description=description,
+        prog='openhands',
+        formatter_class=argparse.RawDescriptionHelpFormatter,  # Preserve formatting in description
+        epilog='For more information about a command, run: openhands COMMAND --help',
+    )
+
+    # Create subparsers
+    subparsers = parser.add_subparsers(
+        dest='command',
+        title='commands',
+        description='OpenHands supports two main commands:',
+        metavar='COMMAND',
+    )
+
+    # Add 'serve' subcommand
+    serve_parser = subparsers.add_parser(
+        'serve', help='Launch the OpenHands GUI server using Docker (web interface)'
+    )
+    serve_parser.add_argument(
+        '--mount-cwd',
+        help='Mount the current working directory into the GUI server container',
+        action='store_true',
+        default=False,
+    )
+    serve_parser.add_argument(
+        '--gpu',
+        help='Enable GPU support by mounting all GPUs into the Docker container via nvidia-docker',
+        action='store_true',
+        default=False,
+    )
+
+    # Add 'cli' subcommand - import all the existing CLI arguments
+    cli_parser = subparsers.add_parser(
+        'cli', help='Run OpenHands in CLI mode (terminal interface)'
+    )
+    add_common_arguments(cli_parser)
+
+    cli_parser.add_argument(
+        '--override-cli-mode',
+        help='Override the default settings for CLI mode',
+        type=_parse_bool,
+        default=False,
+    )
+
+    # Keep the original top-level registration, which also provides the None
+    # default, so `openhands --conversation ID cli` still parses.
+    parser.add_argument(
+        '--conversation',
+        help='The conversation id to continue',
+        type=str,
+        default=None,
+    )
+    # Also accept the flag after `cli`; without this, `openhands cli
+    # --conversation ID` is rejected as an unrecognized argument. SUPPRESS
+    # means an absent flag here does not overwrite a top-level value.
+    cli_parser.add_argument(
+        '--conversation',
+        help='The conversation id to continue',
+        type=str,
+        default=argparse.SUPPRESS,
+    )
+
+    return parser
+
+
+def get_headless_parser() -> argparse.ArgumentParser:
+    """Build the headless-mode parser: common options, then headless-only ones."""
+    headless = argparse.ArgumentParser(description='Run the agent via CLI')
+    for register in (add_common_arguments, add_headless_specific_arguments):
+        register(headless)
+    return headless
+
+
+def get_evaluation_parser() -> argparse.ArgumentParser:
+    """Build the evaluation parser: common, headless and evaluation options, in that order."""
+    evaluation = argparse.ArgumentParser(
+        description='Run OpenHands in evaluation mode'
+    )
+    registrars = (
+        add_common_arguments,
+        add_headless_specific_arguments,
+        add_evaluation_arguments,
+    )
+    for register in registrars:
+        register(evaluation)
+    return evaluation
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import os
 import re
 import shlex
@@ -302,6 +304,13 @@ class MCPConfig(BaseModel):
            raise ValueError(f'Invalid MCP configuration: {e}')
        return mcp_mapping

+    def merge(self, other: MCPConfig):
+        """Return a new MCPConfig combining this config's servers with `other`'s.
+
+        For each of `sse_servers`, `stdio_servers` and `shttp_servers`, the result
+        holds this config's entries followed by `other`'s. The values are joined
+        with `+`; no de-duplication is performed, so a server present in both
+        configs appears twice.
+
+        Args:
+            other: The MCPConfig whose servers are appended after this one's.
+
+        Returns:
+            A newly constructed MCPConfig built from the concatenated values.
+        """
+        return MCPConfig(
+            sse_servers=self.sse_servers + other.sse_servers,
+            stdio_servers=self.stdio_servers + other.stdio_servers,
+            shttp_servers=self.shttp_servers + other.shttp_servers,
+        )
+

 class OpenHandsMCPConfig:
    @staticmethod
@@ -72,6 +72,7 @@ class OpenHandsConfig(BaseModel):
    file_store_path: str = Field(default='~/.openhands')
    file_store_web_hook_url: str | None = Field(default=None)
    file_store_web_hook_headers: dict | None = Field(default=None)
+    file_store_web_hook_batch: bool = Field(default=False)
    enable_browser: bool = Field(default=True)
    save_trajectory_path: str | None = Field(default=None)
    save_screenshots_in_trajectory: bool = Field(default=False)
@@ -15,6 +15,7 @@ from pydantic import BaseModel, SecretStr, ValidationError
 from openhands import __version__
 from openhands.core import logger
 from openhands.core.config.agent_config import AgentConfig
+from openhands.core.config.arg_utils import get_headless_parser
 from openhands.core.config.condenser_config import (
    CondenserConfig,
    condenser_config_from_toml_section,
@@ -670,148 +671,9 @@ def get_condenser_config_arg(
        return None


-# Command line arguments
-def get_parser() -> argparse.ArgumentParser:
-    """Get the argument parser."""
-    parser = argparse.ArgumentParser(description='Run the agent via CLI')
-
-    # Add version argument
-    parser.add_argument(
-        '-v', '--version', action='store_true', help='Show version information'
-    )
-
-    parser.add_argument(
-        '--config-file',
-        type=str,
-        default='config.toml',
-        help='Path to the config file (default: config.toml in the current directory)',
-    )
-    parser.add_argument(
-        '-d',
-        '--directory',
-        type=str,
-        help='The working directory for the agent',
-    )
-    parser.add_argument(
-        '-t',
-        '--task',
-        type=str,
-        default='',
-        help='The task for the agent to perform',
-    )
-    parser.add_argument(
-        '-f',
-        '--file',
-        type=str,
-        help='Path to a file containing the task. Overrides -t if both are provided.',
-    )
-    parser.add_argument(
-        '-c',
-        '--agent-cls',
-        default=None,
-        type=str,
-        help='Name of the default agent to use',
-    )
-    parser.add_argument(
-        '-i',
-        '--max-iterations',
-        default=None,
-        type=int,
-        help='The maximum number of iterations to run the agent',
-    )
-    parser.add_argument(
-        '-b',
-        '--max-budget-per-task',
-        type=float,
-        help='The maximum budget allowed per task, beyond which the agent will stop.',
-    )
-    # --eval configs are for evaluations only
-    parser.add_argument(
-        '--eval-output-dir',
-        default='evaluation/evaluation_outputs/outputs',
-        type=str,
-        help='The directory to save evaluation output',
-    )
-    parser.add_argument(
-        '--eval-n-limit',
-        default=None,
-        type=int,
-        help='The number of instances to evaluate',
-    )
-    parser.add_argument(
-        '--eval-num-workers',
-        default=4,
-        type=int,
-        help='The number of workers to use for evaluation',
-    )
-    parser.add_argument(
-        '--eval-note',
-        default=None,
-        type=str,
-        help='The note to add to the evaluation directory',
-    )
-    parser.add_argument(
-        '-l',
-        '--llm-config',
-        default=None,
-        type=str,
-        help='Replace default LLM ([llm] section in config.toml) config with the specified LLM config, e.g. "llama3" for [llm.llama3] section in config.toml',
-    )
-    parser.add_argument(
-        '--agent-config',
-        default=None,
-        type=str,
-        help='Replace default Agent ([agent] section in config.toml) config with the specified Agent config, e.g. "CodeAct" for [agent.CodeAct] section in config.toml',
-    )
-    parser.add_argument(
-        '-n',
-        '--name',
-        help='Session name',
-        type=str,
-        default='',
-    )
-    parser.add_argument(
-        '--conversation',
-        help='The conversation id to continue',
-        type=str,
-        default=None,
-    )
-    parser.add_argument(
-        '--eval-ids',
-        default=None,
-        type=str,
-        help='The comma-separated list (in quotes) of IDs of the instances to evaluate',
-    )
-    parser.add_argument(
-        '--no-auto-continue',
-        help='Disable auto-continue responses in headless mode (i.e. headless will read from stdin instead of auto-continuing)',
-        action='store_true',
-        default=False,
-    )
-    parser.add_argument(
-        '--selected-repo',
-        help='GitHub repository to clone (format: owner/repo)',
-        type=str,
-        default=None,
-    )
-    parser.add_argument(
-        '--override-cli-mode',
-        help='Override the default settings for CLI mode',
-        type=bool,
-        default=False,
-    )
-    parser.add_argument(
-        '--log-level',
-        help='Set the log level',
-        type=str,
-        default=None,
-    )
-    return parser
-
-
 def parse_arguments() -> argparse.Namespace:
    """Parse command line arguments."""
-    parser = get_parser()
+    parser = get_headless_parser()
    args = parser.parse_args()

    if args.version:
@@ -916,17 +778,17 @@ def setup_config_from_args(args: argparse.Namespace) -> OpenHandsConfig:
        )

    # Override default agent if provided
-    if args.agent_cls:
+    if hasattr(args, 'agent_cls') and args.agent_cls:
        config.default_agent = args.agent_cls

    # Set max iterations and max budget per task if provided, otherwise fall back to config values
-    if args.max_iterations is not None:
+    if hasattr(args, 'max_iterations') and args.max_iterations is not None:
        config.max_iterations = args.max_iterations
-    if args.max_budget_per_task is not None:
+    if hasattr(args, 'max_budget_per_task') and args.max_budget_per_task is not None:
        config.max_budget_per_task = args.max_budget_per_task

    # Read selected repository in config for use by CLI and main.py
-    if args.selected_repo is not None:
+    if hasattr(args, 'selected_repo') and args.selected_repo is not None:
        config.sandbox.selected_repo = args.selected_repo

    return config
@@ -383,7 +383,7 @@ Do NOT assume the environment is the same as in the example above.
 """
    example = example.lstrip()

-    return example
+    return refine_prompt(example)


 IN_CONTEXT_LEARNING_EXAMPLE_PREFIX = get_example_for_tools
@@ -94,6 +94,7 @@ FUNCTION_CALLING_SUPPORTED_MODELS = [
    'kimi-k2-instruct',
    'Qwen3-Coder-480B-A35B-Instruct',
    'qwen3-coder',  # this will match both qwen3-coder-480b (openhands provider) and qwen3-coder (for openrouter)
+    'gpt-5-2025-08-07',
 ]

 REASONING_EFFORT_SUPPORTED_MODELS = [
@@ -107,6 +108,7 @@ REASONING_EFFORT_SUPPORTED_MODELS = [
    'o4-mini-2025-04-16',
    'gemini-2.5-flash',
    'gemini-2.5-pro',
+    'gpt-5-2025-08-07',
 ]

 MODELS_WITHOUT_STOP_WORDS = [
@@ -201,34 +203,26 @@ class LLM(RetryMixin, DebugMixin):
        ):
            # For Gemini models, only map 'low' to optimized thinking budget
            # Let other reasoning_effort values pass through to API as-is
-            # RESTORED: Direct kwargs approach - testing direct kwargs only
            if 'gemini-2.5-pro' in self.config.model:
                logger.debug(
-                    f'Applying custom generation config for {self.config.model}'
+                    f'Gemini model {self.config.model} with reasoning_effort {self.config.reasoning_effort}'
                )
-                kwargs['generationConfig'] = {
-                    'temperature': 0,  # Put temperature in generationConfig instead of top-level
-                    'topP': 1,
-                    'thinkingConfig': {'includeThoughts': True},
-                }
-                # These are now inside generationConfig, so remove them from top-level
-                kwargs.pop(
-                    'temperature', None
-                )  # Remove top-level temperature since it's now in generationConfig
-                kwargs.pop(
-                    'top_p', None
-                )  # Remove top_p since it's in generationConfig as topP
-                # This is now inside thinkingConfig, so remove it from top-level
-                kwargs.pop('reasoning_effort', None)
-                # remove other related params that are no longer needed
-                kwargs.pop('thinking', None)
-                kwargs.pop('allowed_openai_params', None)
+                if self.config.reasoning_effort in {None, 'low', 'none'}:
+                    kwargs['thinking'] = {'budget_tokens': 128}
+                    kwargs['allowed_openai_params'] = ['thinking']
+                    kwargs.pop('reasoning_effort', None)
+                else:
+                    kwargs['reasoning_effort'] = self.config.reasoning_effort
+                logger.debug(
+                    f'Gemini model {self.config.model} with reasoning_effort {self.config.reasoning_effort} mapped to thinking {kwargs.get("thinking")}'
+                )
+
            else:
                kwargs['reasoning_effort'] = self.config.reasoning_effort
-                kwargs.pop(
-                    'temperature'
-                )  # temperature is not supported for reasoning models
-                kwargs.pop('top_p')  # reasoning model like o3 doesn't support top_p
+            kwargs.pop(
+                'temperature'
+            )  # temperature is not supported for reasoning models
+            kwargs.pop('top_p')  # reasoning model like o3 doesn't support top_p
        # Azure issue: https://github.com/All-Hands-AI/OpenHands/issues/6777
        if self.config.model.startswith('azure'):
            kwargs['max_tokens'] = self.config.max_output_tokens
@@ -333,7 +327,6 @@ class LLM(RetryMixin, DebugMixin):

            # log the entire LLM prompt
            self.log_prompt(messages)
-            print(self.config.model)

            # set litellm modify_params to the configured value
            # True by default to allow litellm to do transformations like adding a default message, when a message is empty
@@ -360,8 +353,6 @@ class LLM(RetryMixin, DebugMixin):
                    message=r'.*content=.*upload.*',
                    category=DeprecationWarning,
                )
-                # COMMENTED OUT: Context manager approach - testing direct kwargs only
-                # with self._gemini_thinking_patch_context():
                resp: ModelResponse = self._completion_unwrapped(*args, **kwargs)

            # Calculate and record latency
@@ -444,101 +435,6 @@ class LLM(RetryMixin, DebugMixin):

        self._completion = wrapper

-    def _should_apply_gemini_thinking_patch(self) -> bool:
-        """Check if we should apply the Gemini thinking patch.
-
-        Returns True for Gemini 2.5 Pro models to enable thinking capabilities.
-        """
-        return 'gemini-2.5-pro' in self.config.model.lower()
-
-    def _gemini_thinking_patch_context(self):
-        """Context manager that temporarily applies Gemini thinking patch.
-
-        This ensures the patch is only active during the specific completion call
-        and is automatically cleaned up afterwards, preventing interference with
-        other models or subsequent calls.
-        """
-        from contextlib import contextmanager
-
-        @contextmanager
-        def patch_context():
-            if not self._should_apply_gemini_thinking_patch():
-                # No patch needed, just yield
-                yield
-                return
-
-            # Store original functions for restoration
-            original_sync_transform = None
-            original_async_transform = None
-            gemini_module = None
-
-            patch_applied = False
-            try:
-                # Import the modules we need to patch
-                import litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini as gemini_mod
-
-                gemini_module = gemini_mod
-
-                # Store original functions
-                original_sync_transform = gemini_module.sync_transform_request_body
-                original_async_transform = getattr(
-                    gemini_module, 'async_transform_request_body', None
-                )
-
-                # Create patched sync version
-                def patched_sync_transform_with_thinking(*args, **kwargs):
-                    if 'optional_params' in kwargs:
-                        kwargs['optional_params']['thinkingConfig'] = {
-                            'includeThoughts': True,
-                        }
-                    return original_sync_transform(*args, **kwargs)
-
-                # Create patched async version if it exists
-                async def patched_async_transform_with_thinking(*args, **kwargs):
-                    if 'optional_params' in kwargs:
-                        kwargs['optional_params']['thinkingConfig'] = {
-                            'includeThoughts': True,
-                        }
-                    if original_async_transform is not None:
-                        return await original_async_transform(*args, **kwargs)
-                    return None
-
-                # Apply patches
-                gemini_module.sync_transform_request_body = (
-                    patched_sync_transform_with_thinking
-                )
-                if original_async_transform:
-                    gemini_module.async_transform_request_body = (
-                        patched_async_transform_with_thinking
-                    )
-
-                patch_applied = True
-                logger.debug(
-                    f'Applied temporary Gemini thinking patch for model: {self.config.model}'
-                )
-
-            except ImportError as e:
-                logger.warning(f'Could not apply Gemini thinking patch: {e}')
-            except Exception as e:
-                logger.warning(f'Failed to apply Gemini thinking patch: {e}')
-
-            try:
-                # Yield control to the caller
-                yield
-            finally:
-                # Always restore original functions if patch was applied
-                if patch_applied and gemini_module and original_sync_transform:
-                    gemini_module.sync_transform_request_body = original_sync_transform
-                    logger.debug('Restored original sync_transform_request_body')
-
-                if patch_applied and gemini_module and original_async_transform:
-                    gemini_module.async_transform_request_body = (
-                        original_async_transform
-                    )
-                    logger.debug('Restored original async_transform_request_body')
-
-        return patch_context()
-
    @property
    def completion(self) -> Callable:
        """Decorator for the litellm completion function.
@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING

 if TYPE_CHECKING:
    from openhands.controller.agent import Agent
+    from openhands.memory.memory import Memory


 from mcp import McpError
@@ -20,7 +21,6 @@ from openhands.events.observation.mcp import MCPObservation
 from openhands.events.observation.observation import Observation
 from openhands.mcp.client import MCPClient
 from openhands.mcp.error_collector import mcp_error_collector
-from openhands.memory.memory import Memory
 from openhands.runtime.base import Runtime
 from openhands.runtime.impl.cli.cli_runtime import CLIRuntime

@@ -9,6 +9,7 @@ import docker
 import httpx
 import tenacity
 from docker.models.containers import Container
+from docker.types import DriverConfig, Mount

 from openhands.core.config import OpenHandsConfig
 from openhands.core.exceptions import (
@@ -258,6 +259,9 @@ class DockerRuntime(ActionExecutionClient):
                    container_path = parts[1]
                    # Default mode is 'rw' if not specified
                    mount_mode = parts[2] if len(parts) > 2 else 'rw'
+                    # Skip overlay mounts here; they will be handled separately via Mount objects
+                    if 'overlay' in mount_mode:
+                        continue

                    volumes[host_path] = {
                        'bind': container_path,
@@ -286,6 +290,72 @@ class DockerRuntime(ActionExecutionClient):

        return volumes

+    def _process_overlay_mounts(self) -> list[Mount]:
+        """Process overlay mounts specified in sandbox.volumes with mode containing 'overlay'.
+
+        `sandbox.volumes` is read as a comma-separated list of
+        `host_path:container_path[:mode]` specs. Only specs whose mode contains
+        the substring 'overlay' are handled here; all others are ignored.
+
+        Side effects: creates the base directory named by the
+        `SANDBOX_VOLUME_OVERLAYS` environment variable and, for every overlay
+        spec, `<base>/<container_name>/<idx>/upper` and `.../work` beneath it.
+
+        Returns:
+            List of docker.types.Mount objects configured with overlay driver providing
+            read-only lowerdir with per-container copy-on-write upper/work layers.
+            The list is empty when `sandbox.volumes` is None or when
+            `SANDBOX_VOLUME_OVERLAYS` is unset or empty.
+        """
+        overlay_mounts: list[Mount] = []
+
+        # No volumes configured
+        if self.config.sandbox.volumes is None:
+            return overlay_mounts
+
+        # Base directory for overlay upper/work layers from env var
+        overlay_base = os.environ.get('SANDBOX_VOLUME_OVERLAYS')
+        if not overlay_base:
+            # If no base path provided, skip overlay processing
+            return overlay_mounts
+
+        # The base directory is created before the specs are inspected, so it
+        # exists even if none of them turns out to be an overlay mount.
+        os.makedirs(overlay_base, exist_ok=True)
+
+        mount_specs = self.config.sandbox.volumes.split(',')
+
+        # idx is the spec's position in the full comma-separated list, so
+        # non-overlay entries still consume an index.
+        for idx, mount_spec in enumerate(mount_specs):
+            # NOTE(review): splitting on ':' assumes neither path contains a
+            # colon (e.g. Windows drive letters) - confirm against callers.
+            parts = mount_spec.split(':')
+            # A usable spec needs at least a host path and a container path.
+            if len(parts) < 2:
+                continue
+            host_path = os.path.abspath(parts[0])
+            container_path = parts[1]
+            mount_mode = parts[2] if len(parts) > 2 else 'rw'
+
+            if 'overlay' not in mount_mode:
+                continue
+
+            # Prepare upper and work directories unique to this container and mount
+            overlay_dir = os.path.join(overlay_base, self.container_name, f'{idx}')
+            upper_dir = os.path.join(overlay_dir, 'upper')
+            work_dir = os.path.join(overlay_dir, 'work')
+            os.makedirs(upper_dir, exist_ok=True)
+            os.makedirs(work_dir, exist_ok=True)
+
+            # Options for the 'local' volume driver: the host path becomes the
+            # overlay lowerdir; upperdir/workdir are the directories created above.
+            driver_cfg = DriverConfig(
+                name='local',
+                options={
+                    'type': 'overlay',
+                    'device': 'overlay',
+                    'o': f'lowerdir={host_path},upperdir={upper_dir},workdir={work_dir}',
+                },
+            )
+
+            mount = Mount(
+                target=container_path,
+                source='',  # Anonymous volume
+                type='volume',
+                labels={
+                    'app': 'openhands',
+                    'role': 'worker',
+                    'container': self.container_name,
+                },
+                driver_config=driver_cfg,
+            )
+
+            overlay_mounts.append(mount)
+
+        return overlay_mounts
+
    def init_container(self) -> None:
        self.log('debug', 'Preparing to start container...')
        self.set_runtime_status(RuntimeStatus.STARTING_RUNTIME)
@@ -409,6 +479,9 @@ class DockerRuntime(ActionExecutionClient):
        try:
            if self.runtime_container_image is None:
                raise ValueError('Runtime container image is not set')
+            # Process overlay mounts (read-only lower with per-container COW)
+            overlay_mounts = self._process_overlay_mounts()
+
            self.container = self.docker_client.containers.run(
                self.runtime_container_image,
                command=command,
@@ -421,6 +494,7 @@ class DockerRuntime(ActionExecutionClient):
                detach=True,
                environment=environment,
                volumes=volumes,  # type: ignore
+                mounts=overlay_mounts,  # type: ignore
                device_requests=device_requests,
                **(self.config.sandbox.docker_runtime_kwargs or {}),
            )
@@ -609,7 +683,8 @@ class DockerRuntime(ActionExecutionClient):

    def pause(self) -> None:
        """Pause the runtime by stopping the container.
-        This is different from container.stop() as it ensures environment variables are properly preserved."""
+        This is different from container.stop() as it ensures environment variables are properly preserved.
+        """
        if not self.container:
            raise RuntimeError('Container not initialized')

@@ -622,7 +697,8 @@ class DockerRuntime(ActionExecutionClient):

    def resume(self) -> None:
        """Resume the runtime by starting the container.
-        This is different from container.start() as it ensures environment variables are properly restored."""
+        This is different from container.start() as it ensures environment variables are properly restored.
+        """
        if not self.container:
            raise RuntimeError('Container not initialized')

@@ -10,17 +10,18 @@ from jinja2 import Environment, FileSystemLoader
 from pydantic import BaseModel, ConfigDict, Field

 from openhands.core.config.llm_config import LLMConfig
+from openhands.core.config.mcp_config import MCPConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.events.action import (
    ChangeAgentStateAction,
    NullAction,
 )
 from openhands.events.event_filter import EventFilter
+from openhands.events.event_store import EventStore
 from openhands.events.observation import (
    AgentStateChangedObservation,
    NullObservation,
 )
-from openhands.events.stream import EventStream
 from openhands.integrations.provider import (
    PROVIDER_TOKEN_TYPE,
    ProviderHandler,
@@ -44,11 +45,11 @@ from openhands.server.services.conversation_service import (
    create_new_conversation,
    setup_init_convo_settings,
 )
-from openhands.server.session.conversation import ServerConversation
 from openhands.server.shared import (
    ConversationStoreImpl,
    config,
    conversation_manager,
+    file_store,
 )
 from openhands.server.types import LLMAuthenticationError, MissingSettingsError
 from openhands.server.user_auth import (
@@ -60,7 +61,7 @@ from openhands.server.user_auth import (
    get_user_settings_store,
 )
 from openhands.server.user_auth.user_auth import AuthType
-from openhands.server.utils import get_conversation as get_conversation_object
+from openhands.server.utils import get_conversation as get_conversation_metadata
 from openhands.server.utils import get_conversation_store
 from openhands.storage.conversation.conversation_store import ConversationStore
 from openhands.storage.data_models.conversation_metadata import (
@@ -87,6 +88,7 @@ class InitSessionRequest(BaseModel):
    suggested_task: SuggestedTask | None = None
    create_microagent: CreateMicroagent | None = None
    conversation_instructions: str | None = None
+    mcp_config: MCPConfig | None = None
    # Only nested runtimes require the ability to specify a conversation id, and it could be a security risk
    if os.getenv('ALLOW_SET_CONVERSATION_ID', '0') == '1':
        conversation_id: str = Field(default_factory=lambda: uuid.uuid4().hex)
@@ -178,6 +180,7 @@ async def new_conversation(
            conversation_instructions=conversation_instructions,
            git_provider=git_provider,
            conversation_id=conversation_id,
+            mcp_config=data.mcp_config,
        )

        return ConversationResponse(
@@ -331,23 +334,20 @@ async def delete_conversation(
    return True


-@app.get('/conversations/{conversation_id}/remember_prompt')
+@app.get('/conversations/{conversation_id}/remember-prompt')
 async def get_prompt(
+    conversation_id: str,
    event_id: int,
    user_settings: SettingsStore = Depends(get_user_settings_store),
-    conversation: ServerConversation | None = Depends(get_conversation_object),
+    metadata: ConversationMetadata = Depends(get_conversation_metadata),
 ):
-    if conversation is None:
-        return JSONResponse(
-            status_code=404,
-            content={'error': 'Conversation not found.'},
-        )
-
-    # get event stream for the conversation
-    event_stream = conversation.event_stream
+    # get event store for the conversation
+    event_store = EventStore(
+        sid=conversation_id, file_store=file_store, user_id=metadata.user_id
+    )

    # retrieve the relevant events
-    stringified_events = _get_contextual_events(event_stream, event_id)
+    stringified_events = _get_contextual_events(event_store, event_id)

    # generate a prompt
    settings = await user_settings.load()
@@ -551,7 +551,7 @@ async def stop_conversation(
        )


-def _get_contextual_events(event_stream: EventStream, event_id: int) -> str:
+def _get_contextual_events(event_store: EventStore, event_id: int) -> str:
    # find the specified events to learn from
    # Get X events around the target event
    context_size = 4
@@ -567,7 +567,7 @@ def _get_contextual_events(event_stream: EventStream, event_id: int) -> str:
    )  # the types of events that can be in an agent's history

    # from event_id - context_size to event_id..
-    context_before = event_stream.search_events(
+    context_before = event_store.search_events(
        start_id=event_id,
        filter=agent_event_filter,
        reverse=True,
@@ -575,7 +575,7 @@ def _get_contextual_events(event_stream: EventStream, event_id: int) -> str:
    )

    # from event_id to event_id + context_size + 1
-    context_after = event_stream.search_events(
+    context_after = event_store.search_events(
        start_id=event_id + 1,
        filter=agent_event_filter,
        limit=context_size + 1,
@@ -2,6 +2,7 @@ import uuid
 from types import MappingProxyType
 from typing import Any

+from openhands.core.config.mcp_config import MCPConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.events.action.message import MessageAction
 from openhands.experiments.experiment_manager import ExperimentManagerImpl
@@ -44,6 +45,7 @@ async def create_new_conversation(
    attach_convo_id: bool = False,
    git_provider: ProviderType | None = None,
    conversation_id: str | None = None,
+    mcp_config: MCPConfig | None = None,
 ) -> AgentLoopInfo:
    logger.info(
        'Creating conversation',
@@ -82,6 +84,9 @@ async def create_new_conversation(
    session_init_args['selected_branch'] = selected_branch
    session_init_args['git_provider'] = git_provider
    session_init_args['conversation_instructions'] = conversation_instructions
+    if mcp_config:
+        session_init_args['mcp_config'] = mcp_config
+
    conversation_init_data = ConversationInitData(**session_init_args)

    logger.info('Loading conversation store')
@@ -124,10 +124,12 @@ class Session:
        )

        # Set Git user configuration if provided in settings
-        if hasattr(settings, 'git_user_name') and settings.git_user_name:
-            self.config.git_user_name = settings.git_user_name
-        if hasattr(settings, 'git_user_email') and settings.git_user_email:
-            self.config.git_user_email = settings.git_user_email
+        git_user_name = getattr(settings, 'git_user_name', None)
+        if git_user_name is not None:
+            self.config.git_user_name = git_user_name
+        git_user_email = getattr(settings, 'git_user_email', None)
+        if git_user_email is not None:
+            self.config.git_user_email = git_user_email
        max_iterations = settings.max_iterations or self.config.max_iterations

        # Prioritize settings over config for max_budget_per_task
@@ -152,6 +154,14 @@ class Session:
        self.logger.debug(
            f'MCP configuration before setup - self.config.mcp_config: {self.config.mcp}'
        )
+
+        # Check if settings has custom mcp_config
+        mcp_config = getattr(settings, 'mcp_config', None)
+        if mcp_config is not None:
+            # Merge the custom MCP servers into the existing config (they are added to it, not substituted for it)
+            self.config.mcp = self.config.mcp.merge(mcp_config)
+            self.logger.debug(f'Merged custom MCP Config: {mcp_config}')
+
        # Add OpenHands' MCP server by default
        openhands_mcp_server, openhands_mcp_stdio_servers = (
            OpenHandsMCPConfigImpl.create_default_mcp_server_config(
@@ -163,7 +173,7 @@ class Session:
            self.config.mcp.shttp_servers.append(openhands_mcp_server)
            self.logger.debug('Added default MCP HTTP server to config')

-        self.config.mcp.stdio_servers.extend(openhands_mcp_stdio_servers)
+            self.config.mcp.stdio_servers.extend(openhands_mcp_stdio_servers)

        self.logger.debug(
            f'MCP configuration after setup - self.config.mcp: {self.config.mcp}'
@@ -27,10 +27,11 @@ assert isinstance(server_config_interface, ServerConfig), (
 )
 server_config: ServerConfig = server_config_interface
 file_store: FileStore = get_file_store(
-    config.file_store,
-    config.file_store_path,
-    config.file_store_web_hook_url,
-    config.file_store_web_hook_headers,
+    file_store_type=config.file_store,
+    file_store_path=config.file_store_path,
+    file_store_web_hook_url=config.file_store_web_hook_url,
+    file_store_web_hook_headers=config.file_store_web_hook_headers,
+    file_store_web_hook_batch=config.file_store_web_hook_batch,
 )

 client_manager = None
@@ -61,9 +61,12 @@ The `WebHookFileStore` wraps another `FileStore` implementation and sends HTTP r
 **Configuration Options:**
 - `file_store_web_hook_url`: The base URL for webhook requests
 - `file_store_web_hook_headers`: HTTP headers to include in webhook requests
+- `file_store_web_hook_batch`: Whether to use batched webhook requests (default: false)

 ### Protocol Details

+#### Standard Webhook Protocol (Non-Batched)
+
 1. **File Write Operation**:
   - When a file is written, a POST request is sent to `{base_url}{path}`
   - The request body contains the file contents
@@ -73,6 +76,27 @@ The `WebHookFileStore` wraps another `FileStore` implementation and sends HTTP r
   - When a file is deleted, a DELETE request is sent to `{base_url}{path}`
   - The operation is retried up to 3 times with a 1-second delay between attempts

+#### Batched Webhook Protocol
+
+The `BatchedWebHookFileStore` extends the webhook functionality by batching multiple file operations into a single request, which can significantly improve performance when many files are being modified in a short period of time.
+
+1. **Batch Request**:
+   - A single POST request is sent to `{base_url}` with a JSON array in the body
+   - Each item in the array contains:
+     - `method`: "POST" for write operations, "DELETE" for delete operations
+     - `path`: The file path
+     - `content`: The file contents (for write operations only)
+     - `encoding`: "base64" if binary content was base64-encoded (optional)
+
+2. **Batch Triggering**:
+   - Batches are sent when one of the following conditions is met:
+     - A timeout period has elapsed (defaults to 5 seconds, configurable via constructor parameter)
+     - The total size of batched content exceeds a size limit (defaults to 1MB, configurable via constructor parameter)
+     - The `flush()` method is explicitly called
+
+3. **Error Handling**:
+   - The batch request is retried up to 3 times with a 1-second delay between attempts
+
 ## Configuration

 To configure the storage module in OpenHands, use the following configuration options:
@@ -90,4 +114,14 @@ file_store_web_hook_url = "https://example.com/api/files"

 # Optional webhook headers (JSON string)
 file_store_web_hook_headers = '{"Authorization": "Bearer token"}'
+
+# Optional batched webhook mode (default: false)
+file_store_web_hook_batch = true
 ```
+
+**Batched Webhook Configuration:**
+The batched webhook behavior uses predefined constants with the following default values:
+- Batch timeout: 5 seconds
+- Batch size limit: 1MB (1048576 bytes)
+
+These values can be customized by passing `batch_timeout_seconds` and `batch_size_limit_bytes` parameters to the `BatchedWebHookFileStore` constructor.
@@ -2,6 +2,7 @@ import os

 import httpx

+from openhands.storage.batched_web_hook import BatchedWebHookFileStore
 from openhands.storage.files import FileStore
 from openhands.storage.google_cloud import GoogleCloudFileStore
 from openhands.storage.local import LocalFileStore
@@ -15,6 +16,7 @@ def get_file_store(
    file_store_path: str | None = None,
    file_store_web_hook_url: str | None = None,
    file_store_web_hook_headers: dict | None = None,
+    file_store_web_hook_batch: bool = False,
 ) -> FileStore:
    store: FileStore
    if file_store_type == 'local':
@@ -35,9 +37,21 @@ def get_file_store(
                file_store_web_hook_headers['X-Session-API-Key'] = os.getenv(
                    'SESSION_API_KEY'
                )
-        store = WebHookFileStore(
-            store,
-            file_store_web_hook_url,
-            httpx.Client(headers=file_store_web_hook_headers or {}),
-        )
+
+        client = httpx.Client(headers=file_store_web_hook_headers or {})
+
+        if file_store_web_hook_batch:
+            # Use batched webhook file store
+            store = BatchedWebHookFileStore(
+                store,
+                file_store_web_hook_url,
+                client,
+            )
+        else:
+            # Use regular webhook file store
+            store = WebHookFileStore(
+                store,
+                file_store_web_hook_url,
+                client,
+            )
    return store
@@ -0,0 +1,274 @@
+import threading
+from typing import Optional, Union
+
+import httpx
+import tenacity
+
+from openhands.storage.files import FileStore
+from openhands.utils.async_utils import EXECUTOR
+
+# Constants for batching configuration
+WEBHOOK_BATCH_TIMEOUT_SECONDS = 5.0
+WEBHOOK_BATCH_SIZE_LIMIT_BYTES = 1048576  # 1MB
+
+
+class BatchedWebHookFileStore(FileStore):
+    """
+    File store which batches updates before sending them to a webhook.
+
+    This class wraps another FileStore implementation and sends HTTP requests
+    to a specified URL when files are written or deleted. Updates are batched
+    and sent together after a certain amount of time passes or if the content
+    size exceeds a threshold.
+
+    Attributes:
+        file_store: The underlying FileStore implementation
+        base_url: The base URL for webhook requests
+        client: The HTTP client used to make webhook requests
+        batch_timeout_seconds: Time in seconds after which a batch is sent (default: WEBHOOK_BATCH_TIMEOUT_SECONDS)
+        batch_size_limit_bytes: Size limit in bytes after which a batch is sent (default: WEBHOOK_BATCH_SIZE_LIMIT_BYTES)
+        _batch_lock: Lock for thread-safe access to the batch
+        _batch: Dictionary of pending file updates
+        _batch_timer: Timer for sending batches after timeout
+        _batch_size: Current size of the batch in bytes
+    """
+
+    file_store: FileStore
+    base_url: str
+    client: httpx.Client
+    batch_timeout_seconds: float
+    batch_size_limit_bytes: int
+    _batch_lock: threading.Lock
+    _batch: dict[str, tuple[str, Optional[Union[str, bytes]]]]
+    _batch_timer: Optional[threading.Timer]
+    _batch_size: int
+
+    def __init__(
+        self,
+        file_store: FileStore,
+        base_url: str,
+        client: Optional[httpx.Client] = None,
+        batch_timeout_seconds: Optional[float] = None,
+        batch_size_limit_bytes: Optional[int] = None,
+    ):
+        """
+        Initialize a BatchedWebHookFileStore.
+
+        Args:
+            file_store: The underlying FileStore implementation
+            base_url: The base URL for webhook requests
+            client: Optional HTTP client to use for requests. If None, a new client will be created.
+            batch_timeout_seconds: Time in seconds after which a batch is sent.
+                If None, uses the default constant WEBHOOK_BATCH_TIMEOUT_SECONDS.
+            batch_size_limit_bytes: Size limit in bytes after which a batch is sent.
+                If None, uses the default constant WEBHOOK_BATCH_SIZE_LIMIT_BYTES.
+        """
+        self.file_store = file_store
+        self.base_url = base_url
+        if client is None:
+            client = httpx.Client()
+        self.client = client
+
+        # Use provided values or default constants
+        self.batch_timeout_seconds = (
+            batch_timeout_seconds or WEBHOOK_BATCH_TIMEOUT_SECONDS
+        )
+        self.batch_size_limit_bytes = (
+            batch_size_limit_bytes or WEBHOOK_BATCH_SIZE_LIMIT_BYTES
+        )
+
+        # Initialize batch state
+        self._batch_lock = threading.Lock()
+        self._batch = {}  # Maps path -> (operation, content)
+        self._batch_timer = None
+        self._batch_size = 0
+
+    def write(self, path: str, contents: Union[str, bytes]) -> None:
+        """
+        Write contents to a file and queue a webhook update.
+
+        Args:
+            path: The path to write to
+            contents: The contents to write
+        """
+        self.file_store.write(path, contents)
+        self._queue_update(path, 'write', contents)
+
+    def read(self, path: str) -> str:
+        """
+        Read contents from a file.
+
+        Args:
+            path: The path to read from
+
+        Returns:
+            The contents of the file
+        """
+        return self.file_store.read(path)
+
+    def list(self, path: str) -> list[str]:
+        """
+        List files in a directory.
+
+        Args:
+            path: The directory path to list
+
+        Returns:
+            A list of file paths
+        """
+        return self.file_store.list(path)
+
+    def delete(self, path: str) -> None:
+        """
+        Delete a file and queue a webhook update.
+
+        Args:
+            path: The path to delete
+        """
+        self.file_store.delete(path)
+        self._queue_update(path, 'delete', None)
+
+    def _queue_update(
+        self, path: str, operation: str, contents: Optional[Union[str, bytes]]
+    ) -> None:
+        """
+        Queue an update to be sent to the webhook.
+
+        Args:
+            path: The path that was modified
+            operation: The operation performed ("write" or "delete")
+            contents: The contents that were written (None for delete operations)
+        """
+        with self._batch_lock:
+            # Calculate content size
+            content_size = 0
+            if contents is not None:
+                if isinstance(contents, str):
+                    content_size = len(contents.encode('utf-8'))
+                else:
+                    content_size = len(contents)
+
+            # Update batch size calculation
+            # If this path already exists in the batch, subtract its previous size
+            if path in self._batch:
+                prev_op, prev_contents = self._batch[path]
+                if prev_contents is not None:
+                    if isinstance(prev_contents, str):
+                        self._batch_size -= len(prev_contents.encode('utf-8'))
+                    else:
+                        self._batch_size -= len(prev_contents)
+
+            # Add new content size
+            self._batch_size += content_size
+
+            # Add to batch
+            self._batch[path] = (operation, contents)
+
+            # Check if we need to send the batch due to size limit
+            if self._batch_size >= self.batch_size_limit_bytes:
+                # Submit to executor to avoid blocking
+                EXECUTOR.submit(self._send_batch)
+                return
+
+            # Start or reset the timer for sending the batch
+            if self._batch_timer is not None:
+                self._batch_timer.cancel()
+                self._batch_timer = None
+
+            timer = threading.Timer(
+                self.batch_timeout_seconds, self._send_batch_from_timer
+            )
+            timer.daemon = True
+            timer.start()
+            self._batch_timer = timer
+
+    def _send_batch_from_timer(self) -> None:
+        """
+        Send the batch from the timer thread.
+        This method is called by the timer and submits the actual sending to the executor.
+        """
+        EXECUTOR.submit(self._send_batch)
+
+    def _send_batch(self) -> None:
+        """
+        Send the current batch of updates to the webhook as a single request.
+        This method acquires the batch lock and processes all pending updates in one batch.
+        """
+        batch_to_send: dict[str, tuple[str, Optional[Union[str, bytes]]]] = {}
+
+        with self._batch_lock:
+            if not self._batch:
+                return
+
+            # Copy the batch and clear the current one
+            batch_to_send = self._batch.copy()
+            self._batch.clear()
+            self._batch_size = 0
+
+            # Cancel any pending timer
+            if self._batch_timer is not None:
+                self._batch_timer.cancel()
+                self._batch_timer = None
+
+        # Process the entire batch in a single request
+        if batch_to_send:
+            try:
+                self._send_batch_request(batch_to_send)
+            except Exception as e:
+                # Log the error
+                print(f'Error sending webhook batch: {e}')
+
+    @tenacity.retry(
+        wait=tenacity.wait_fixed(1),
+        stop=tenacity.stop_after_attempt(3),
+    )
+    def _send_batch_request(
+        self, batch: dict[str, tuple[str, Optional[Union[str, bytes]]]]
+    ) -> None:
+        """
+        Send a single batch request to the webhook URL with all updates.
+
+        This method is retried up to 3 times with a 1-second delay between attempts.
+
+        Args:
+            batch: Dictionary mapping paths to (operation, contents) tuples
+
+        Raises:
+            httpx.HTTPStatusError: If the webhook request fails
+        """
+        # Prepare the batch payload
+        batch_payload = []
+
+        for path, (operation, contents) in batch.items():
+            item = {
+                'method': 'POST' if operation == 'write' else 'DELETE',
+                'path': path,
+            }
+
+            if operation == 'write' and contents is not None:
+                # Convert bytes to string if needed
+                if isinstance(contents, bytes):
+                    try:
+                        # Try to decode as UTF-8
+                        item['content'] = contents.decode('utf-8')
+                    except UnicodeDecodeError:
+                        # If not UTF-8, use base64 encoding
+                        import base64
+
+                        item['content'] = base64.b64encode(contents).decode('ascii')
+                        item['encoding'] = 'base64'
+                else:
+                    item['content'] = contents
+
+            batch_payload.append(item)
+
+        # Send the batch as a single request
+        response = self.client.post(self.base_url, json=batch_payload)
+        response.raise_for_status()
+
+    def flush(self) -> None:
+        """
+        Immediately send any pending updates to the webhook.
+        This can be called to ensure all updates are sent before shutting down.
+        """
+        self._send_batch()
@@ -106,10 +106,11 @@ class FileConversationStore(ConversationStore):
        cls, config: OpenHandsConfig, user_id: str | None
    ) -> FileConversationStore:
        file_store = get_file_store(
-            config.file_store,
-            config.file_store_path,
-            config.file_store_web_hook_url,
-            config.file_store_web_hook_headers,
+            file_store_type=config.file_store,
+            file_store_path=config.file_store_path,
+            file_store_web_hook_url=config.file_store_web_hook_url,
+            file_store_web_hook_headers=config.file_store_web_hook_headers,
+            file_store_web_hook_batch=config.file_store_web_hook_batch,
        )
        return FileConversationStore(file_store)

@@ -40,9 +40,10 @@ class FileSecretsStore(SecretsStore):
        cls, config: OpenHandsConfig, user_id: str | None
    ) -> FileSecretsStore:
        file_store = get_file_store(
-            config.file_store,
-            config.file_store_path,
-            config.file_store_web_hook_url,
-            config.file_store_web_hook_headers,
+            file_store_type=config.file_store,
+            file_store_path=config.file_store_path,
+            file_store_web_hook_url=config.file_store_web_hook_url,
+            file_store_web_hook_headers=config.file_store_web_hook_headers,
+            file_store_web_hook_batch=config.file_store_web_hook_batch,
        )
        return FileSecretsStore(file_store)
@@ -34,9 +34,10 @@ class FileSettingsStore(SettingsStore):
        cls, config: OpenHandsConfig, user_id: str | None
    ) -> FileSettingsStore:
        file_store = get_file_store(
-            config.file_store,
-            config.file_store_path,
-            config.file_store_web_hook_url,
-            config.file_store_web_hook_headers,
+            file_store_type=config.file_store,
+            file_store_path=config.file_store_path,
+            file_store_web_hook_url=config.file_store_web_hook_url,
+            file_store_web_hook_headers=config.file_store_web_hook_headers,
+            file_store_web_hook_batch=config.file_store_web_hook_batch,
        )
        return FileSettingsStore(file_store)
@@ -1,78 +0,0 @@
-"""
-LiteLLM currently have an issue where HttpHandlers are being created but not
-closed. We have submitted a PR to them, (https://github.com/BerriAI/litellm/pull/8711)
-and their dev team say they are in the process of a refactor that will fix this, but
-in the meantime, we need to manage the lifecycle of the httpx.Client manually.
-
-We can't simply pass in our own client object, because all the different implementations use
-different types of client object.
-
-So we monkey patch the httpx.Client class to track newly created instances and close these
-when the operations complete. (Since some paths create a single shared client and reuse these,
-we actually need to create a proxy object that allows these clients to be reusable.)
-
-Hopefully, this will be fixed soon and we can remove this abomination.
-"""
-
-import contextlib
-from typing import Callable
-
-import httpx
-
-
-@contextlib.contextmanager
-def ensure_httpx_close():
-    wrapped_class = httpx.Client
-    proxys = []
-
-    class ClientProxy:
-        """
-        Sometimes LiteLLM opens a new httpx client for each connection, and does not close them.
-        Sometimes it does close them. Sometimes, it reuses a client between connections. For cases
-        where a client is reused, we need to be able to reuse the client even after closing it.
-        """
-
-        client_constructor: Callable
-        args: tuple
-        kwargs: dict
-        client: httpx.Client
-
-        def __init__(self, *args, **kwargs):
-            self.args = args
-            self.kwargs = kwargs
-            self.client = wrapped_class(*self.args, **self.kwargs)
-            proxys.append(self)
-
-        def __getattr__(self, name):
-            # Invoke a method on the proxied client - create one if required
-            if self.client is None:
-                self.client = wrapped_class(*self.args, **self.kwargs)
-            return getattr(self.client, name)
-
-        def close(self):
-            # Close the client if it is open
-            if self.client:
-                self.client.close()
-                self.client = None
-
-        def __iter__(self, *args, **kwargs):
-            # We have to override this as debuggers invoke it causing the client to reopen
-            if self.client:
-                return self.client.iter(*args, **kwargs)
-            return object.__getattribute__(self, 'iter')(*args, **kwargs)
-
-        @property
-        def is_closed(self):
-            # Check if closed
-            if self.client is None:
-                return True
-            return self.client.is_closed
-
-    httpx.Client = ClientProxy
-    try:
-        yield
-    finally:
-        httpx.Client = wrapped_class
-        while proxys:
-            proxy = proxys.pop()
-            proxy.close()
@@ -56,6 +56,7 @@ def get_supported_llm_models(config: OpenHandsConfig) -> list[str]:
    # Add OpenHands provider models
    openhands_models = [
        'openhands/claude-sonnet-4-20250514',
+        'openhands/gpt-5-2025-08-07',
        'openhands/claude-opus-4-20250514',
        'openhands/gemini-2.5-pro',
        'openhands/o3',
@@ -91,7 +91,10 @@ class PromptManager:
            return Template(file.read())

    def get_system_message(self) -> str:
-        return self.system_template.render().strip()
+        from openhands.agenthub.codeact_agent.tools.prompt import refine_prompt
+
+        system_message = self.system_template.render().strip()
+        return refine_prompt(system_message)

    def get_example_user_message(self) -> str:
        """This is an initial user message that can be provided to the agent
@@ -10,6 +10,7 @@ class TermColor(Enum):
    SUCCESS = 'green'
    ERROR = 'red'
    INFO = 'blue'
+    GREY = 'dark_grey'


 def colorize(text: str, color: TermColor = TermColor.WARNING) -> str:
@@ -1,175 +0,0 @@
-# Performance Testing with Tool Calls
-
-## Overview
-
-This document describes the enhanced performance testing architecture that includes tool calls to better simulate real-world OpenHands usage patterns. Instead of simple prompt-response testing, we now test the complete tool interaction workflow.
-
-## Why Tool Call Testing Matters
-
- **Real-world simulation**: OpenHands frequently uses tools (bash, file editing, etc.)
- **Latency impact**: Tool calls add multiple round-trips and processing overhead
- **Performance bottlenecks**: Tool parsing and execution can reveal different performance characteristics
- **Complete workflow**: Tests the full LLM → Tool → LLM → Summary cycle
-
-## Test Architecture
-
-### 3-Step Tool Call Workflow
-
-Each performance test now follows this standardized 3-step process:
-
-#### Step 1: Initial Tool Request
- **Prompt**: "What is the product of 45 and 126? Use the math tool to calculate this."
- **Tool Definition**: Provide a `math` tool that can compute products
- **Expected**: LLM should respond with a tool call to `math(a=45, b=126)`
- **Measure**: Time to generate tool call response
-
-#### Step 2: Tool Execution & Response
- **Action**: Execute the math tool function (45 × 126 = 5670)
- **Response**: Send tool result back to LLM as a tool message
- **Expected**: LLM acknowledges the result
- **Measure**: Time to process tool result
-
-#### Step 3: Summary Request
- **Prompt**: "Please summarize what just happened in our conversation."
- **Expected**: LLM provides a summary of the math calculation
- **Measure**: Time to generate summary response
-
-### Message History Tracking
-
-All messages and responses are preserved in a `messages` array:
-
-```python
-messages = [
-    {"role": "user", "content": "What is the product of 45 and 126? Use the math tool."},
-    {"role": "assistant", "content": "", "tool_calls": [...]},  # Step 1 response
-    {"role": "tool", "tool_call_id": "...", "content": "5670"},  # Step 2 tool result
-    {"role": "assistant", "content": "The product is 5670."},  # Step 2 response
-    {"role": "user", "content": "Please summarize what just happened."},
-    {"role": "assistant", "content": "I calculated 45 × 126 = 5670..."}  # Step 3 response
-]
-```
-
-## Tool Definition
-
-The `math` tool is defined consistently across all tests:
-
-```python
-MATH_TOOL = {
-    "type": "function",
-    "function": {
-        "name": "math",
-        "description": "Perform mathematical calculations",
-        "parameters": {
-            "type": "object",
-            "properties": {
-                "operation": {
-                    "type": "string",
-                    "description": "The mathematical operation to perform",
-                    "enum": ["add", "subtract", "multiply", "divide"]
-                },
-                "a": {
-                    "type": "number",
-                    "description": "First number"
-                },
-                "b": {
-                    "type": "number",
-                    "description": "Second number"
-                }
-            },
-            "required": ["operation", "a", "b"]
-        }
-    }
-}
-```
-
-## Performance Metrics
-
-Each test measures:
-
- **Step 1 Duration**: Time to generate initial tool call
- **Step 2 Duration**: Time to process tool result
- **Step 3 Duration**: Time to generate summary
- **Total Duration**: End-to-end workflow time
- **Tool Call Accuracy**: Whether LLM correctly used the tool
- **Response Quality**: Whether all steps completed successfully
-
-## Security Considerations
-
-### Environment Variables
-
-All tests now use secure environment variable-based authentication:
-
- **LiteLLM Tests**: Use `LITELLM_PROXY_API_KEY` and `LITELLM_BASE_URL`
- **Native API Tests**: Use `GEMINI_API_KEY` (for direct Google API calls)
- **OpenHands Tests**: Use `LITELLM_PROXY_API_KEY` and `LITELLM_BASE_URL` (routed through LiteLLM)
-
-### Credential Handling
-
- ✅ **Secure**: Read credentials from environment variables only
- ✅ **No Hardcoding**: No API keys in source code or documentation
- ✅ **Error Handling**: Graceful failure when credentials are missing
- ✅ **Logging**: No credential values in logs or output
-
-```python
-# Secure credential handling example
-api_key = os.getenv('LITELLM_PROXY_API_KEY')
-base_url = os.getenv('LITELLM_BASE_URL')
-
-if not api_key:
-    print('❌ LITELLM_PROXY_API_KEY environment variable not set')
-    return
-
-# Never log or print the actual key values
-print(f'✅ Using base URL: {base_url}')  # OK to log URL
-print('✅ API key configured')  # OK to confirm presence
-```
-
-## Implementation Files
-
-### Core Utility
- `test_utils.py`: Shared tool call testing utilities
-
-### Test Files
- `test_thinking_budget.py`: Primary thinking/reasoning with tool calls
- `test_litellm_comprehensive.py`: LiteLLM performance with tool calls
- `test_native_gemini.py`: Native API baseline with tool calls
- `test_openhands_gemini_fix.py`: OpenHands fix verification with tool calls
- `run_performance_tests.py`: Orchestrator for all tool-based tests
-
-## Expected Results
-
-Tool call testing typically shows:
-
- **Higher Latency**: 2-3x longer than simple prompts due to multiple round-trips
- **Reasoning Impact**: Thinking budget affects tool call generation speed
- **Streaming Benefits**: Less pronounced due to structured tool responses
- **Error Patterns**: Tool parsing failures reveal different bottlenecks
-
-## Usage Examples
-
-### Environment Setup
-```bash
-# Required for LiteLLM-based tests
-export LITELLM_PROXY_API_KEY="your-api-key-here"
-export LITELLM_BASE_URL="https://your-litellm-endpoint"
-
-# Required for native Google API tests
-export GEMINI_API_KEY="your-google-api-key-here"
-```
-
-### Running Tests
-```bash
-# Run individual test with tool calls
-python test_thinking_budget.py
-
-# Run comprehensive suite with tool calls
-python run_performance_tests.py
-```
-
-## References
-
-This architecture is based on:
- OpenHands tool calling patterns (source: OpenHands codebase)
- LiteLLM tool calling documentation (source: LiteLLM docs)
- Google Gemini function calling API (source: Google AI documentation)
- Security best practices for API key management (source: OWASP guidelines)
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand.

 [[package]]
 name = "aiofiles"
@@ -5152,8 +5152,11 @@ files = [
    {file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"},
    {file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"},
    {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"},
+    {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"},
    {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"},
+    {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"},
    {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"},
+    {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"},
    {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"},
    {file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"},
    {file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"},
@@ -5227,6 +5230,22 @@ files = [
 [package.dependencies]
 cobble = ">=0.1.3,<0.2"

+[[package]]
+name = "markdown"
+version = "3.8.2"
+description = "Python implementation of John Gruber's Markdown."
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "markdown-3.8.2-py3-none-any.whl", hash = "sha256:5c83764dbd4e00bdd94d85a19b8d55ccca20fe35b2e678a1422b380324dd5f24"},
+    {file = "markdown-3.8.2.tar.gz", hash = "sha256:247b9a70dd12e27f67431ce62523e675b866d254f900c4fe75ce3dda62237c45"},
+]
+
+[package.extras]
+docs = ["mdx_gh_links (>=0.2)", "mkdocs (>=1.6)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"]
+testing = ["coverage", "pyyaml"]
+
 [[package]]
 name = "markdown-it-py"
 version = "3.0.0"
@@ -10446,6 +10465,18 @@ files = [
 ]
 markers = {main = "extra == \"third-party-runtimes\""}

+[[package]]
+name = "types-markdown"
+version = "3.8.0.20250809"
+description = "Typing stubs for Markdown"
+optional = false
+python-versions = ">=3.9"
+groups = ["dev"]
+files = [
+    {file = "types_markdown-3.8.0.20250809-py3-none-any.whl", hash = "sha256:3f34a38c2259a3158e90ab0cb058cd8f4fdd3d75e2a0b335cb57f25dc2bc77d3"},
+    {file = "types_markdown-3.8.0.20250809.tar.gz", hash = "sha256:fa619e735878a244332a4bbe16bcfc44e49ff6264c2696056278f0642cdfa223"},
+]
+
 [[package]]
 name = "types-python-dateutil"
 version = "2.9.0.20250516"
@@ -11766,4 +11797,4 @@ third-party-runtimes = ["daytona", "e2b", "modal", "runloop-api-client"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12,<3.14"
-content-hash = "8568c6ec2e11d4fcb23e206a24896b4d2d50e694c04011b668148f484e95b406"
+content-hash = "9fd177a2dfa1eebb9212e515db93c58f82d6126cc2d131de5321d68772bc2a59"
@@ -42,6 +42,7 @@ numpy = "*"
 json-repair = "*"
 browsergym-core = "0.13.3"                         # integrate browsergym-core as the browsing interface
 html2text = "*"
+markdown = "*"                                     # For markdown to HTML conversion
 deprecated = "*"
 pexpect = "*"
 jinja2 = "^3.1.3"
@@ -114,6 +115,7 @@ pre-commit = "4.2.0"
 build = "*"
 types-setuptools = "*"
 pytest = "^8.4.0"
+types-markdown = "^3.8.0.20250809"

 [tool.poetry.group.test]
 optional = true
@@ -166,7 +168,7 @@ joblib = "*"
 swebench = { git = "https://github.com/ryanhoangt/SWE-bench.git", rev = "fix-modal-patch-eval" }

 [tool.poetry.scripts]
-openhands = "openhands.cli.main:main"
+openhands = "openhands.cli.entry:main"

 [tool.poetry.group.testgeneval.dependencies]
 fuzzywuzzy = "^0.18.0"
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Xingyao Wang	312993339f	docs: fix CLI mode doc when running in dev model	2025-08-11 17:55:21 -04:00
Tim O'Farrell	6f21b6700a	Fix for issues where callbacks are not batched (#10235 )	2025-08-11 15:44:48 -06:00
Tim O'Farrell	af49b615b1	Add BatchedWebHookFileStore for batching webhook updates (#10119 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-11 12:51:08 -06:00
Tim O'Farrell	4651edd5b3	Fix circular import by moving refine_prompt to dedicated module (#10223 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-11 12:17:18 -06:00
olyashok	d7f72fec9c	OverlayFS support for docker runtimes (#10222 )	2025-08-11 18:11:08 +00:00
mamoodi	09011c91f8	Remove rbren from UI changes reviewers (#10230 )	2025-08-11 13:32:29 -04:00
Xingyao Wang	e56fabfc5e	feat(cli): Add markdown schema visualization in CLI (#10193 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-11 15:47:38 +00:00
Xingyao Wang	56f752557c	Implement auto-pagination for conversation list with infinite scroll (#10129 ) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com>	2025-08-11 15:03:29 +00:00
Calvin Smith	5f2ad7fbb0	Solvability setting switch (#9727 ) Co-authored-by: Calvin Smith <calvin@all-hands.dev>	2025-08-11 08:57:47 -06:00
Ryan H. Tran	758e30c9a8	Remove SecretStr conversion in GAIA eval (#10204 )	2025-08-11 21:30:18 +08:00
dependabot[bot]	28017f232e	chore(deps): bump the version-all group across 1 directory with 9 updates (#10168 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2025-08-11 14:51:36 +04:00
Tim O'Farrell	3302c31c60	Removed Hack that is no longer required (#10195 )	2025-08-10 12:13:19 -06:00
Xingyao Wang	116ba199d1	feat(agent): stop using short tool description for gpt-5 (#10184 )	2025-08-09 17:56:52 -04:00
Boxuan Li	803bdced9c	Fix Windows prompt refinement: ensure 'bash' is replaced with 'powershell' in all prompts (#10179 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-08 20:28:36 -07:00
Xingyao Wang	3eecac2003	docs: Add GPT-5 model recommendation and fix pricing display issue (#10177 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-08 19:19:59 +00:00
mamoodi	c02e09fc2d	Hide Git Settings section from Application settings (#10176 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-08 19:06:40 +00:00
Tim O'Farrell	18f8661770	feat: add mcp_shttp_servers override to conversation initialization (#10171 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-08 18:05:44 +00:00
Xingyao Wang	04ff4a025b	feat(cli): Use CLI to launch OpenHands UI server via Docker (#9783 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-09 02:04:07 +08:00
mamoodi	81ef363658	Increase stale bot inactivity time and better messaging (#10167 ) Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>	2025-08-08 16:41:15 +00:00
Xingyao Wang	1474c5bc1c	Support gpt-5-2025-08-07 and add it to OpenHands provider (#10172 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-08 16:05:51 +00:00
sp.wack	9b0a5da839	Use EventStore directly in remember prompt; merge client services (#10143 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-08 18:03:03 +04:00
Graham Neubig	7ab2ad2c1b	Fix authentication setup issues in unit tests (#10118 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-07 22:12:21 -04:00
Graham Neubig	8416a019cb	Fix unit test failures by prioritizing current directory in PYTHONPATH (#10105 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-07 22:12:02 -04:00