test(cli): Update tests for markdown rendering and agent message display

refactor(cli): Combine agent message and finish display functions
feat(cli): Add markdown rendering for agent messages
2026-04-29 03:00:45 -04:00 · 2025-08-10 18:21:38 +00:00 · 2025-08-10 18:20:23 +00:00 · 2025-08-10 18:17:33 +00:00 · 2025-08-09 17:56:52 -04:00 · 2025-08-08 20:28:36 -07:00
84 changed files with 1533 additions and 817 deletions
--- a/.github/scripts/update_pr_description.sh
+++ b/.github/scripts/update_pr_description.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+set -euxo pipefail
+
+# This script updates the PR description with commands to run the PR locally
+# It adds both Docker and uvx commands
+
+# Get the branch name for the PR
+BRANCH_NAME=$(gh pr view "$PR_NUMBER" --json headRefName --jq .headRefName)
+
+# Define the Docker command
+DOCKER_RUN_COMMAND="docker run -it --rm \
+  -p 3000:3000 \
+  -v /var/run/docker.sock:/var/run/docker.sock \
+  --add-host host.docker.internal:host-gateway \
+  -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:${SHORT_SHA}-nikolaik \
+  --name openhands-app-${SHORT_SHA} \
+  docker.all-hands.dev/all-hands-ai/openhands:${SHORT_SHA}"
+
+# Define the uvx command
+UVX_RUN_COMMAND="uvx --python 3.12 --from git+https://github.com/All-Hands-AI/OpenHands@${BRANCH_NAME} openhands"
+
+# Get the current PR body
+PR_BODY=$(gh pr view "$PR_NUMBER" --json body --jq .body)
+
+# Prepare the new PR body with both commands
+if echo "$PR_BODY" | grep -q "To run this PR locally, use the following command:"; then
+  # For existing PR descriptions, use a more robust approach
+  # Split the PR body at the "To run this PR locally" section and replace everything after it
+  BEFORE_SECTION=$(echo "$PR_BODY" | sed '/To run this PR locally, use the following command:/,$d')
+  NEW_PR_BODY=$(cat <<EOF
+${BEFORE_SECTION}
+
+To run this PR locally, use the following command:
+
+GUI with Docker:
+\`\`\`
+${DOCKER_RUN_COMMAND}
+\`\`\`
+
+CLI with uvx:
+\`\`\`
+${UVX_RUN_COMMAND}
+\`\`\`
+EOF
+)
+else
+  # For new PR descriptions: use heredoc safely without indentation
+  NEW_PR_BODY=$(cat <<EOF
+$PR_BODY
+
+---
+
+To run this PR locally, use the following command:
+
+GUI with Docker:
+\`\`\`
+${DOCKER_RUN_COMMAND}
+\`\`\`
+
+CLI with uvx:
+\`\`\`
+${UVX_RUN_COMMAND}
+\`\`\`
+EOF
+)
+fi
+
+# Update the PR description
+echo "Updating PR description with Docker and uvx commands"
+gh pr edit "$PR_NUMBER" --body "$NEW_PR_BODY"
--- a/.github/workflows/ghcr-build.yml
+++ b/.github/workflows/ghcr-build.yml
@@ -332,29 +332,5 @@ jobs:
          SHORT_SHA: ${{ steps.short_sha.outputs.SHORT_SHA }}
        shell: bash
        run: |
-          echo "updating PR description"
-          DOCKER_RUN_COMMAND="docker run -it --rm \
-            -p 3000:3000 \
-            -v /var/run/docker.sock:/var/run/docker.sock \
-            --add-host host.docker.internal:host-gateway \
-            -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:$SHORT_SHA-nikolaik \
-            --name openhands-app-$SHORT_SHA \
-            docker.all-hands.dev/all-hands-ai/openhands:$SHORT_SHA"
-
-          PR_BODY=$(gh pr view $PR_NUMBER --json body --jq .body)
-
-          if echo "$PR_BODY" | grep -q "To run this PR locally, use the following command:"; then
-            UPDATED_PR_BODY=$(echo "${PR_BODY}" | sed -E "s|docker run -it --rm.*|$DOCKER_RUN_COMMAND|")
-          else
-            UPDATED_PR_BODY="${PR_BODY}
-
-          ---
-
-          To run this PR locally, use the following command:
-          \`\`\`
-          $DOCKER_RUN_COMMAND
-          \`\`\`"
-          fi
-
-          echo "updated body: $UPDATED_PR_BODY"
-          gh pr edit $PR_NUMBER --body "$UPDATED_PR_BODY"
+          echo "Updating PR description with Docker and uvx commands"
+          bash ${GITHUB_WORKSPACE}/.github/scripts/update_pr_description.sh
--- a/.github/workflows/py-tests.yml
+++ b/.github/workflows/py-tests.yml
@@ -48,11 +48,11 @@ jobs:
      - name: Build Environment
        run: make build
      - name: Run Unit Tests
-        run: poetry run pytest --forked -n auto -svv ./tests/unit
+        run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest --forked -n auto -svv ./tests/unit
      - name: Run Runtime Tests with CLIRuntime
-        run: TEST_RUNTIME=cli poetry run pytest -svv tests/runtime/test_bash.py
+        run: PYTHONPATH=".:$PYTHONPATH" TEST_RUNTIME=cli poetry run pytest -svv tests/runtime/test_bash.py
      - name: Run E2E Tests
-        run: poetry run pytest -svv tests/e2e
+        run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest -svv tests/e2e

  # Run specific Windows python tests
  test-on-windows:
@@ -77,9 +77,11 @@ jobs:
      - name: Run Windows unit tests
        run: poetry run pytest -svv tests/unit/test_windows_bash.py
        env:
+          PYTHONPATH: "."  # env values are literal strings; a PowerShell "$env:..." reference is not expanded here
          DEBUG: "1"
      - name: Run Windows runtime tests with LocalRuntime
        run: $env:TEST_RUNTIME="local"; poetry run pytest -svv tests/runtime/test_bash.py
        env:
+          PYTHONPATH: "."  # env values are literal strings; a PowerShell "$env:..." reference is not expanded here
          TEST_RUNTIME: local
          DEBUG: "1"
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@@ -12,11 +12,11 @@ jobs:
    steps:
      - uses: actions/stale@v9
        with:
-          stale-issue-message: 'This issue is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
-          stale-pr-message: 'This PR is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
-          days-before-stale: 30
+          stale-issue-message: 'This issue is stale because it has been open for 40 days with no activity. Remove the stale label or leave a comment, otherwise it will be closed in 10 days.'
+          stale-pr-message: 'This PR is stale because it has been open for 40 days with no activity. Remove the stale label or leave a comment, otherwise it will be closed in 10 days.'
+          days-before-stale: 40
          exempt-issue-labels: 'roadmap'
-          close-issue-message: 'This issue was closed because it has been stalled for over 30 days with no activity.'
-          close-pr-message: 'This PR was closed because it has been stalled for over 30 days with no activity.'
-          days-before-close: 7
+          close-issue-message: 'This issue was automatically closed due to 50 days of inactivity. We do this to help keep the issues somewhat manageable and focus on active issues.'
+          close-pr-message: 'This PR was closed because it had no activity for 50 days. If you feel this was closed in error, and you would like to continue the PR, please resubmit or let us know.'
+          days-before-close: 10
          operations-per-run: 150
--- a/containers/app/Dockerfile
+++ b/containers/app/Dockerfile
@@ -58,34 +58,34 @@ RUN sed -i 's/^UID_MIN.*/UID_MIN 499/' /etc/login.defs
 # Default is 60000, but we've seen up to 200000
 RUN sed -i 's/^UID_MAX.*/UID_MAX 1000000/' /etc/login.defs

-RUN groupadd --gid $OPENHANDS_USER_ID openhands
+RUN groupadd --gid $OPENHANDS_USER_ID app
 RUN useradd -l -m -u $OPENHANDS_USER_ID --gid $OPENHANDS_USER_ID -s /bin/bash openhands && \
-    usermod -aG openhands openhands && \
+    usermod -aG app openhands && \
    usermod -aG sudo openhands && \
    echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
-RUN chown -R openhands:openhands /app && chmod -R 770 /app
-RUN sudo chown -R openhands:openhands $WORKSPACE_BASE && sudo chmod -R 770 $WORKSPACE_BASE
+RUN chown -R openhands:app /app && chmod -R 770 /app
+RUN sudo chown -R openhands:app $WORKSPACE_BASE && sudo chmod -R 770 $WORKSPACE_BASE
 USER openhands

 ENV VIRTUAL_ENV=/app/.venv \
    PATH="/app/.venv/bin:$PATH" \
    PYTHONPATH='/app'

-COPY --chown=openhands:openhands --chmod=770 --from=backend-builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
+COPY --chown=openhands:app --chmod=770 --from=backend-builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}

-COPY --chown=openhands:openhands --chmod=770 ./microagents ./microagents
-COPY --chown=openhands:openhands --chmod=770 ./openhands ./openhands
-COPY --chown=openhands:openhands --chmod=777 ./openhands/runtime/plugins ./openhands/runtime/plugins
-COPY --chown=openhands:openhands pyproject.toml poetry.lock README.md MANIFEST.in LICENSE ./
+COPY --chown=openhands:app --chmod=770 ./microagents ./microagents
+COPY --chown=openhands:app --chmod=770 ./openhands ./openhands
+COPY --chown=openhands:app --chmod=777 ./openhands/runtime/plugins ./openhands/runtime/plugins
+COPY --chown=openhands:app pyproject.toml poetry.lock README.md MANIFEST.in LICENSE ./

 # This is run as "openhands" user, and will create __pycache__ with openhands:openhands ownership
 RUN python openhands/core/download.py # No-op to download assets
 # Add this line to set group ownership of all files/directories not already in "app" group
-# openhands:openhands -> openhands:openhands
-RUN find /app \! -group openhands -exec chgrp openhands {} +
+# openhands:openhands -> openhands:app
+RUN find /app \! -group app -exec chgrp app {} +

-COPY --chown=openhands:openhands --chmod=770 --from=frontend-builder /app/build ./frontend/build
-COPY --chown=openhands:openhands --chmod=770 ./containers/app/entrypoint.sh /app/entrypoint.sh
+COPY --chown=openhands:app --chmod=770 --from=frontend-builder /app/build ./frontend/build
+COPY --chown=openhands:app --chmod=770 ./containers/app/entrypoint.sh /app/entrypoint.sh

 USER root

--- a/docs/usage/how-to/gui-mode.mdx
+++ b/docs/usage/how-to/gui-mode.mdx
@@ -7,6 +7,67 @@ description: High level overview of the Graphical User Interface (GUI) in OpenHa

 - [OpenHands is running](/usage/local-setup)

+## Launching the GUI Server
+
+### Using the CLI Command
+
+You can launch the OpenHands GUI server directly from the command line using the `serve` command:
+
+<Callout type="info">
+**Prerequisites**: Either [install the OpenHands CLI](/usage/how-to/cli-mode) first, or have `uv` installed and run `uvx --python 3.12 --from openhands-ai openhands serve` in place of `openhands serve`. If neither is available, use Docker directly (see the [Docker section](#using-docker-directly) below).
+</Callout>
+
+```bash
+openhands serve
+```
+
+This command will:
+- Check that Docker is installed and running
+- Pull the required Docker images
+- Launch the OpenHands GUI server at http://localhost:3000
+- Use the same configuration directory (`~/.openhands`) as the CLI mode
+
+#### Mounting Your Current Directory
+
+To mount your current working directory into the GUI server container, use the `--mount-cwd` flag:
+
+```bash
+openhands serve --mount-cwd
+```
+
+This is useful when you want to work on files in your current directory through the GUI. The directory will be mounted at `/workspace` inside the container.
+
+#### Using GPU Support
+
+If you have NVIDIA GPUs and want to make them available to the OpenHands container, use the `--gpu` flag:
+
+```bash
+openhands serve --gpu
+```
+
+This enables GPU support via nvidia-docker and makes all available GPUs accessible inside the container. You can combine this flag with others:
+
+```bash
+openhands serve --gpu --mount-cwd
+```
+
+**Prerequisites for GPU support:**
+- NVIDIA GPU drivers must be installed on your host system
+- [NVIDIA Container Toolkit (nvidia-docker2)](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) must be installed and configured
+
+#### Requirements
+
+Before using the `openhands serve` command, ensure that:
+- Docker is installed and running on your system
+- You have internet access to pull the required Docker images
+- Port 3000 is available on your system
+
+The CLI will automatically check these requirements and provide helpful error messages if anything is missing.
+
+### Using Docker Directly
+
+Alternatively, you can run the GUI server using Docker directly. See the [local setup guide](/usage/local-setup) for detailed Docker instructions.
+
 ## Overview

 ### Initial Setup
--- a/docs/usage/llms/llms.mdx
+++ b/docs/usage/llms/llms.mdx
@@ -18,7 +18,7 @@ Based on these findings and community feedback, these are the latest models that
 ### Cloud / API-Based Models

 - [anthropic/claude-sonnet-4-20250514](https://www.anthropic.com/api) (recommended)
- [openai/o4-mini](https://openai.com/index/introducing-o3-and-o4-mini/)
+- [openai/gpt-5-2025-08-07](https://openai.com/api/) (recommended)
 - [gemini/gemini-2.5-pro](https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/)
 - [deepseek/deepseek-chat](https://api-docs.deepseek.com/)
 - [moonshot/kimi-k2-0711-preview](https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2)
--- a/docs/usage/llms/openhands-llms.mdx
+++ b/docs/usage/llms/openhands-llms.mdx
@@ -32,4 +32,4 @@ When running OpenHands, you'll need to set the following in the OpenHands UI thr

 Pricing follows official API provider rates. [You can view model prices here.](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json)

-For `qwen3-coder-480b`, we charge the cheapest FP8 rate available on openrouter: $0.4 per million input tokens and $1.6 per million output tokens.
+For `qwen3-coder-480b`, we charge the cheapest FP8 rate available on openrouter: \$0.4 per million input tokens and \$1.6 per million output tokens.
--- a/docs/usage/local-setup.mdx
+++ b/docs/usage/local-setup.mdx
@@ -66,6 +66,30 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to

 ### Start the App

+#### Option 1: Using the CLI Launcher (Recommended)
+
+If you have Python 3.12+ installed, you can use the CLI launcher for a simpler experience:
+
+```bash
+# Install OpenHands
+pip install openhands-ai
+
+# Launch the GUI server
+openhands serve
+
+# Or with GPU support (requires nvidia-docker)
+openhands serve --gpu
+
+# Or with current directory mounted
+openhands serve --mount-cwd
+```
+
+Or using `uvx --python 3.12 --from openhands-ai openhands serve` if you have [uv](https://docs.astral.sh/uv/) installed.
+
+This will automatically handle Docker requirements checking, image pulling, and launching the GUI server. The `--gpu` flag enables GPU support via nvidia-docker, and `--mount-cwd` mounts your current directory into the container.
+
+#### Option 2: Using Docker Directly
+
 ```bash
 docker pull docker.all-hands.dev/all-hands-ai/runtime:0.51-nikolaik

--- a/evaluation/benchmarks/EDA/run_infer.py
+++ b/evaluation/benchmarks/EDA/run_infer.py
@@ -18,8 +18,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -172,7 +172,7 @@ def process_instance(


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--answerer_model', '-a', default='gpt-3.5-turbo', help='answerer model'
    )
--- a/evaluation/benchmarks/commit0/run_infer.py
+++ b/evaluation/benchmarks/commit0/run_infer.py
@@ -26,8 +26,8 @@ from openhands.controller.state.state import State
 from openhands.core.config import (
    AgentConfig,
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -525,7 +525,7 @@ def commit0_setup(dataset: pd.DataFrame, repo_split: str) -> pd.DataFrame:


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
--- a/evaluation/benchmarks/gaia/run_infer.py
+++ b/evaluation/benchmarks/gaia/run_infer.py
@@ -31,8 +31,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
    load_from_toml,
 )
 from openhands.core.config.utils import get_agent_config_arg
@@ -294,7 +294,7 @@ Here is the task:


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--level',
        type=str,
--- a/evaluation/benchmarks/gorilla/run_infer.py
+++ b/evaluation/benchmarks/gorilla/run_infer.py
@@ -20,8 +20,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -134,7 +134,7 @@ def process_instance(


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--hubs',
        type=str,
--- a/evaluation/benchmarks/gpqa/run_infer.py
+++ b/evaluation/benchmarks/gpqa/run_infer.py
@@ -38,8 +38,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -312,7 +312,7 @@ Ok now its time to start solving the question. Good luck!


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    # data split must be one of 'gpqa_main', 'gqpa_diamond', 'gpqa_experts', 'gpqa_extended'
    parser.add_argument(
        '--data-split',
--- a/evaluation/benchmarks/lca_ci_build_repair/eval_infer.py
+++ b/evaluation/benchmarks/lca_ci_build_repair/eval_infer.py
@@ -21,7 +21,7 @@ from evaluation.utils.shared import (
 from openhands.core.config import (
    LLMConfig,
    OpenHandsConfig,
-    get_parser,
+    get_evaluation_parser,
    load_openhands_config,
 )
 from openhands.core.logger import openhands_logger as logger
@@ -167,7 +167,7 @@ def process_predictions(predictions_path: str):


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '-s',
        '--eval-split',
--- a/evaluation/benchmarks/lca_ci_build_repair/run_infer.py
+++ b/evaluation/benchmarks/lca_ci_build_repair/run_infer.py
@@ -30,8 +30,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
    load_openhands_config,
 )
 from openhands.core.logger import openhands_logger as logger
@@ -358,7 +358,7 @@ Be thorough in your exploration, testing, and reasoning. It's fine if your think


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '-s',
        '--eval-split',
--- a/evaluation/benchmarks/logic_reasoning/run_infer.py
+++ b/evaluation/benchmarks/logic_reasoning/run_infer.py
@@ -18,8 +18,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -267,7 +267,7 @@ def process_instance(


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
--- a/evaluation/benchmarks/mint/run_infer.py
+++ b/evaluation/benchmarks/mint/run_infer.py
@@ -23,8 +23,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -229,7 +229,7 @@ def process_instance(


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()

    SUBSETS = [
        # Eurus subset: https://arxiv.org/abs/2404.02078
--- a/evaluation/benchmarks/ml_bench/run_analysis.py
+++ b/evaluation/benchmarks/ml_bench/run_analysis.py
@@ -4,7 +4,11 @@ import pprint

 import tqdm

-from openhands.core.config import get_llm_config_arg, get_parser, load_openhands_config
+from openhands.core.config import (
+    get_evaluation_parser,
+    get_llm_config_arg,
+    load_openhands_config,
+)
 from openhands.core.logger import openhands_logger as logger
 from openhands.llm.llm import LLM

@@ -111,7 +115,7 @@ def classify_error(llm: LLM, failed_case: dict) -> str:


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--json_file_path',
        type=str,
--- a/evaluation/benchmarks/ml_bench/run_infer.py
+++ b/evaluation/benchmarks/ml_bench/run_infer.py
@@ -34,8 +34,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
    load_openhands_config,
 )
 from openhands.core.logger import openhands_logger as logger
@@ -273,7 +273,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '-s',
        '--eval-split',
--- a/evaluation/benchmarks/multi_swe_bench/eval_infer.py
+++ b/evaluation/benchmarks/multi_swe_bench/eval_infer.py
@@ -30,7 +30,7 @@ from evaluation.utils.shared import (
 from openhands.core.config import (
    LLMConfig,
    OpenHandsConfig,
-    get_parser,
+    get_evaluation_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime
@@ -323,7 +323,7 @@ def process_instance(


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--input-file',
        type=str,
--- a/evaluation/benchmarks/multi_swe_bench/run_infer.py
+++ b/evaluation/benchmarks/multi_swe_bench/run_infer.py
@@ -32,8 +32,8 @@ from openhands.controller.state.state import State
 from openhands.core.config import (
    AgentConfig,
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -772,7 +772,7 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:

 if __name__ == '__main__':
    # pdb.set_trace()
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
--- a/evaluation/benchmarks/scienceagentbench/run_infer.py
+++ b/evaluation/benchmarks/scienceagentbench/run_infer.py
@@ -21,8 +21,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -239,7 +239,7 @@ If the program uses some packages that are incompatible, please figure out alter


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--use-knowledge',
        type=str,
--- a/evaluation/benchmarks/swe_bench/README.md
+++ b/evaluation/benchmarks/swe_bench/README.md
@@ -183,24 +183,7 @@ The final results will be saved to `evaluation/evaluation_outputs/outputs/swe_be
 - `report.json`: a JSON file that contains keys like `"resolved_ids"` pointing to instance IDs that are resolved by the agent.
 - `logs/`: a directory of test logs

-### Run evaluation with `RemoteRuntime`

-OpenHands Remote Runtime is currently in beta (read [here](https://runtime.all-hands.dev/) for more details), it allows you to run rollout in parallel in the cloud, so you don't need a powerful machine to run evaluation.
-Fill out [this form](https://docs.google.com/forms/d/e/1FAIpQLSckVz_JFwg2_mOxNZjCtr7aoBFI2Mwdan3f75J_TrdMS1JV2g/viewform) to apply if you want to try this out!
-
-```bash
-./evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh [output.jsonl filepath] [num_workers]
-
-# Example - This evaluates patches generated by CodeActAgent on Llama-3.1-70B-Instruct-Turbo on "princeton-nlp/SWE-bench_Lite"'s test set, with 16 number of workers running in parallel
-ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.eval.all-hands.dev" EVAL_DOCKER_IMAGE_PREFIX="us-central1-docker.pkg.dev/evaluation-092424/swe-bench-images" \
-evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh evaluation/evaluation_outputs/outputs/swe-bench-lite/CodeActAgent/Llama-3.1-70B-Instruct-Turbo_maxiter_100_N_v1.9-no-hint/output.jsonl 16 "princeton-nlp/SWE-bench_Lite" "test"
-```
-
-To clean-up all existing runtimes that you've already started, run:
-
-```bash
-ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/utils/scripts/cleanup_remote_runtime.sh
-```

 ## SWT-Bench Evaluation

--- a/evaluation/benchmarks/swe_bench/eval_infer.py
+++ b/evaluation/benchmarks/swe_bench/eval_infer.py
@@ -26,7 +26,7 @@ from evaluation.utils.shared import (
 from openhands.core.config import (
    LLMConfig,
    OpenHandsConfig,
-    get_parser,
+    get_evaluation_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime
@@ -353,7 +353,7 @@ def process_instance(


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--input-file',
        type=str,
--- a/evaluation/benchmarks/swe_bench/run_infer.py
+++ b/evaluation/benchmarks/swe_bench/run_infer.py
@@ -43,8 +43,8 @@ from openhands.controller.state.state import State
 from openhands.core.config import (
    AgentConfig,
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.config.condenser_config import NoOpCondenserConfig
 from openhands.core.config.utils import get_condenser_config_arg
@@ -732,7 +732,7 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
--- a/evaluation/benchmarks/swe_bench/run_infer_interact.py
+++ b/evaluation/benchmarks/swe_bench/run_infer_interact.py
@@ -28,8 +28,8 @@ from evaluation.utils.shared import (
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.config.condenser_config import NoOpCondenserConfig
 from openhands.core.config.utils import get_condenser_config_arg
@@ -201,7 +201,7 @@ def process_instance(


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
--- a/evaluation/benchmarks/swe_bench/run_localize.py
+++ b/evaluation/benchmarks/swe_bench/run_localize.py
@@ -31,8 +31,8 @@ from openhands.controller.state.state import State
 from openhands.core.config import (
    AgentConfig,
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -644,7 +644,7 @@ SWEGYM_EXCLUDE_IDS = [
 ]

 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
--- a/evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh
+++ b/evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh
@@ -1,46 +0,0 @@
-#!/usr/bin/env bash
-set -eo pipefail
-
-INPUT_FILE=$1
-NUM_WORKERS=$2
-DATASET=$3
-SPLIT=$4
-
-if [ -z "$INPUT_FILE" ]; then
-  echo "INPUT_FILE not specified (should be a path to a jsonl file)"
-  exit 1
-fi
-
-if [ -z "$DATASET" ]; then
-  echo "DATASET not specified, use default princeton-nlp/SWE-bench_Lite"
-  DATASET="princeton-nlp/SWE-bench_Lite"
-fi
-
-if [ -z "$SPLIT" ]; then
-  echo "SPLIT not specified, use default test"
-  SPLIT="test"
-fi
-
-if [ -z "$NUM_WORKERS" ]; then
-  echo "NUM_WORKERS not specified, use default 1"
-  NUM_WORKERS=1
-fi
-
-echo "... Evaluating on $INPUT_FILE ..."
-
-COMMAND="poetry run python evaluation/benchmarks/swe_bench/eval_infer.py \
-  --eval-num-workers $NUM_WORKERS \
-  --input-file $INPUT_FILE \
-  --dataset $DATASET \
-  --split $SPLIT"
-
-if [ -n "$EVAL_LIMIT" ]; then
-  echo "EVAL_LIMIT: $EVAL_LIMIT"
-  COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
-fi
-
-# Run the command
-eval $COMMAND
-
-# update the output with evaluation results
-poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $INPUT_FILE
--- a/evaluation/benchmarks/testgeneval/NOTES.md
+++ b/evaluation/benchmarks/testgeneval/NOTES.md
@@ -5,8 +5,7 @@ pynguin_ids = ['pydata__xarray-6548-16541', 'pydata__xarray-7003-16557', 'pydata
 ids = ['pydata__xarray-3114-16452', 'pydata__xarray-3151-16453', 'pydata__xarray-3156-16454', 'pydata__xarray-3239-16456', 'pydata__xarray-3239-16457', 'pydata__xarray-3239-16458', 'pydata__xarray-3302-16459', 'pydata__xarray-3364-16461', 'pydata__xarray-3677-16471', 'pydata__xarray-3905-16478', 'pydata__xarray-4182-16484', 'pydata__xarray-4248-16486', 'pydata__xarray-4339-16487', 'pydata__xarray-4419-16488', 'pydata__xarray-4629-16492', 'pydata__xarray-4750-16496', 'pydata__xarray-4802-16505', 'pydata__xarray-4966-16515', 'pydata__xarray-4994-16516', 'pydata__xarray-5033-16517', 'pydata__xarray-5126-16518', 'pydata__xarray-5126-16519', 'pydata__xarray-5131-16520', 'pydata__xarray-5365-16529', 'pydata__xarray-5455-16530', 'pydata__xarray-5662-16532', 'pydata__xarray-5731-16534', 'pydata__xarray-6135-16535', 'pydata__xarray-6135-16536', 'pydata__xarray-6386-16537', 'pydata__xarray-6394-16538', 'pydata__xarray-6400-16539', 'pydata__xarray-6461-16540', 'pydata__xarray-6548-16541', 'pydata__xarray-6599-16543', 'pydata__xarray-6601-16544', 'pydata__xarray-6882-16548', 'pydata__xarray-6889-16549', 'pydata__xarray-7003-16557', 'pydata__xarray-7147-16571', 'pydata__xarray-7150-16572', 'pydata__xarray-7203-16577', 'pydata__xarray-7229-16578', 'pydata__xarray-7393-16581', 'pydata__xarray-7400-16582']


-Command eval (our approach):
-poetry run ./evaluation/benchmarks/testgeneval/scripts/eval_infer_remote.sh evaluation/evaluation_outputs/outputs/kjain14__testgeneval-test/CodeActAgent/gpt-4o_maxiter_25_N_v0.20.0-no-hint-run_1/output.jsonl 10 kjain14/testgeneval test true
+

 Command run (our approach):
 ./evaluation/benchmarks/testgeneval/scripts/run_infer.sh llm.eval_gpt HEAD CodeActAgent -1 25 10 kjain14/testgeneval test 1 ../TestGenEval/results/testgeneval/preds/gpt-4o-2024-08-06__testgeneval__0.2__test.jsonl
--- a/evaluation/benchmarks/testgeneval/eval_infer.py
+++ b/evaluation/benchmarks/testgeneval/eval_infer.py
@@ -41,7 +41,7 @@ from evaluation.utils.shared import (
    reset_logger_for_multiprocessing,
    run_evaluation,
 )
-from openhands.core.config import OpenHandsConfig, SandboxConfig, get_parser
+from openhands.core.config import OpenHandsConfig, SandboxConfig, get_evaluation_parser
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime
 from openhands.events.action import CmdRunAction
@@ -484,7 +484,7 @@ def count_and_log_fields(evaluated_predictions, fields, key):


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--input-file', type=str, required=True, help='Path to input predictions file'
    )
--- a/evaluation/benchmarks/testgeneval/run_infer.py
+++ b/evaluation/benchmarks/testgeneval/run_infer.py
@@ -37,8 +37,8 @@ from openhands.core.config import (
    AgentConfig,
    OpenHandsConfig,
    SandboxConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -491,7 +491,7 @@ def prepare_dataset_pre(dataset: pd.DataFrame, filter_column: str) -> pd.DataFra


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
--- a/evaluation/benchmarks/the_agent_company/run_infer.py
+++ b/evaluation/benchmarks/the_agent_company/run_infer.py
@@ -18,8 +18,8 @@ from openhands.core.config import (
    LLMConfig,
    OpenHandsConfig,
    get_agent_config_arg,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.config.agent_config import AgentConfig
 from openhands.core.logger import openhands_logger as logger
@@ -197,7 +197,7 @@ def run_evaluator(


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--task-image-name',
        type=str,
--- a/evaluation/benchmarks/toolqa/run_infer.py
+++ b/evaluation/benchmarks/toolqa/run_infer.py
@@ -19,8 +19,8 @@ from evaluation.utils.shared import (
 from openhands.controller.state.state import State
 from openhands.core.config import (
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -157,7 +157,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =


 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
--- a/evaluation/benchmarks/visual_swe_bench/run_infer.py
+++ b/evaluation/benchmarks/visual_swe_bench/run_infer.py
@@ -31,8 +31,8 @@ from openhands.controller.state.state import State
 from openhands.core.config import (
    AgentConfig,
    OpenHandsConfig,
+    get_evaluation_parser,
    get_llm_config_arg,
-    get_parser,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
@@ -565,7 +565,7 @@ SWEGYM_EXCLUDE_IDS = [
 ]

 if __name__ == '__main__':
-    parser = get_parser()
+    parser = get_evaluation_parser()
    parser.add_argument(
        '--dataset',
        type=str,
--- a/frontend/tests/api/file-service/file-service.api.test.ts
+++ b/frontend/tests/api/file-service/file-service.api.test.ts
@@ -1,5 +1,5 @@
 import { describe, expect, it } from "vitest";
-import { FileService } from "#/api/file-service/file-service.api";
+import OpenHands from "#/api/open-hands";
 import {
  FILE_VARIANTS_1,
  FILE_VARIANTS_2,
@@ -10,20 +10,20 @@ import {
 * You can find the mock handlers in `frontend/src/mocks/file-service-handlers.ts`.
 */

-describe("FileService", () => {
+describe("OpenHands File API", () => {
  it("should get a list of files", async () => {
-    await expect(FileService.getFiles("test-conversation-id")).resolves.toEqual(
+    await expect(OpenHands.getFiles("test-conversation-id")).resolves.toEqual(
      FILE_VARIANTS_1,
    );

    await expect(
-      FileService.getFiles("test-conversation-id-2"),
+      OpenHands.getFiles("test-conversation-id-2"),
    ).resolves.toEqual(FILE_VARIANTS_2);
  });

  it("should get content of a file", async () => {
    await expect(
-      FileService.getFile("test-conversation-id", "file1.txt"),
+      OpenHands.getFile("test-conversation-id", "file1.txt"),
    ).resolves.toEqual("Content of file1.txt");
  });
 });
--- a/frontend/tests/components/features/chat/launch-microagent-modal.test.tsx
+++ b/frontend/tests/components/features/chat/launch-microagent-modal.test.tsx
@@ -3,8 +3,6 @@ import { afterEach, describe, expect, it, vi } from "vitest";
 import userEvent from "@testing-library/user-event";
 import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
 import { LaunchMicroagentModal } from "#/components/features/chat/microagent/launch-microagent-modal";
-import { MemoryService } from "#/api/memory-service/memory-service.api";
-import { FileService } from "#/api/file-service/file-service.api";
 import { I18nKey } from "#/i18n/declaration";

 vi.mock("react-router", async () => ({
--- a/frontend/tests/utils/extract-model-and-provider.test.ts
+++ b/frontend/tests/utils/extract-model-and-provider.test.ts
@@ -82,5 +82,11 @@ describe("extractModelAndProvider", () => {
      model: "claude-opus-4-20250514",
      separator: "/",
    });
+
+    expect(extractModelAndProvider("claude-opus-4-1-20250805")).toEqual({
+      provider: "anthropic",
+      model: "claude-opus-4-1-20250805",
+      separator: "/",
+    });
  });
 });
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -1,44 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-  <head>
-    <meta charset="utf-8" />
-    <link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
-    <link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
-    <link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">
-    <link rel="manifest" href="/site.webmanifest">
-    <link rel="mask-icon" href="/safari-pinned-tab.svg" color="#5bbad5">
-    <meta name="msapplication-TileColor" content="#da532c">
-    <meta name="theme-color" content="#ffffff">
-    <meta name="viewport" content="width=device-width, initial-scale=1" />
-    <meta name="theme-color" content="#000000" />
-    <meta
-      name="description"
-      content="OpenHands: Code Less, Make More"
-    />
-    <!--
-      Notice the use of %PUBLIC_URL% in the tags above.
-      It will be replaced with the URL of the `public` folder during the build.
-      Only files inside the `public` folder can be referenced from the HTML.
-
-      Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
-      work correctly both with client-side routing and a non-root public URL.
-      Learn how to configure a non-root public URL by running `npm run build`.
-    -->
-    <title>OpenHands</title>
-  </head>
-  <body>
-    <noscript>You need to enable JavaScript to run this app.</noscript>
-    <div id="root"></div>
-    <!--
-      This HTML file is a template.
-      If you open it directly in the browser, you will see an empty page.
-
-      You can add webfonts, meta tags, or analytics to this file.
-      The build step will place the bundled scripts into the <body> tag.
-
-      To begin the development, run `npm start` or `yarn start`.
-      To create a production bundle, use `npm run build` or `yarn build`.
-    -->
-    <script type="module" src="/src/index.tsx"></script>
-  </body>
-</html>
--- a/frontend/src/api/file-service/file-service.api.ts
+++ b/frontend/src/api/file-service/file-service.api.ts
@@ -1,66 +0,0 @@
-import { openHands } from "../open-hands-axios";
-import { GetFilesResponse, GetFileResponse } from "./file-service.types";
-import { getConversationUrl } from "../conversation.utils";
-import { FileUploadSuccessResponse } from "../open-hands.types";
-
-export class FileService {
-  /**
-   * Retrieve the list of files available in the workspace
-   * @param conversationId ID of the conversation
-   * @param path Path to list files from. If provided, it lists all the files in the given path
-   * @returns List of files available in the given path. If path is not provided, it lists all the files in the workspace
-   */
-  static async getFiles(
-    conversationId: string,
-    path?: string,
-  ): Promise<GetFilesResponse> {
-    const url = `${getConversationUrl(conversationId)}/list-files`;
-    const { data } = await openHands.get<GetFilesResponse>(url, {
-      params: { path },
-    });
-
-    return data;
-  }
-
-  /**
-   * Retrieve the content of a file
-   * @param conversationId ID of the conversation
-   * @param path Full path of the file to retrieve
-   * @returns Code content of the file
-   */
-  static async getFile(conversationId: string, path: string): Promise<string> {
-    const url = `${getConversationUrl(conversationId)}/select-file`;
-    const { data } = await openHands.get<GetFileResponse>(url, {
-      params: { file: path },
-    });
-
-    return data.code;
-  }
-
-  /**
-   * Upload multiple files to the workspace
-   * @param conversationId ID of the conversation
-   * @param files List of files.
-   * @returns list of uploaded files, list of skipped files
-   */
-  static async uploadFiles(
-    conversationId: string,
-    files: File[],
-  ): Promise<FileUploadSuccessResponse> {
-    const formData = new FormData();
-    for (const file of files) {
-      formData.append("files", file);
-    }
-    const url = `${getConversationUrl(conversationId)}/upload-files`;
-    const response = await openHands.post<FileUploadSuccessResponse>(
-      url,
-      formData,
-      {
-        headers: {
-          "Content-Type": "multipart/form-data",
-        },
-      },
-    );
-    return response.data;
-  }
-}
--- a/frontend/src/api/file-service/file-service.types.ts
+++ b/frontend/src/api/file-service/file-service.types.ts
@@ -1,5 +0,0 @@
-export type GetFilesResponse = string[];
-
-export interface GetFileResponse {
-  code: string;
-}
--- a/frontend/src/api/memory-service/memory-service.api.ts
+++ b/frontend/src/api/memory-service/memory-service.api.ts
@@ -1,21 +0,0 @@
-import { openHands } from "../open-hands-axios";
-
-interface GetPromptResponse {
-  status: string;
-  prompt: string;
-}
-
-export class MemoryService {
-  static async getPrompt(
-    conversationId: string,
-    eventId: number,
-  ): Promise<string> {
-    const { data } = await openHands.get<GetPromptResponse>(
-      `/api/conversations/${conversationId}/remember_prompt`,
-      {
-        params: { event_id: eventId },
-      },
-    );
-    return data.prompt;
-  }
-}
--- a/frontend/src/api/open-hands.ts
+++ b/frontend/src/api/open-hands.ts
@@ -15,6 +15,9 @@ import {
  GetMicroagentPromptResponse,
  CreateMicroagent,
  MicroagentContentResponse,
+  FileUploadSuccessResponse,
+  GetFilesResponse,
+  GetFileResponse,
 } from "./open-hands.types";
 import { openHands } from "./open-hands-axios";
 import { ApiSettings, PostApiSettings, Provider } from "#/types/settings";
@@ -618,12 +621,11 @@ class OpenHands {
    conversationId: string,
    eventId: number,
  ): Promise<string> {
-    const { data } = await openHands.get<GetMicroagentPromptResponse>(
-      `/api/conversations/${conversationId}/remember_prompt`,
-      {
-        params: { event_id: eventId },
-      },
-    );
+    const url = `${this.getConversationUrl(conversationId)}/remember-prompt`;
+    const { data } = await openHands.get<GetMicroagentPromptResponse>(url, {
+      params: { event_id: eventId },
+      headers: this.getConversationHeaders(),
+    });

    return data.prompt;
  }
@@ -640,6 +642,69 @@ class OpenHands {
    return data;
  }

+  /**
+   * Retrieve the list of files available in the workspace
+   * @param conversationId ID of the conversation
+   * @param path Path to list files from. If provided, it lists all the files in the given path
+   * @returns List of files available in the given path. If path is not provided, it lists all the files in the workspace
+   */
+  static async getFiles(
+    conversationId: string,
+    path?: string,
+  ): Promise<GetFilesResponse> {
+    const url = `${this.getConversationUrl(conversationId)}/list-files`;
+    const { data } = await openHands.get<GetFilesResponse>(url, {
+      params: { path },
+      headers: this.getConversationHeaders(),
+    });
+
+    return data;
+  }
+
+  /**
+   * Retrieve the content of a file
+   * @param conversationId ID of the conversation
+   * @param path Full path of the file to retrieve
+   * @returns Code content of the file
+   */
+  static async getFile(conversationId: string, path: string): Promise<string> {
+    const url = `${this.getConversationUrl(conversationId)}/select-file`;
+    const { data } = await openHands.get<GetFileResponse>(url, {
+      params: { file: path },
+      headers: this.getConversationHeaders(),
+    });
+
+    return data.code;
+  }
+
+  /**
+   * Upload multiple files to the workspace
+   * @param conversationId ID of the conversation
+   * @param files List of files.
+   * @returns list of uploaded files, list of skipped files
+   */
+  static async uploadFiles(
+    conversationId: string,
+    files: File[],
+  ): Promise<FileUploadSuccessResponse> {
+    const formData = new FormData();
+    for (const file of files) {
+      formData.append("files", file);
+    }
+    const url = `${this.getConversationUrl(conversationId)}/upload-files`;
+    const response = await openHands.post<FileUploadSuccessResponse>(
+      url,
+      formData,
+      {
+        headers: {
+          "Content-Type": "multipart/form-data",
+          ...this.getConversationHeaders(),
+        },
+      },
+    );
+    return response.data;
+  }
+
  /**
   * Get the user installation IDs
   * @param provider The provider to get installation IDs for (github, bitbucket, etc.)
--- a/frontend/src/api/open-hands.types.ts
+++ b/frontend/src/api/open-hands.types.ts
@@ -158,3 +158,9 @@ export interface MicroagentContentResponse {
  git_provider: Provider;
  triggers: string[];
 }
+
+export type GetFilesResponse = string[];
+
+export interface GetFileResponse {
+  code: string;
+}
--- a/frontend/src/hooks/mutation/use-upload-files.ts
+++ b/frontend/src/hooks/mutation/use-upload-files.ts
@@ -1,11 +1,11 @@
 import { useMutation } from "@tanstack/react-query";
-import { FileService } from "#/api/file-service/file-service.api";
+import OpenHands from "#/api/open-hands";

 export const useUploadFiles = () =>
  useMutation({
    mutationKey: ["upload-files"],
    mutationFn: (variables: { conversationId: string; files: File[] }) =>
-      FileService.uploadFiles(variables.conversationId!, variables.files),
+      OpenHands.uploadFiles(variables.conversationId!, variables.files),
    onSuccess: async () => {},
    meta: {
      disableToast: true,
--- a/frontend/src/hooks/query/use-get-microagents.ts
+++ b/frontend/src/hooks/query/use-get-microagents.ts
@@ -1,13 +1,13 @@
 import { useQuery } from "@tanstack/react-query";
 import { useConversationId } from "../use-conversation-id";
-import { FileService } from "#/api/file-service/file-service.api";
+import OpenHands from "#/api/open-hands";

 export const useGetMicroagents = (microagentDirectory: string) => {
  const { conversationId } = useConversationId();

  return useQuery({
    queryKey: ["files", "microagents", conversationId, microagentDirectory],
-    queryFn: () => FileService.getFiles(conversationId!, microagentDirectory),
+    queryFn: () => OpenHands.getFiles(conversationId!, microagentDirectory),
    enabled: !!conversationId,
    select: (data) =>
      data.map((fileName) => fileName.replace(microagentDirectory, "")),
--- a/frontend/src/hooks/query/use-microagent-prompt.ts
+++ b/frontend/src/hooks/query/use-microagent-prompt.ts
@@ -1,5 +1,5 @@
 import { useQuery } from "@tanstack/react-query";
-import { MemoryService } from "#/api/memory-service/memory-service.api";
+import OpenHands from "#/api/open-hands";
 import { useConversationId } from "../use-conversation-id";

 export const useMicroagentPrompt = (eventId: number) => {
@@ -7,7 +7,7 @@ export const useMicroagentPrompt = (eventId: number) => {

  return useQuery({
    queryKey: ["memory", "prompt", conversationId, eventId],
-    queryFn: () => MemoryService.getPrompt(conversationId!, eventId),
+    queryFn: () => OpenHands.getMicroagentPrompt(conversationId!, eventId),
    enabled: !!conversationId,
    staleTime: 1000 * 60 * 5, // 5 minutes
    gcTime: 1000 * 60 * 15, // 15 minutes
--- a/frontend/src/routes/app-settings.tsx
+++ b/frontend/src/routes/app-settings.tsx
@@ -222,7 +222,7 @@ function AppSettingsScreen() {
            className="w-full max-w-[680px]" // Match the width of the language field
          />

-          <div className="border-t border-t-tertiary pt-6 mt-2">
+          <div className="border-t border-t-tertiary pt-6 mt-2 hidden">
            <h3 className="text-lg font-medium mb-4">
              {t(I18nKey.SETTINGS$GIT_SETTINGS)}
            </h3>
--- a/frontend/src/utils/verified-models.ts
+++ b/frontend/src/utils/verified-models.ts
@@ -14,6 +14,7 @@ export const VERIFIED_MODELS = [
  "claude-3-7-sonnet-20250219",
  "claude-sonnet-4-20250514",
  "claude-opus-4-20250514",
+  "claude-opus-4-1-20250805",
  "gemini-2.5-pro",
  "o4-mini",
  "deepseek-chat",
@@ -22,11 +23,13 @@ export const VERIFIED_MODELS = [
  "devstral-medium-2507",
  "kimi-k2-0711-preview",
  "qwen3-coder-480b",
+  "gpt-5-2025-08-07",
 ];

 // LiteLLM does not return OpenAI models with the provider, so we list them here to set them ourselves for consistency
 // (e.g., they return `gpt-4o` instead of `openai/gpt-4o`)
 export const VERIFIED_OPENAI_MODELS = [
+  "gpt-5-2025-08-07",
  "gpt-4o",
  "gpt-4o-mini",
  "gpt-4.1",
@@ -47,6 +50,7 @@ export const VERIFIED_ANTHROPIC_MODELS = [
  "claude-3-7-sonnet-20250219",
  "claude-sonnet-4-20250514",
  "claude-opus-4-20250514",
+  "claude-opus-4-1-20250805",
 ];

 // LiteLLM does not return the compatible Mistral models with the provider, so we list them here to set them ourselves
@@ -61,7 +65,9 @@ export const VERIFIED_MISTRAL_MODELS = [
 // (e.g., they return `claude-sonnet-4-20250514` instead of `openhands/claude-sonnet-4-20250514`)
 export const VERIFIED_OPENHANDS_MODELS = [
  "claude-sonnet-4-20250514",
+  "gpt-5-2025-08-07",
  "claude-opus-4-20250514",
+  "claude-opus-4-1-20250805",
  "gemini-2.5-pro",
  "o3",
  "o4-mini",
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -106,10 +106,15 @@ class CodeActAgent(Agent):
    def _get_tools(self) -> list['ChatCompletionToolParam']:
        # For these models, we use short tool descriptions ( < 1024 tokens)
        # to avoid hitting the OpenAI token limit for tool descriptions.
-        SHORT_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-', 'o3', 'o1', 'o4']
+        SHORT_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-4', 'o3', 'o1', 'o4']

        use_short_tool_desc = False
        if self.llm is not None:
+            # Historically, OpenAI enforced a maximum function description length of about 1k characters:
+            # https://community.openai.com/t/function-call-description-max-length/529902
+            # That limit no longer seems to be enforced:
+            # https://community.openai.com/t/was-the-character-limit-for-schema-descriptions-upgraded/1225975
+            # Longer descriptions were tested on GPT-5 and work, but we keep this logic to be safe for older models.
            use_short_tool_desc = any(
                model_substr in self.llm.config.model
                for model_substr in SHORT_TOOL_DESCRIPTION_LLM_SUBSTRS
--- a/openhands/agenthub/codeact_agent/tools/bash.py
+++ b/openhands/agenthub/codeact_agent/tools/bash.py
@@ -1,3 +1,4 @@
+import re
 import sys

 from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
@@ -37,7 +38,16 @@ _SHORT_BASH_DESCRIPTION = """Execute a bash command in the terminal.

 def refine_prompt(prompt: str):
    if sys.platform == 'win32':
-        return prompt.replace('bash', 'powershell')
+        # Replace 'bash' with 'powershell' including tool names like 'execute_bash'
+        # First replace 'execute_bash' with 'execute_powershell' to handle tool names
+        result = re.sub(
+            r'\bexecute_bash\b', 'execute_powershell', prompt, flags=re.IGNORECASE
+        )
+        # Then replace standalone 'bash' with 'powershell'
+        result = re.sub(
+            r'(?<!execute_)(?<!_)\bbash\b', 'powershell', result, flags=re.IGNORECASE
+        )
+        return result
    return prompt


--- a/openhands/cli/__init__.py
+++ b/openhands/cli/__init__.py
@@ -0,0 +1 @@
+"""OpenHands CLI module."""
--- a/openhands/cli/entry.py
+++ b/openhands/cli/entry.py
@@ -0,0 +1,54 @@
+"""Main entry point for OpenHands CLI with subcommand support."""
+
+import sys
+
+import openhands
+import openhands.cli.suppress_warnings  # noqa: F401
+from openhands.cli.gui_launcher import launch_gui_server
+from openhands.cli.main import run_cli_command
+from openhands.core.config import get_cli_parser
+from openhands.core.config.arg_utils import get_subparser
+
+
+def main():
+    """Main entry point with subcommand support and backward compatibility."""
+    parser = get_cli_parser()
+
+    # If user only asks for --help or -h without a subcommand
+    if len(sys.argv) == 2 and sys.argv[1] in ('--help', '-h'):
+        # Print top-level help
+        print(parser.format_help())
+
+        # Also print help for `cli` subcommand
+        print('\n' + '=' * 80)
+        print('CLI command help:\n')
+
+        cli_parser = get_subparser(parser, 'cli')
+        print(cli_parser.format_help())
+
+        sys.exit(0)
+
+    # Special case: no subcommand provided, simulate "openhands cli"
+    if len(sys.argv) == 1 or (
+        len(sys.argv) > 1 and sys.argv[1] not in ['cli', 'serve']
+    ):
+        # Inject 'cli' as default command
+        sys.argv.insert(1, 'cli')
+
+    args = parser.parse_args()
+
+    if hasattr(args, 'version') and args.version:
+        print(f'OpenHands CLI version: {openhands.get_version()}')
+        sys.exit(0)
+
+    if args.command == 'serve':
+        launch_gui_server(mount_cwd=args.mount_cwd, gpu=args.gpu)
+    elif args.command == 'cli' or args.command is None:
+        run_cli_command(args)
+    else:
+        parser.print_help()
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
--- a/openhands/cli/gui_launcher.py
+++ b/openhands/cli/gui_launcher.py
@@ -0,0 +1,219 @@
+"""GUI launcher for OpenHands CLI."""
+
+import os
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+from prompt_toolkit import print_formatted_text
+from prompt_toolkit.formatted_text import HTML
+
+from openhands import __version__
+
+
+def _format_docker_command_for_logging(cmd: list[str]) -> str:
+    """Format a Docker command for logging with grey color.
+
+    Args:
+        cmd (list[str]): The Docker command as a list of strings
+
+    Returns:
+        str: The formatted command string in grey HTML color
+    """
+    cmd_str = ' '.join(cmd)
+    return f'<grey>Running Docker command: {cmd_str}</grey>'
+
+
+def check_docker_requirements() -> bool:
+    """Check if Docker is installed and running.
+
+    Returns:
+        bool: True if Docker is available and running, False otherwise.
+    """
+    # Check if Docker is installed
+    if not shutil.which('docker'):
+        print_formatted_text(
+            HTML('<ansired>❌ Docker is not installed or not in PATH.</ansired>')
+        )
+        print_formatted_text(
+            HTML(
+                '<grey>Please install Docker first: https://docs.docker.com/get-docker/</grey>'
+            )
+        )
+        return False
+
+    # Check if Docker daemon is running
+    try:
+        result = subprocess.run(
+            ['docker', 'info'], capture_output=True, text=True, timeout=10
+        )
+        if result.returncode != 0:
+            print_formatted_text(
+                HTML('<ansired>❌ Docker daemon is not running.</ansired>')
+            )
+            print_formatted_text(
+                HTML('<grey>Please start Docker and try again.</grey>')
+            )
+            return False
+    except (subprocess.TimeoutExpired, subprocess.SubprocessError) as e:
+        print_formatted_text(
+            HTML('<ansired>❌ Failed to check Docker status.</ansired>')
+        )
+        print_formatted_text(HTML(f'<grey>Error: {e}</grey>'))
+        return False
+
+    return True
+
+
+def ensure_config_dir_exists() -> Path:
+    """Ensure the OpenHands configuration directory exists and return its path."""
+    config_dir = Path.home() / '.openhands'
+    config_dir.mkdir(exist_ok=True)
+    return config_dir
+
+
+def launch_gui_server(mount_cwd: bool = False, gpu: bool = False) -> None:
+    """Launch the OpenHands GUI server using Docker.
+
+    Args:
+        mount_cwd: If True, mount the current working directory into the container.
+        gpu: If True, enable GPU support by mounting all GPUs into the container via nvidia-docker.
+    """
+    print_formatted_text(
+        HTML('<ansiblue>🚀 Launching OpenHands GUI server...</ansiblue>')
+    )
+    print_formatted_text('')
+
+    # Check Docker requirements
+    if not check_docker_requirements():
+        sys.exit(1)
+
+    # Ensure config directory exists
+    config_dir = ensure_config_dir_exists()
+
+    # Get the current version for the Docker image
+    version = __version__
+    runtime_image = f'docker.all-hands.dev/all-hands-ai/runtime:{version}-nikolaik'
+    app_image = f'docker.all-hands.dev/all-hands-ai/openhands:{version}'
+
+    print_formatted_text(HTML('<grey>Pulling required Docker images...</grey>'))
+
+    # Pull the runtime image first
+    pull_cmd = ['docker', 'pull', runtime_image]
+    print_formatted_text(HTML(_format_docker_command_for_logging(pull_cmd)))
+    try:
+        subprocess.run(
+            pull_cmd,
+            check=True,
+            timeout=300,  # 5 minutes timeout
+        )
+    except subprocess.CalledProcessError:
+        print_formatted_text(
+            HTML('<ansired>❌ Failed to pull runtime image.</ansired>')
+        )
+        sys.exit(1)
+    except subprocess.TimeoutExpired:
+        print_formatted_text(
+            HTML('<ansired>❌ Timeout while pulling runtime image.</ansired>')
+        )
+        sys.exit(1)
+
+    print_formatted_text('')
+    print_formatted_text(
+        HTML('<ansigreen>✅ Starting OpenHands GUI server...</ansigreen>')
+    )
+    print_formatted_text(
+        HTML('<grey>The server will be available at: http://localhost:3000</grey>')
+    )
+    print_formatted_text(HTML('<grey>Press Ctrl+C to stop the server.</grey>'))
+    print_formatted_text('')
+
+    # Build the Docker command
+    docker_cmd = [
+        'docker',
+        'run',
+        '-it',
+        '--rm',
+        '--pull=always',
+        '-e',
+        f'SANDBOX_RUNTIME_CONTAINER_IMAGE={runtime_image}',
+        '-e',
+        'LOG_ALL_EVENTS=true',
+        '-v',
+        '/var/run/docker.sock:/var/run/docker.sock',
+        '-v',
+        f'{config_dir}:/.openhands',
+    ]
+
+    # Add GPU support if requested
+    if gpu:
+        print_formatted_text(
+            HTML('<ansigreen>🖥️ Enabling GPU support via nvidia-docker...</ansigreen>')
+        )
+        # Add the --gpus all flag to enable all GPUs
+        docker_cmd.insert(2, '--gpus')
+        docker_cmd.insert(3, 'all')
+        # Add environment variable to pass GPU support to sandbox containers
+        docker_cmd.extend(
+            [
+                '-e',
+                'SANDBOX_ENABLE_GPU=true',
+            ]
+        )
+
+    # Add current working directory mount if requested
+    if mount_cwd:
+        cwd = Path.cwd()
+        # Following the documentation at https://docs.all-hands.dev/usage/runtimes/docker#connecting-to-your-filesystem
+        docker_cmd.extend(
+            [
+                '-e',
+                f'SANDBOX_VOLUMES={cwd}:/workspace:rw',
+            ]
+        )
+
+        # Set user ID for Unix-like systems only
+        if os.name != 'nt':  # Not Windows
+            try:
+                user_id = subprocess.check_output(['id', '-u'], text=True).strip()
+                docker_cmd.extend(['-e', f'SANDBOX_USER_ID={user_id}'])
+            except (subprocess.CalledProcessError, FileNotFoundError):
+                # If 'id' command fails or doesn't exist, skip setting user ID
+                pass
+        # Print the folder that will be mounted to inform the user
+        print_formatted_text(
+            HTML(
+                f'<ansigreen>📂 Mounting current directory:</ansigreen> <ansiyellow>{cwd}</ansiyellow> <ansigreen>to</ansigreen> <ansiyellow>/workspace</ansiyellow>'
+            )
+        )
+
+    docker_cmd.extend(
+        [
+            '-p',
+            '3000:3000',
+            '--add-host',
+            'host.docker.internal:host-gateway',
+            '--name',
+            'openhands-app',
+            app_image,
+        ]
+    )
+
+    try:
+        # Log and run the Docker command
+        print_formatted_text(HTML(_format_docker_command_for_logging(docker_cmd)))
+        subprocess.run(docker_cmd, check=True)
+    except subprocess.CalledProcessError as e:
+        print_formatted_text('')
+        print_formatted_text(
+            HTML('<ansired>❌ Failed to start OpenHands GUI server.</ansired>')
+        )
+        print_formatted_text(HTML(f'<grey>Error: {e}</grey>'))
+        sys.exit(1)
+    except KeyboardInterrupt:
+        print_formatted_text('')
+        print_formatted_text(
+            HTML('<ansigreen>✓ OpenHands GUI server stopped successfully.</ansigreen>')
+        )
+        sys.exit(0)
--- a/openhands/cli/main.py
+++ b/openhands/cli/main.py
@@ -45,7 +45,6 @@ from openhands.controller import AgentController
 from openhands.controller.agent import Agent
 from openhands.core.config import (
    OpenHandsConfig,
-    parse_arguments,
    setup_config_from_args,
 )
 from openhands.core.config.condenser_config import NoOpCondenserConfig
@@ -129,12 +128,13 @@ async def run_session(
    conversation_instructions: str | None = None,
    session_name: str | None = None,
    skip_banner: bool = False,
+    conversation_id: str | None = None,
 ) -> bool:
    reload_microagents = False
    new_session_requested = False
    exit_reason = ExitReason.INTENTIONAL

-    sid = generate_sid(config, session_name)
+    sid = conversation_id or generate_sid(config, session_name)
    is_loaded = asyncio.Event()
    is_paused = asyncio.Event()  # Event to track agent pause requests
    always_confirm_mode = False  # Flag to enable always confirm mode
@@ -523,10 +523,8 @@ def run_alias_setup_flow(config: OpenHandsConfig) -> None:
    print_formatted_text('')


-async def main_with_loop(loop: asyncio.AbstractEventLoop) -> None:
+async def main_with_loop(loop: asyncio.AbstractEventLoop, args) -> None:
    """Runs the agent in CLI mode."""
-    args = parse_arguments()
-
    # Set log level from command line argument if provided
    if args.log_level and isinstance(args.log_level, str):
        log_level = getattr(logging, str(args.log_level).upper())
@@ -574,13 +572,9 @@ async def main_with_loop(loop: asyncio.AbstractEventLoop) -> None:

    # Use settings from settings store if available and override with command line arguments
    if settings:
-        # Handle agent configuration
-        if args.agent_cls:
-            config.default_agent = str(args.agent_cls)
-        else:
-            # settings.agent is not None because we check for it in setup_config_from_args
-            assert settings.agent is not None
-            config.default_agent = settings.agent
+        # settings.agent is not None because we check for it in setup_config_from_args
+        assert settings.agent is not None
+        config.default_agent = settings.agent

        # Handle LLM configuration with proper precedence:
        # 1. CLI parameters (-l) have highest precedence (already handled in setup_config_from_args)
@@ -705,6 +699,7 @@ After reviewing the file, please ask the user what they would like to do with it
        task_str,
        session_name=args.name,
        skip_banner=banner_shown,
+        conversation_id=args.conversation,
    )

    # If a new session was requested, run it
@@ -717,18 +712,19 @@ After reviewing the file, please ask the user what they would like to do with it
    get_runtime_cls(config.runtime).teardown(config)


-def main():
+def run_cli_command(args):
+    """Run the CLI command with proper error handling and cleanup."""
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
-        loop.run_until_complete(main_with_loop(loop))
+        loop.run_until_complete(main_with_loop(loop, args))
    except KeyboardInterrupt:
        print_formatted_text('⚠️ Session was interrupted: interrupted\n')
    except ConnectionRefusedError as e:
-        print(f'Connection refused: {e}')
+        print_formatted_text(f'Connection refused: {e}')
        sys.exit(1)
    except Exception as e:
-        print(f'An error occurred: {e}')
+        print_formatted_text(f'An error occurred: {e}')
        sys.exit(1)
    finally:
        try:
@@ -741,9 +737,5 @@ def main():
            loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
            loop.close()
        except Exception as e:
-            print(f'Error during cleanup: {e}')
+            print_formatted_text(f'Error during cleanup: {e}')
            sys.exit(1)
-
-
-if __name__ == '__main__':
-    main()
--- a/openhands/cli/settings.py
+++ b/openhands/cli/settings.py
@@ -27,7 +27,7 @@ from openhands.core.config.condenser_config import (
    CondenserPipelineConfig,
    ConversationWindowCondenserConfig,
 )
-from openhands.core.config.utils import OH_DEFAULT_AGENT
+from openhands.core.config.config_utils import OH_DEFAULT_AGENT
 from openhands.memory.condenser.impl.llm_summarizing_condenser import (
    LLMSummarizingCondenserConfig,
 )
--- a/openhands/cli/tui.py
+++ b/openhands/cli/tui.py
@@ -5,7 +5,9 @@
 import asyncio
 import contextlib
 import datetime
+import io
 import json
+import shutil
 import sys
 import threading
 import time
@@ -28,6 +30,8 @@ from prompt_toolkit.patch_stdout import patch_stdout
 from prompt_toolkit.shortcuts import print_container
 from prompt_toolkit.styles import Style
 from prompt_toolkit.widgets import Frame, TextArea
+from rich.console import Console
+from rich.markdown import Markdown

 from openhands import __version__
 from openhands.core.config import OpenHandsConfig
@@ -36,6 +40,7 @@ from openhands.events import EventSource, EventStream
 from openhands.events.action import (
    Action,
    ActionConfirmationStatus,
+    AgentFinishAction,
    ChangeAgentStateAction,
    CmdRunAction,
    MCPAction,
@@ -65,10 +70,12 @@ MAX_RECENT_THOUGHTS = 5
 # Color and styling constants
 COLOR_GOLD = '#FFD700'
 COLOR_GREY = '#808080'
+COLOR_AGENT_BLUE = '#5FAFFF'  # Soft blue for all agent outputs
 DEFAULT_STYLE = Style.from_dict(
    {
        'gold': COLOR_GOLD,
        'grey': COLOR_GREY,
+        'agent-blue': COLOR_AGENT_BLUE,
        'prompt': f'{COLOR_GOLD} bold',
    }
 )
@@ -252,7 +259,19 @@ def display_thought_if_new(thought: str) -> None:
 def display_event(event: Event, config: OpenHandsConfig) -> None:
    global streaming_output_text_area
    with print_lock:
-        if isinstance(event, CmdRunAction):
+        if isinstance(event, AgentFinishAction):
+            # Handle agent finish actions with special styling
+            # Determine the message to display
+            if event.final_thought:
+                message = event.final_thought
+            elif event.thought:
+                message = event.thought
+            else:
+                message = "All done! What's next on the agenda?"
+            
+            # Display with finish styling
+            display_agent_message(message, is_finish=True)
+        elif isinstance(event, CmdRunAction):
            # For CmdRunAction, display thought first, then command
            if hasattr(event, 'thought') and event.thought:
                display_message(event.thought)
@@ -275,8 +294,8 @@ def display_event(event: Event, config: OpenHandsConfig) -> None:

        if isinstance(event, MessageAction):
            if event.source == EventSource.AGENT:
-                # Check if this message content is a duplicate thought
-                display_thought_if_new(event.content)
+                # Display agent messages with distinctive styling
+                display_agent_message(event.content)
        elif isinstance(event, CmdOutputObservation):
            display_command_output(event.content)
        elif isinstance(event, FileEditObservation):
@@ -291,6 +310,24 @@ def display_event(event: Event, config: OpenHandsConfig) -> None:
            display_error(event.content)


+def process_markdown_for_terminal(text: str) -> str:
+    """Render markdown to terminal text with Rich.
+
+    Surrounding blank lines are dropped; the first line's indentation is kept
+    so bullets, code blocks and centered headings stay aligned.
+    """
+    if not text:
+        return text
+
+    # width=None leaves the wrap width to Rich's own auto-detection.
+    console = Console(file=io.StringIO(), highlight=False, width=None)
+    console.print(Markdown(text))
+    lines = console.file.getvalue().splitlines()  # type: ignore
+    while lines and not lines[0].strip():
+        lines.pop(0)
+    return '\n'.join(lines).rstrip()
+
+
 def display_message(message: str) -> None:
    message = message.strip()

@@ -298,6 +335,38 @@ def display_message(message: str) -> None:
        print_formatted_text(f'\n{message}')


+def display_agent_message(message: str, is_finish: bool = False) -> None:
+    """
+    Print an agent message in agent blue under a short header line.
+
+    Args:
+        message: Raw agent text; stripped, then rendered as markdown.
+        is_finish: Use the "Agent Finished" header instead of "Agent Message".
+    """
+    body = message.strip()
+    if not body:
+        # Nothing to show for empty or whitespace-only messages.
+        return
+
+    # Markdown rendering is best-effort: any failure falls back to plain text.
+    try:
+        rendered = process_markdown_for_terminal(body)
+    except Exception:
+        rendered = body
+
+    if is_finish:
+        icon, header_text = '🎯', 'Agent Finished'
+    else:
+        icon, header_text = '🔹', 'Agent Message'
+    blue = 'fg:' + COLOR_AGENT_BLUE
+
+    # Header, blank line, body, blank line.
+    print_formatted_text(FormattedText([(blue, f'\n{icon} {header_text}')]))
+    print_formatted_text('')
+    print_formatted_text(FormattedText([(blue, rendered)]))
+    print_formatted_text('')
+
+
 def display_error(error: str) -> None:
    error = error.strip()

--- a/openhands/cli/utils.py
+++ b/openhands/cli/utils.py
@@ -150,6 +150,7 @@ def organize_models_and_providers(
 VERIFIED_PROVIDERS = ['openhands', 'anthropic', 'openai', 'mistral']

 VERIFIED_OPENAI_MODELS = [
+    'gpt-5-2025-08-07',
    'o4-mini',
    'gpt-4o',
    'gpt-4o-mini',
@@ -164,6 +165,7 @@ VERIFIED_OPENAI_MODELS = [
 VERIFIED_ANTHROPIC_MODELS = [
    'claude-sonnet-4-20250514',
    'claude-opus-4-20250514',
+    'claude-opus-4-1-20250805',
    'claude-3-7-sonnet-20250219',
    'claude-3-sonnet-20240229',
    'claude-3-opus-20240229',
@@ -183,7 +185,9 @@ VERIFIED_MISTRAL_MODELS = [

 VERIFIED_OPENHANDS_MODELS = [
    'claude-sonnet-4-20250514',
+    'gpt-5-2025-08-07',
    'claude-opus-4-20250514',
+    'claude-opus-4-1-20250805',
    'devstral-small-2507',
    'devstral-medium-2507',
    'o3',
--- a/openhands/core/config/__init__.py
+++ b/openhands/core/config/__init__.py
@@ -1,4 +1,9 @@
 from openhands.core.config.agent_config import AgentConfig
+from openhands.core.config.arg_utils import (
+    get_cli_parser,
+    get_evaluation_parser,
+    get_headless_parser,
+)
 from openhands.core.config.cli_config import CLIConfig
 from openhands.core.config.config_utils import (
    OH_DEFAULT_AGENT,
@@ -15,7 +20,6 @@ from openhands.core.config.utils import (
    finalize_config,
    get_agent_config_arg,
    get_llm_config_arg,
-    get_parser,
    load_from_env,
    load_from_toml,
    load_openhands_config,
@@ -41,7 +45,9 @@ __all__ = [
    'get_agent_config_arg',
    'get_llm_config_arg',
    'get_field_info',
-    'get_parser',
+    'get_cli_parser',
+    'get_headless_parser',
+    'get_evaluation_parser',
    'parse_arguments',
    'setup_config_from_args',
 ]
--- a/openhands/core/config/arg_utils.py
+++ b/openhands/core/config/arg_utils.py
@@ -0,0 +1,224 @@
+"""Centralized command line argument configuration for OpenHands CLI and headless modes."""
+
+import argparse
+from argparse import ArgumentParser, _SubParsersAction
+
+
+def get_subparser(parser: ArgumentParser, name: str) -> ArgumentParser:
+    """Return the subcommand parser registered as `name`, or raise ValueError."""
+    for candidate in parser._actions:
+        if isinstance(candidate, _SubParsersAction) and name in candidate.choices:
+            return candidate.choices[name]
+    raise ValueError(f"Subparser '{name}' not found")
+
+
+def add_common_arguments(parser: argparse.ArgumentParser) -> None:
+    """Register the options shared by the CLI and headless parsers.
+
+    Options are added in this order: config file, task, task file, session name,
+    log level, LLM config, agent config, version flag.
+    """
+    common_options = (
+        (
+            ('--config-file',),
+            {'type': str, 'default': 'config.toml'},
+            'Path to the config file (default: config.toml in the current directory)',
+        ),
+        (
+            ('-t', '--task'),
+            {'type': str, 'default': ''},
+            'The task for the agent to perform',
+        ),
+        (
+            ('-f', '--file'),
+            {'type': str},
+            'Path to a file containing the task. Overrides -t if both are provided.',
+        ),
+        (
+            ('-n', '--name'),
+            {'type': str, 'default': ''},
+            'Session name',
+        ),
+        (
+            ('--log-level',),
+            {'type': str, 'default': None},
+            'Set the log level',
+        ),
+        (
+            ('-l', '--llm-config'),
+            {'default': None, 'type': str},
+            'Replace default LLM ([llm] section in config.toml) config with the specified LLM config, e.g. "llama3" for [llm.llama3] section in config.toml',
+        ),
+        (
+            ('--agent-config',),
+            {'default': None, 'type': str},
+            'Replace default Agent ([agent] section in config.toml) config with the specified Agent config, e.g. "CodeAct" for [agent.CodeAct] section in config.toml',
+        ),
+        (
+            ('-v', '--version'),
+            {'action': 'store_true'},
+            'Show version information',
+        ),
+    )
+    for flags, options, help_text in common_options:
+        parser.add_argument(*flags, help=help_text, **options)
+
+
+def add_evaluation_arguments(parser: argparse.ArgumentParser) -> None:
+    """Register the --eval-* options used by the evaluation parser."""
+    # Entries are (flag, argparse keyword arguments, help text), in registration order.
+    eval_options = (
+        (
+            '--eval-output-dir',
+            {'default': 'evaluation/evaluation_outputs/outputs', 'type': str},
+            'The directory to save evaluation output',
+        ),
+        (
+            '--eval-n-limit',
+            {'default': None, 'type': int},
+            'The number of instances to evaluate',
+        ),
+        (
+            '--eval-num-workers',
+            {'default': 4, 'type': int},
+            'The number of workers to use for evaluation',
+        ),
+        (
+            '--eval-note',
+            {'default': None, 'type': str},
+            'The note to add to the evaluation directory',
+        ),
+        (
+            '--eval-ids',
+            {'default': None, 'type': str},
+            'The comma-separated list (in quotes) of IDs of the instances to evaluate',
+        ),
+    )
+
+    for flag, options, help_text in eval_options:
+        parser.add_argument(flag, help=help_text, **options)
+
+
+def add_headless_specific_arguments(parser: argparse.ArgumentParser) -> None:
+    """Register the options that headless and evaluation parsers add to the common set.
+
+    Covers working directory, agent class, iteration and budget limits,
+    the auto-continue switch and the repository to clone.
+    """
+    # Entries are (flags, argparse keyword arguments, help text), in registration order.
+    headless_options = (
+        (
+            ('-d', '--directory'),
+            {'type': str},
+            'The working directory for the agent',
+        ),
+        (
+            ('-c', '--agent-cls'),
+            {'default': None, 'type': str},
+            'Name of the default agent to use',
+        ),
+        (
+            ('-i', '--max-iterations'),
+            {'default': None, 'type': int},
+            'The maximum number of iterations to run the agent',
+        ),
+        (
+            ('-b', '--max-budget-per-task'),
+            {'type': float},
+            'The maximum budget allowed per task, beyond which the agent will stop.',
+        ),
+        (
+            ('--no-auto-continue',),
+            {'action': 'store_true', 'default': False},
+            'Disable auto-continue responses in headless mode (i.e. headless will read from stdin instead of auto-continuing)',
+        ),
+        (
+            ('--selected-repo',),
+            {'type': str, 'default': None},
+            'GitHub repository to clone (format: owner/repo)',
+        ),
+    )
+    for flags, options, help_text in headless_options:
+        parser.add_argument(*flags, help=help_text, **options)
+
+
+def get_cli_parser() -> argparse.ArgumentParser:
+    """Return the `openhands` parser: `serve`/`cli` subcommands plus --conversation."""
+    description = (
+        'Welcome to OpenHands: Code Less, Make More\n\n'
+        'OpenHands supports two main commands:\n'
+        '  serve - Launch the OpenHands GUI server (web interface)\n'
+        '  cli   - Run OpenHands in CLI mode (terminal interface)\n\n'
+        'Running "openhands" without a command is the same as "openhands cli"'
+    )
+
+    parser = argparse.ArgumentParser(
+        description=description,
+        prog='openhands',
+        formatter_class=argparse.RawDescriptionHelpFormatter,  # Preserve formatting in description
+        epilog='For more information about a command, run: openhands COMMAND --help',
+    )
+
+    # The chosen subcommand name is stored in args.command.
+    subparsers = parser.add_subparsers(
+        dest='command',
+        title='commands',
+        description='OpenHands supports two main commands:',
+        metavar='COMMAND',
+    )
+
+    # Add 'serve' subcommand
+    serve_parser = subparsers.add_parser(
+        'serve', help='Launch the OpenHands GUI server using Docker (web interface)'
+    )
+    serve_parser.add_argument(
+        '--mount-cwd',
+        help='Mount the current working directory into the GUI server container',
+        action='store_true',
+        default=False,
+    )
+    serve_parser.add_argument(
+        '--gpu',
+        help='Enable GPU support by mounting all GPUs into the Docker container via nvidia-docker',
+        action='store_true',
+        default=False,
+    )
+
+    # Add 'cli' subcommand, reusing the options shared with headless mode
+    cli_parser = subparsers.add_parser(
+        'cli', help='Run OpenHands in CLI mode (terminal interface)'
+    )
+    add_common_arguments(cli_parser)
+    cli_parser.add_argument(
+        '--override-cli-mode',
+        help='Override the default settings for CLI mode',
+        # type=bool would make even "false" truthy; only these spellings mean True.
+        type=lambda text: text.strip().lower() in ('1', 'true', 't', 'yes', 'y', 'on'),
+        default=False,
+    )
+    # NOTE(review): top-level option, so it must precede the subcommand - confirm intended.
+    parser.add_argument(
+        '--conversation',
+        help='The conversation id to continue',
+        type=str,
+        default=None,
+    )
+
+    return parser
+
+
+def get_headless_parser() -> argparse.ArgumentParser:
+    """Build the headless-mode parser: common options plus the headless-only ones."""
+    headless_parser = argparse.ArgumentParser(description='Run the agent via CLI')
+    for register in (add_common_arguments, add_headless_specific_arguments):
+        register(headless_parser)
+    return headless_parser
+
+
+def get_evaluation_parser() -> argparse.ArgumentParser:
+    """Build the evaluation parser: common, headless-only, then --eval-* options."""
+    eval_parser = argparse.ArgumentParser(description='Run OpenHands in evaluation mode')
+    for register in (add_common_arguments, add_headless_specific_arguments):
+        register(eval_parser)
+    add_evaluation_arguments(eval_parser)
+    return eval_parser
--- a/openhands/core/config/mcp_config.py
+++ b/openhands/core/config/mcp_config.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import os
 import re
 import shlex
@@ -302,6 +304,13 @@ class MCPConfig(BaseModel):
            raise ValueError(f'Invalid MCP configuration: {e}')
        return mcp_mapping

+    def merge(self, other: MCPConfig):  # builds a new MCPConfig from both inputs
+        return MCPConfig(
+            # Each server collection is joined with `+` as-is: self's entries first.
+            sse_servers=self.sse_servers + other.sse_servers,
+            stdio_servers=self.stdio_servers + other.stdio_servers,
+            shttp_servers=self.shttp_servers + other.shttp_servers,
+        )
+

 class OpenHandsMCPConfig:
    @staticmethod
--- a/openhands/core/config/utils.py
+++ b/openhands/core/config/utils.py
@@ -15,15 +15,12 @@ from pydantic import BaseModel, SecretStr, ValidationError
 from openhands import __version__
 from openhands.core import logger
 from openhands.core.config.agent_config import AgentConfig
+from openhands.core.config.arg_utils import get_headless_parser
 from openhands.core.config.condenser_config import (
    CondenserConfig,
    condenser_config_from_toml_section,
    create_condenser_config,
 )
-from openhands.core.config.config_utils import (
-    OH_DEFAULT_AGENT,
-    OH_MAX_ITERATIONS,
-)
 from openhands.core.config.extended_config import ExtendedConfig
 from openhands.core.config.kubernetes_config import KubernetesConfig
 from openhands.core.config.llm_config import LLMConfig
@@ -674,142 +671,9 @@ def get_condenser_config_arg(
        return None


-# Command line arguments
-def get_parser() -> argparse.ArgumentParser:
-    """Get the argument parser."""
-    parser = argparse.ArgumentParser(description='Run the agent via CLI')
-
-    # Add version argument
-    parser.add_argument(
-        '-v', '--version', action='store_true', help='Show version information'
-    )
-
-    parser.add_argument(
-        '--config-file',
-        type=str,
-        default='config.toml',
-        help='Path to the config file (default: config.toml in the current directory)',
-    )
-    parser.add_argument(
-        '-d',
-        '--directory',
-        type=str,
-        help='The working directory for the agent',
-    )
-    parser.add_argument(
-        '-t',
-        '--task',
-        type=str,
-        default='',
-        help='The task for the agent to perform',
-    )
-    parser.add_argument(
-        '-f',
-        '--file',
-        type=str,
-        help='Path to a file containing the task. Overrides -t if both are provided.',
-    )
-    parser.add_argument(
-        '-c',
-        '--agent-cls',
-        default=OH_DEFAULT_AGENT,
-        type=str,
-        help='Name of the default agent to use',
-    )
-    parser.add_argument(
-        '-i',
-        '--max-iterations',
-        default=OH_MAX_ITERATIONS,
-        type=int,
-        help='The maximum number of iterations to run the agent',
-    )
-    parser.add_argument(
-        '-b',
-        '--max-budget-per-task',
-        type=float,
-        help='The maximum budget allowed per task, beyond which the agent will stop.',
-    )
-    # --eval configs are for evaluations only
-    parser.add_argument(
-        '--eval-output-dir',
-        default='evaluation/evaluation_outputs/outputs',
-        type=str,
-        help='The directory to save evaluation output',
-    )
-    parser.add_argument(
-        '--eval-n-limit',
-        default=None,
-        type=int,
-        help='The number of instances to evaluate',
-    )
-    parser.add_argument(
-        '--eval-num-workers',
-        default=4,
-        type=int,
-        help='The number of workers to use for evaluation',
-    )
-    parser.add_argument(
-        '--eval-note',
-        default=None,
-        type=str,
-        help='The note to add to the evaluation directory',
-    )
-    parser.add_argument(
-        '-l',
-        '--llm-config',
-        default=None,
-        type=str,
-        help='Replace default LLM ([llm] section in config.toml) config with the specified LLM config, e.g. "llama3" for [llm.llama3] section in config.toml',
-    )
-    parser.add_argument(
-        '--agent-config',
-        default=None,
-        type=str,
-        help='Replace default Agent ([agent] section in config.toml) config with the specified Agent config, e.g. "CodeAct" for [agent.CodeAct] section in config.toml',
-    )
-    parser.add_argument(
-        '-n',
-        '--name',
-        help='Session name',
-        type=str,
-        default='',
-    )
-    parser.add_argument(
-        '--eval-ids',
-        default=None,
-        type=str,
-        help='The comma-separated list (in quotes) of IDs of the instances to evaluate',
-    )
-    parser.add_argument(
-        '--no-auto-continue',
-        help='Disable auto-continue responses in headless mode (i.e. headless will read from stdin instead of auto-continuing)',
-        action='store_true',
-        default=False,
-    )
-    parser.add_argument(
-        '--selected-repo',
-        help='GitHub repository to clone (format: owner/repo)',
-        type=str,
-        default=None,
-    )
-    parser.add_argument(
-        '--override-cli-mode',
-        help='Override the default settings for CLI mode',
-        type=bool,
-        default=False,
-    )
-    parser.add_argument(
-        '--log-level',
-        help='Set the log level',
-        type=str,
-        default=None,
-    )
-    return parser
-
-
 def parse_arguments() -> argparse.Namespace:
    """Parse command line arguments."""
-    parser = get_parser()
+    parser = get_headless_parser()
    args = parser.parse_args()

    if args.version:
@@ -914,17 +778,17 @@ def setup_config_from_args(args: argparse.Namespace) -> OpenHandsConfig:
        )

    # Override default agent if provided
-    if args.agent_cls:
+    if hasattr(args, 'agent_cls') and args.agent_cls:
        config.default_agent = args.agent_cls

    # Set max iterations and max budget per task if provided, otherwise fall back to config values
-    if args.max_iterations is not None:
+    if hasattr(args, 'max_iterations') and args.max_iterations is not None:
        config.max_iterations = args.max_iterations
-    if args.max_budget_per_task is not None:
+    if hasattr(args, 'max_budget_per_task') and args.max_budget_per_task is not None:
        config.max_budget_per_task = args.max_budget_per_task

    # Read selected repository in config for use by CLI and main.py
-    if args.selected_repo is not None:
+    if hasattr(args, 'selected_repo') and args.selected_repo is not None:
        config.sandbox.selected_repo = args.selected_repo

    return config
--- a/openhands/llm/fn_call_converter.py
+++ b/openhands/llm/fn_call_converter.py
@@ -383,7 +383,7 @@ Do NOT assume the environment is the same as in the example above.
 """
    example = example.lstrip()

-    return example
+    return refine_prompt(example)


 IN_CONTEXT_LEARNING_EXAMPLE_PREFIX = get_example_for_tools
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -63,6 +63,7 @@ CACHE_PROMPT_SUPPORTED_MODELS = [
    'claude-sonnet-4-20250514',
    'claude-sonnet-4',
    'claude-opus-4-20250514',
+    'claude-opus-4-1-20250805',
 ]

 # function calling supporting models
@@ -77,6 +78,7 @@ FUNCTION_CALLING_SUPPORTED_MODELS = [
    'claude-sonnet-4-20250514',
    'claude-sonnet-4',
    'claude-opus-4-20250514',
+    'claude-opus-4-1-20250805',
    'gpt-4o-mini',
    'gpt-4o',
    'o1-2024-12-17',
@@ -92,6 +94,7 @@ FUNCTION_CALLING_SUPPORTED_MODELS = [
    'kimi-k2-instruct',
    'Qwen3-Coder-480B-A35B-Instruct',
    'qwen3-coder',  # this will match both qwen3-coder-480b (openhands provider) and qwen3-coder (for openrouter)
+    'gpt-5-2025-08-07',
 ]

 REASONING_EFFORT_SUPPORTED_MODELS = [
@@ -105,6 +108,7 @@ REASONING_EFFORT_SUPPORTED_MODELS = [
    'o4-mini-2025-04-16',
    'gemini-2.5-flash',
    'gemini-2.5-pro',
+    'gpt-5-2025-08-07',
 ]

 MODELS_WITHOUT_STOP_WORDS = [
--- a/openhands/runtime/action_execution_server.py
+++ b/openhands/runtime/action_execution_server.py
@@ -676,9 +676,7 @@ class ActionExecutor:

 if __name__ == '__main__':
    logger.warning('Starting Action Execution Server')
-    logger.warning('Arguments passed to script:')
-    for i, arg in enumerate(sys.argv):
-        logger.warning(f'Argument {i}: {arg}')
+
    parser = argparse.ArgumentParser()
    parser.add_argument('port', type=int, help='Port to listen on')
    parser.add_argument('--working-dir', type=str, help='Working directory')
--- a/openhands/runtime/utils/runtime_init.py
+++ b/openhands/runtime/utils/runtime_init.py
@@ -49,124 +49,72 @@ def init_user_and_working_directory(
    if username == os.getenv('USER') and username not in ['root', 'openhands']:
        return None

-    # Skip root since it is already created
-    if username != 'root':
-        # Check if the username already exists
-        logger.info(f'Attempting to create user `{username}` with UID {user_id}.')
-        existing_user_id = -1
-        try:
-            result = subprocess.run(
-                f'id -u {username}', shell=True, check=True, capture_output=True
-            )
-            existing_user_id = int(result.stdout.decode().strip())
-
-            # The user ID already exists, skip setup
-            if existing_user_id == user_id:
-                logger.debug(
-                    f'User `{username}` already has the provided UID {user_id}. Skipping user setup.'
-                )
-            else:
-                logger.warning(
-                    f'User `{username}` already exists with UID {existing_user_id}. Skipping user setup.'
-                )
-                return existing_user_id
-            return None
-        except subprocess.CalledProcessError as e:
-            # Returncode 1 indicates, that the user does not exist yet
-            if e.returncode == 1:
-                logger.info(
-                    f'User `{username}` does not exist. Proceeding with user creation.'
-                )
-            else:
-                logger.error(
-                    f'Error checking user `{username}`, skipping setup:\n{e}\n'
-                )
-                raise
-
-        # Add sudoer
-        sudoer_line = r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers"
-        output = subprocess.run(sudoer_line, shell=True, capture_output=True)
-        if output.returncode != 0:
-            raise RuntimeError(f'Failed to add sudoer: {output.stderr.decode()}')
-        logger.debug(f'Added sudoer successfully. Output: [{output.stdout.decode()}]')
-
-        command = (
-            f'useradd -rm -d /home/{username} -s /bin/bash '
-            f'-g root -G sudo -u {user_id} {username}'
-        )
-        output = subprocess.run(command, shell=True, capture_output=True)
-        if output.returncode == 0:
-            logger.debug(
-                f'Added user `{username}` successfully with UID {user_id}. Output: [{output.stdout.decode()}]'
-            )
-        else:
-            raise RuntimeError(
-                f'Failed to create user `{username}` with UID {user_id}. Output: [{output.stderr.decode()}]'
-            )
-
    # First create the working directory, independent of the user
    logger.debug(f'Client working directory: {initial_cwd}')
    command = f'umask 002; mkdir -p {initial_cwd}'
    output = subprocess.run(command, shell=True, capture_output=True)
    out_str = output.stdout.decode()
-    logger.debug(f'mkdir command result: returncode={output.returncode}, stdout=[{out_str}], stderr=[{output.stderr.decode()}]')

-    # Check current ownership before changing it
-    check_cmd = f'ls -la {initial_cwd}'
-    check_output = subprocess.run(check_cmd, shell=True, capture_output=True)
-    logger.debug(f'Current ownership: {check_output.stdout.decode()}')
-
-    # Check if we're running as root
-    whoami_output = subprocess.run('whoami', shell=True, capture_output=True)
-    current_user = whoami_output.stdout.decode().strip()
-    logger.debug(f'Current user: {current_user}')
-    
-    # Use sudo only if not running as root
-    sudo_prefix = '' if current_user == 'root' else 'sudo '
-    
-    command = f'{sudo_prefix}chown -R {username}:{username} {initial_cwd}'
-    logger.debug(f'Executing chown command: {command}')
+    command = f'chown -R {username}:root {initial_cwd}'
    output = subprocess.run(command, shell=True, capture_output=True)
    out_str += output.stdout.decode()
-    logger.debug(f'chown command result: returncode={output.returncode}, stdout=[{output.stdout.decode()}], stderr=[{output.stderr.decode()}]')
-    if output.returncode != 0 or output.stderr:
-        err_str = output.stderr.decode()
-        logger.error(f'chown command failed: returncode={output.returncode}, stderr: {err_str}')
-        out_str += f' [stderr: {err_str}]'

-    command = f'{sudo_prefix}chmod g+rw {initial_cwd}'
-    logger.debug(f'Executing chmod command: {command}')
+    command = f'chmod g+rw {initial_cwd}'
    output = subprocess.run(command, shell=True, capture_output=True)
    out_str += output.stdout.decode()
-    logger.debug(f'chmod command result: returncode={output.returncode}, stdout=[{output.stdout.decode()}], stderr=[{output.stderr.decode()}]')
-    if output.returncode != 0 or output.stderr:
-        err_str = output.stderr.decode()
-        logger.error(f'chmod command failed: returncode={output.returncode}, stderr: {err_str}')
-        out_str += f' [stderr: {err_str}]'
-
-    # Verify final ownership
-    check_cmd = f'ls -la {initial_cwd}'
-    check_output = subprocess.run(check_cmd, shell=True, capture_output=True)
-    final_ownership = check_output.stdout.decode()
-    logger.debug(f'Final ownership: {final_ownership}')
-    
-    # If chown failed and directory is still owned by root, try alternative approaches
-    if 'root root' in final_ownership and username != 'root':
-        logger.warning(f'Directory {initial_cwd} is still owned by root, trying alternative approaches')
-        
-        # Try to make it writable for the user's group
-        alt_command = f'{sudo_prefix}chmod -R g+rwx {initial_cwd}'
-        logger.debug(f'Executing alternative chmod command: {alt_command}')
-        alt_output = subprocess.run(alt_command, shell=True, capture_output=True)
-        logger.debug(f'Alternative chmod result: returncode={alt_output.returncode}, stderr=[{alt_output.stderr.decode()}]')
-        
-        # Try to add the user to the root group (as a last resort)
-        if alt_output.returncode != 0:
-            group_command = f'{sudo_prefix}usermod -aG root {username}'
-            logger.debug(f'Executing usermod command: {group_command}')
-            group_output = subprocess.run(group_command, shell=True, capture_output=True)
-            logger.debug(f'Usermod result: returncode={group_output.returncode}, stderr=[{group_output.stderr.decode()}]')
-    
    logger.debug(f'Created working directory. Output: [{out_str}]')

+    # Skip root since it is already created
+    if username == 'root':
+        return None
+
+    # Check if the username already exists
+    existing_user_id = -1
+    try:
+        result = subprocess.run(
+            f'id -u {username}', shell=True, check=True, capture_output=True
+        )
+        existing_user_id = int(result.stdout.decode().strip())
+
+        # The user ID already exists, skip setup
+        if existing_user_id == user_id:
+            logger.debug(
+                f'User `{username}` already has the provided UID {user_id}. Skipping user setup.'
+            )
+        else:
+            logger.warning(
+                f'User `{username}` already exists with UID {existing_user_id}. Skipping user setup.'
+            )
+            return existing_user_id
+        return None
+    except subprocess.CalledProcessError as e:
+        # Returncode 1 indicates, that the user does not exist yet
+        if e.returncode == 1:
+            logger.debug(
+                f'User `{username}` does not exist. Proceeding with user creation.'
+            )
+        else:
+            logger.error(f'Error checking user `{username}`, skipping setup:\n{e}\n')
+            raise
+
+    # Add sudoer
+    sudoer_line = r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers"
+    output = subprocess.run(sudoer_line, shell=True, capture_output=True)
+    if output.returncode != 0:
+        raise RuntimeError(f'Failed to add sudoer: {output.stderr.decode()}')
+    logger.debug(f'Added sudoer successfully. Output: [{output.stdout.decode()}]')
+
+    command = (
+        f'useradd -rm -d /home/{username} -s /bin/bash '
+        f'-g root -G sudo -u {user_id} {username}'
+    )
+    output = subprocess.run(command, shell=True, capture_output=True)
+    if output.returncode == 0:
+        logger.debug(
+            f'Added user `{username}` successfully with UID {user_id}. Output: [{output.stdout.decode()}]'
+        )
+    else:
+        raise RuntimeError(
+            f'Failed to create user `{username}` with UID {user_id}. Output: [{output.stderr.decode()}]'
+        )
    return None
--- a/openhands/runtime/utils/runtime_templates/Dockerfile.j2
+++ b/openhands/runtime/utils/runtime_templates/Dockerfile.j2
@@ -56,8 +56,16 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | env UV_INSTALL_DIR="/openhands/
 # Add /openhands/bin to PATH
 ENV PATH="/openhands/bin:${PATH}"

+# Remove the UID 1000 user if it is named pn or ubuntu, so the 'openhands' user can be created on Ubuntu hosts
+RUN (if getent passwd 1000 | grep -q pn; then userdel pn; fi) && \
+    (if getent passwd 1000 | grep -q ubuntu; then userdel ubuntu; fi)


+# Create necessary directories
+RUN mkdir -p /openhands && \
+    mkdir -p /openhands/logs && \
+    mkdir -p /openhands/poetry
+

 # ================================================================
 # Define Docker installation macro
@@ -103,29 +111,6 @@ RUN \
 # Configure Docker daemon with MTU 1450 to prevent packet fragmentation issues
 RUN mkdir -p /etc/docker && \
    echo '{"mtu": 1450}' > /etc/docker/daemon.json
-
-# Remove UID 1000 and GID 1000 users/groups that might conflict with openhands user
-RUN (if getent passwd 1000 | grep -q pn; then userdel pn; fi) && \
-    (if getent passwd 1000 | grep -q ubuntu; then userdel ubuntu; fi) && \
-    (if getent group 1000 | grep -q pn; then groupdel pn; fi) && \
-    (if getent group 1000 | grep -q ubuntu; then groupdel ubuntu; fi)
-
-# Create openhands group and user
-RUN groupadd -g 1000 openhands && \
-    useradd -u 1000 -g 1000 -m -s /bin/bash openhands && \
-    usermod -aG sudo openhands && \
-    echo 'openhands ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
-
-# Create necessary directories
-RUN mkdir -p /openhands && \
-    mkdir -p /openhands/logs && \
-    mkdir -p /openhands/poetry && \
-    mkdir -p /workspace && \
-    mkdir -p /workspace/.openhands && \
-    mkdir -p /home/openhands/.openhands && \
-    chown -R openhands:openhands /openhands && \
-    chown -R openhands:openhands /workspace && \
-    chown -R openhands:openhands /home/openhands
 {% endmacro %}

 # Install Docker only if not a swebench or mswebench image
@@ -165,8 +150,7 @@ RUN if [ -z "${RELEASE_TAG}" ]; then \
    if [ -d "${OPENVSCODE_SERVER_ROOT}" ]; then rm -rf "${OPENVSCODE_SERVER_ROOT}"; fi && \
    mv ${RELEASE_TAG}-linux-${arch} ${OPENVSCODE_SERVER_ROOT} && \
    cp ${OPENVSCODE_SERVER_ROOT}/bin/remote-cli/openvscode-server ${OPENVSCODE_SERVER_ROOT}/bin/remote-cli/code && \
-    rm -f ${RELEASE_TAG}-linux-${arch}.tar.gz && \
-    chown -R openhands:openhands ${OPENVSCODE_SERVER_ROOT}
+    rm -f ${RELEASE_TAG}-linux-${arch}.tar.gz



@@ -175,12 +159,10 @@ RUN if [ -z "${RELEASE_TAG}" ]; then \
 {% macro install_vscode_extensions() %}
 # Install our custom extension
 RUN mkdir -p ${OPENVSCODE_SERVER_ROOT}/extensions/openhands-hello-world && \
-    cp -r /openhands/code/openhands/runtime/utils/vscode-extensions/hello-world/* ${OPENVSCODE_SERVER_ROOT}/extensions/openhands-hello-world/ && \
-    chown -R openhands:openhands ${OPENVSCODE_SERVER_ROOT}/extensions/openhands-hello-world
+    cp -r /openhands/code/openhands/runtime/utils/vscode-extensions/hello-world/* ${OPENVSCODE_SERVER_ROOT}/extensions/openhands-hello-world/

 RUN mkdir -p ${OPENVSCODE_SERVER_ROOT}/extensions/openhands-memory-monitor && \
-    cp -r /openhands/code/openhands/runtime/utils/vscode-extensions/memory-monitor/* ${OPENVSCODE_SERVER_ROOT}/extensions/openhands-memory-monitor/ && \
-    chown -R openhands:openhands ${OPENVSCODE_SERVER_ROOT}/extensions/openhands-memory-monitor
+    cp -r /openhands/code/openhands/runtime/utils/vscode-extensions/memory-monitor/* ${OPENVSCODE_SERVER_ROOT}/extensions/openhands-memory-monitor/

 # Some extension dirs are removed because they trigger false positives in vulnerability scans.
 RUN rm -rf ${OPENVSCODE_SERVER_ROOT}/extensions/{handlebars,pug,json,diff,grunt,ini,npm}
@@ -203,12 +185,9 @@ RUN \
    {% endif %}
    # Set environment variables
    /openhands/micromamba/bin/micromamba run -n openhands poetry run python -c "import sys; print('OH_INTERPRETER_PATH=' + sys.executable)" >> /etc/environment && \
-    # Set permissions and ownership
+    # Set permissions
    chmod -R g+rws /openhands/poetry && \
-    chown -R openhands:openhands /openhands/poetry && \
    mkdir -p /openhands/workspace && chmod -R g+rws,o+rw /openhands/workspace && \
-    chown -R openhands:openhands /openhands/workspace && \
-    chown -R openhands:openhands /openhands/micromamba && \
    # Clean up
    /openhands/micromamba/bin/micromamba run -n openhands poetry cache clear --all . -n && \
    apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
@@ -229,8 +208,7 @@ RUN \
 RUN mkdir -p /openhands/micromamba/bin && \
    /bin/bash -c "PREFIX_LOCATION=/openhands/micromamba BIN_FOLDER=/openhands/micromamba/bin INIT_YES=no CONDA_FORGE_YES=yes $(curl -L https://micro.mamba.pm/install.sh)" && \
    /openhands/micromamba/bin/micromamba config remove channels defaults && \
-    /openhands/micromamba/bin/micromamba config list && \
-    chown -R openhands:openhands /openhands/micromamba
+    /openhands/micromamba/bin/micromamba config list

 # Create the openhands virtual environment and install poetry and python
 RUN /openhands/micromamba/bin/micromamba create -n openhands -y && \
@@ -241,12 +219,11 @@ RUN \
    if [ -d /openhands/code ]; then rm -rf /openhands/code; fi && \
    mkdir -p /openhands/code/openhands && \
    touch /openhands/code/openhands/__init__.py && \
-    chown -R openhands:openhands /openhands/code && \
    # Set global git configuration to ensure proper author/committer information
    git config --global user.name "openhands" && \
    git config --global user.email "openhands@all-hands.dev"

-COPY --chown=openhands:openhands ./code/pyproject.toml ./code/poetry.lock /openhands/code/
+COPY ./code/pyproject.toml ./code/poetry.lock /openhands/code/

 {{ install_dependencies() }}

@@ -257,43 +234,14 @@ COPY --chown=openhands:openhands ./code/pyproject.toml ./code/poetry.lock /openh

 {{ setup_vscode_server() }}

-# ================================================================
-# Ensure openhands user and directories exist (for non-scratch builds)
-# ================================================================
-{% if not build_from_scratch %}
-# Remove UID 1000 and GID 1000 users/groups that might conflict with openhands user
-RUN (if getent passwd 1000 | grep -q pn; then userdel pn; fi) && \
-    (if getent passwd 1000 | grep -q ubuntu; then userdel ubuntu; fi) && \
-    (if getent group 1000 | grep -q pn; then groupdel pn; fi) && \
-    (if getent group 1000 | grep -q ubuntu; then groupdel ubuntu; fi)
-
-# Create openhands group and user if they don't exist
-RUN (getent group openhands || groupadd -g 1000 openhands) && \
-    (getent passwd openhands || useradd -u 1000 -g 1000 -m -s /bin/bash openhands) && \
-    usermod -aG sudo openhands && \
-    echo 'openhands ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
-
-# Create necessary directories and set ownership
-RUN mkdir -p /openhands && \
-    mkdir -p /openhands/logs && \
-    mkdir -p /openhands/poetry && \
-    mkdir -p /workspace && \
-    mkdir -p /workspace/.openhands && \
-    mkdir -p /home/openhands/.openhands && \
-    chown -R openhands:openhands /openhands && \
-    chown -R openhands:openhands /workspace && \
-    chown -R openhands:openhands /home/openhands
-{% endif %}
-
 # ================================================================
 # Copy Project source files
 # ================================================================
 RUN if [ -d /openhands/code/openhands ]; then rm -rf /openhands/code/openhands; fi
-COPY --chown=openhands:openhands ./code/pyproject.toml ./code/poetry.lock /openhands/code/
+COPY ./code/pyproject.toml ./code/poetry.lock /openhands/code/

-COPY --chown=openhands:openhands ./code/openhands /openhands/code/openhands
-RUN chmod a+rwx /openhands/code/openhands/__init__.py && \
-    chown -R openhands:openhands /openhands/code
+COPY ./code/openhands /openhands/code/openhands
+RUN chmod a+rwx /openhands/code/openhands/__init__.py



@@ -307,12 +255,3 @@ RUN chmod a+rwx /openhands/code/openhands/__init__.py && \

 # Install extra dependencies if specified
 {% if extra_deps %}RUN {{ extra_deps }} {% endif %}
-
-# Copy entrypoint script and make it executable
-COPY entrypoint.sh /usr/local/bin/entrypoint.sh
-RUN chmod +x /usr/local/bin/entrypoint.sh
-
-# Set the entrypoint to run as root first, then switch to openhands
-ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
-
-# Note: We don't set USER openhands here because the entrypoint handles the user switch
--- a/openhands/runtime/utils/runtime_templates/entrypoint.sh
+++ b/openhands/runtime/utils/runtime_templates/entrypoint.sh
@@ -1,32 +0,0 @@
-#!/bin/bash
-set -e
-
-# This entrypoint script runs as root to fix workspace ownership before switching to openhands user
-
-echo "🔧 OpenHands Runtime Entrypoint - Fixing workspace ownership..."
-
-# Check if /workspace exists and fix ownership
-if [ -d "/workspace" ]; then
-    echo "📁 Found /workspace directory, checking ownership..."
-    ls -la /workspace
-    
-    # Fix ownership to openhands:openhands
-    echo "🔧 Changing ownership to openhands:openhands..."
-    chown -R openhands:openhands /workspace
-    chmod -R g+rw /workspace
-    
-    echo "✅ Ownership fixed:"
-    ls -la /workspace
-else
-    echo "⚠️  /workspace directory not found, will be created later"
-fi
-
-# If arguments are provided, execute them as the openhands user
-if [ $# -gt 0 ]; then
-    echo "🚀 Switching to openhands user and executing: $@"
-    # Use exec to replace the current process and preserve all arguments
-    exec su openhands -c "exec \"\$@\"" -- "$@"
-else
-    echo "🚀 Switching to openhands user with bash shell"
-    exec su - openhands
-fi
--- a/openhands/server/routes/manage_conversations.py
+++ b/openhands/server/routes/manage_conversations.py
@@ -10,17 +10,18 @@ from jinja2 import Environment, FileSystemLoader
 from pydantic import BaseModel, ConfigDict, Field

 from openhands.core.config.llm_config import LLMConfig
+from openhands.core.config.mcp_config import MCPConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.events.action import (
    ChangeAgentStateAction,
    NullAction,
 )
 from openhands.events.event_filter import EventFilter
+from openhands.events.event_store import EventStore
 from openhands.events.observation import (
    AgentStateChangedObservation,
    NullObservation,
 )
-from openhands.events.stream import EventStream
 from openhands.integrations.provider import (
    PROVIDER_TOKEN_TYPE,
    ProviderHandler,
@@ -44,11 +45,11 @@ from openhands.server.services.conversation_service import (
    create_new_conversation,
    setup_init_convo_settings,
 )
-from openhands.server.session.conversation import ServerConversation
 from openhands.server.shared import (
    ConversationStoreImpl,
    config,
    conversation_manager,
+    file_store,
 )
 from openhands.server.types import LLMAuthenticationError, MissingSettingsError
 from openhands.server.user_auth import (
@@ -60,7 +61,7 @@ from openhands.server.user_auth import (
    get_user_settings_store,
 )
 from openhands.server.user_auth.user_auth import AuthType
-from openhands.server.utils import get_conversation as get_conversation_object
+from openhands.server.utils import get_conversation as get_conversation_metadata
 from openhands.server.utils import get_conversation_store
 from openhands.storage.conversation.conversation_store import ConversationStore
 from openhands.storage.data_models.conversation_metadata import (
@@ -87,6 +88,7 @@ class InitSessionRequest(BaseModel):
    suggested_task: SuggestedTask | None = None
    create_microagent: CreateMicroagent | None = None
    conversation_instructions: str | None = None
+    mcp_config: MCPConfig | None = None
    # Only nested runtimes require the ability to specify a conversation id, and it could be a security risk
    if os.getenv('ALLOW_SET_CONVERSATION_ID', '0') == '1':
        conversation_id: str = Field(default_factory=lambda: uuid.uuid4().hex)
@@ -178,6 +180,7 @@ async def new_conversation(
            conversation_instructions=conversation_instructions,
            git_provider=git_provider,
            conversation_id=conversation_id,
+            mcp_config=data.mcp_config,
        )

        return ConversationResponse(
@@ -331,23 +334,20 @@ async def delete_conversation(
    return True


-@app.get('/conversations/{conversation_id}/remember_prompt')
+@app.get('/conversations/{conversation_id}/remember-prompt')
 async def get_prompt(
+    conversation_id: str,
    event_id: int,
    user_settings: SettingsStore = Depends(get_user_settings_store),
-    conversation: ServerConversation | None = Depends(get_conversation_object),
+    metadata: ConversationMetadata = Depends(get_conversation_metadata),
 ):
-    if conversation is None:
-        return JSONResponse(
-            status_code=404,
-            content={'error': 'Conversation not found.'},
-        )
-
-    # get event stream for the conversation
-    event_stream = conversation.event_stream
+    # get event store for the conversation
+    event_store = EventStore(
+        sid=conversation_id, file_store=file_store, user_id=metadata.user_id
+    )

    # retrieve the relevant events
-    stringified_events = _get_contextual_events(event_stream, event_id)
+    stringified_events = _get_contextual_events(event_store, event_id)

    # generate a prompt
    settings = await user_settings.load()
@@ -551,7 +551,7 @@ async def stop_conversation(
        )


-def _get_contextual_events(event_stream: EventStream, event_id: int) -> str:
+def _get_contextual_events(event_store: EventStore, event_id: int) -> str:
    # find the specified events to learn from
    # Get X events around the target event
    context_size = 4
@@ -567,7 +567,7 @@ def _get_contextual_events(event_stream: EventStream, event_id: int) -> str:
    )  # the types of events that can be in an agent's history

    # from event_id - context_size to event_id..
-    context_before = event_stream.search_events(
+    context_before = event_store.search_events(
        start_id=event_id,
        filter=agent_event_filter,
        reverse=True,
@@ -575,7 +575,7 @@ def _get_contextual_events(event_stream: EventStream, event_id: int) -> str:
    )

    # from event_id to event_id + context_size + 1
-    context_after = event_stream.search_events(
+    context_after = event_store.search_events(
        start_id=event_id + 1,
        filter=agent_event_filter,
        limit=context_size + 1,
--- a/openhands/server/services/conversation_service.py
+++ b/openhands/server/services/conversation_service.py
@@ -2,6 +2,7 @@ import uuid
 from types import MappingProxyType
 from typing import Any

+from openhands.core.config.mcp_config import MCPConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.events.action.message import MessageAction
 from openhands.experiments.experiment_manager import ExperimentManagerImpl
@@ -44,6 +45,7 @@ async def create_new_conversation(
    attach_convo_id: bool = False,
    git_provider: ProviderType | None = None,
    conversation_id: str | None = None,
+    mcp_config: MCPConfig | None = None,
 ) -> AgentLoopInfo:
    logger.info(
        'Creating conversation',
@@ -82,6 +84,9 @@ async def create_new_conversation(
    session_init_args['selected_branch'] = selected_branch
    session_init_args['git_provider'] = git_provider
    session_init_args['conversation_instructions'] = conversation_instructions
+    if mcp_config:
+        session_init_args['mcp_config'] = mcp_config
+
    conversation_init_data = ConversationInitData(**session_init_args)

    logger.info('Loading conversation store')
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -124,10 +124,12 @@ class Session:
        )

        # Set Git user configuration if provided in settings
-        if hasattr(settings, 'git_user_name') and settings.git_user_name:
-            self.config.git_user_name = settings.git_user_name
-        if hasattr(settings, 'git_user_email') and settings.git_user_email:
-            self.config.git_user_email = settings.git_user_email
+        git_user_name = getattr(settings, 'git_user_name', None)
+        if git_user_name is not None:
+            self.config.git_user_name = git_user_name
+        git_user_email = getattr(settings, 'git_user_email', None)
+        if git_user_email is not None:
+            self.config.git_user_email = git_user_email
        max_iterations = settings.max_iterations or self.config.max_iterations

        # Prioritize settings over config for max_budget_per_task
@@ -152,6 +154,14 @@ class Session:
        self.logger.debug(
            f'MCP configuration before setup - self.config.mcp_config: {self.config.mcp}'
        )
+
+        # Check if settings has custom mcp_config
+        mcp_config = getattr(settings, 'mcp_config', None)
+        if mcp_config is not None:
+            # Merge the provided MCP config into the current config (the default server is still added below)
+            self.config.mcp = self.config.mcp.merge(mcp_config)
+            self.logger.debug(f'Merged custom MCP Config: {mcp_config}')
+
        # Add OpenHands' MCP server by default
        openhands_mcp_server, openhands_mcp_stdio_servers = (
            OpenHandsMCPConfigImpl.create_default_mcp_server_config(
@@ -163,7 +173,7 @@ class Session:
            self.config.mcp.shttp_servers.append(openhands_mcp_server)
            self.logger.debug('Added default MCP HTTP server to config')

-        self.config.mcp.stdio_servers.extend(openhands_mcp_stdio_servers)
+            self.config.mcp.stdio_servers.extend(openhands_mcp_stdio_servers)

        self.logger.debug(
            f'MCP configuration after setup - self.config.mcp: {self.config.mcp}'
--- a/openhands/utils/llm.py
+++ b/openhands/utils/llm.py
@@ -56,6 +56,7 @@ def get_supported_llm_models(config: OpenHandsConfig) -> list[str]:
    # Add OpenHands provider models
    openhands_models = [
        'openhands/claude-sonnet-4-20250514',
+        'openhands/gpt-5-2025-08-07',
        'openhands/claude-opus-4-20250514',
        'openhands/gemini-2.5-pro',
        'openhands/o3',
--- a/openhands/utils/prompt.py
+++ b/openhands/utils/prompt.py
@@ -4,6 +4,7 @@ from itertools import islice

 from jinja2 import Template

+from openhands.agenthub.codeact_agent.tools.bash import refine_prompt
 from openhands.controller.state.state import State
 from openhands.core.message import Message, TextContent
 from openhands.events.observation.agent import MicroagentKnowledge
@@ -91,7 +92,8 @@ class PromptManager:
            return Template(file.read())

    def get_system_message(self) -> str:
-        return self.system_template.render().strip()
+        system_message = self.system_template.render().strip()
+        return refine_prompt(system_message)

    def get_example_user_message(self) -> str:
        """This is an initial user message that can be provided to the agent
--- a/openhands/utils/term_color.py
+++ b/openhands/utils/term_color.py
@@ -10,6 +10,7 @@ class TermColor(Enum):
    SUCCESS = 'green'
    ERROR = 'red'
    INFO = 'blue'
+    GREY = 'dark_grey'


 def colorize(text: str, color: TermColor = TermColor.WARNING) -> str:
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,6 +0,0 @@
-{
-  "name": "OpenHands",
-  "lockfileVersion": 3,
-  "requires": true,
-  "packages": {}
-}
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,6 +42,7 @@ numpy = "*"
 json-repair = "*"
 browsergym-core = "0.13.3"                         # integrate browsergym-core as the browsing interface
 html2text = "*"
+rich = "*"                                         # For terminal formatting and markdown rendering
 deprecated = "*"
 pexpect = "*"
 jinja2 = "^3.1.3"
@@ -166,7 +167,7 @@ joblib = "*"
 swebench = { git = "https://github.com/ryanhoangt/SWE-bench.git", rev = "fix-modal-patch-eval" }

 [tool.poetry.scripts]
-openhands = "openhands.cli.main:main"
+openhands = "openhands.cli.entry:main"

 [tool.poetry.group.testgeneval.dependencies]
 fuzzywuzzy = "^0.18.0"
--- a/tests/unit/test_arg_parser.py
+++ b/tests/unit/test_arg_parser.py
@@ -1,17 +1,36 @@
 import pytest

-from openhands.core.config import OH_DEFAULT_AGENT, OH_MAX_ITERATIONS, get_parser
+from openhands.core.config import (
+    get_evaluation_parser,
+    get_headless_parser,
+)


-def test_parser_default_values():
-    parser = get_parser()
+def test_headless_parser_default_values():
+    parser = get_headless_parser()
    args = parser.parse_args([])

    assert args.directory is None
    assert args.task == ''
    assert args.file is None
-    assert args.agent_cls == OH_DEFAULT_AGENT
-    assert args.max_iterations == OH_MAX_ITERATIONS
+    assert args.agent_cls is None
+    assert args.max_iterations is None
+    assert args.max_budget_per_task is None
+    assert args.llm_config is None
+    assert args.name == ''
+    assert not args.no_auto_continue
+    assert args.selected_repo is None
+
+
+def test_evaluation_parser_default_values():
+    parser = get_evaluation_parser()
+    args = parser.parse_args([])
+
+    assert args.directory is None
+    assert args.task == ''
+    assert args.file is None
+    assert args.agent_cls is None
+    assert args.max_iterations is None
    assert args.max_budget_per_task is None
    assert args.eval_output_dir == 'evaluation/evaluation_outputs/outputs'
    assert args.eval_n_limit is None
@@ -23,8 +42,8 @@ def test_parser_default_values():
    assert args.selected_repo is None


-def test_parser_custom_values():
-    parser = get_parser()
+def test_evaluation_parser_custom_values():
+    parser = get_evaluation_parser()
    args = parser.parse_args(
        [
            '-v',
@@ -76,7 +95,7 @@ def test_parser_custom_values():


 def test_parser_file_overrides_task():
-    parser = get_parser()
+    parser = get_headless_parser()
    args = parser.parse_args(['-t', 'task from command', '-f', 'task_file.txt'])

    assert args.task == 'task from command'
@@ -84,31 +103,31 @@ def test_parser_file_overrides_task():


 def test_parser_invalid_max_iterations():
-    parser = get_parser()
+    parser = get_headless_parser()
    with pytest.raises(SystemExit):
        parser.parse_args(['-i', 'not_a_number'])


 def test_parser_invalid_max_budget():
-    parser = get_parser()
+    parser = get_headless_parser()
    with pytest.raises(SystemExit):
        parser.parse_args(['-b', 'not_a_number'])


-def test_parser_invalid_eval_n_limit():
-    parser = get_parser()
+def test_evaluation_parser_invalid_eval_n_limit():
+    parser = get_evaluation_parser()
    with pytest.raises(SystemExit):
        parser.parse_args(['--eval-n-limit', 'not_a_number'])


-def test_parser_invalid_eval_num_workers():
-    parser = get_parser()
+def test_evaluation_parser_invalid_eval_num_workers():
+    parser = get_evaluation_parser()
    with pytest.raises(SystemExit):
        parser.parse_args(['--eval-num-workers', 'not_a_number'])


-def test_help_message(capsys):
-    parser = get_parser()
+def test_headless_parser_help_message(capsys):
+    parser = get_headless_parser()
    with pytest.raises(SystemExit):
        parser.parse_args(['--help'])
    captured = capsys.readouterr()
@@ -126,6 +145,41 @@ def test_help_message(capsys):
        '-c AGENT_CLS, --agent-cls AGENT_CLS',
        '-i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS',
        '-b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK',
+        '-l LLM_CONFIG, --llm-config LLM_CONFIG',
+        '--agent-config AGENT_CONFIG',
+        '-n NAME, --name NAME',
+        '--config-file CONFIG_FILE',
+        '--no-auto-continue',
+        '--selected-repo SELECTED_REPO',
+        '--log-level LOG_LEVEL',
+    ]
+
+    for element in expected_elements:
+        assert element in help_output, f"Expected '{element}' to be in the help message"
+
+    option_count = help_output.count('  -')
+    assert option_count == 15, f'Expected 15 options, found {option_count}'
+
+
+def test_evaluation_parser_help_message(capsys):
+    parser = get_evaluation_parser()
+    with pytest.raises(SystemExit):
+        parser.parse_args(['--help'])
+    captured = capsys.readouterr()
+    help_output = captured.out
+    print(help_output)
+    expected_elements = [
+        'usage:',
+        'Run OpenHands in evaluation mode',
+        'options:',
+        '-v, --version',
+        '-h, --help',
+        '-d DIRECTORY, --directory DIRECTORY',
+        '-t TASK, --task TASK',
+        '-f FILE, --file FILE',
+        '-c AGENT_CLS, --agent-cls AGENT_CLS',
+        '-i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS',
+        '-b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK',
        '--eval-output-dir EVAL_OUTPUT_DIR',
        '--eval-n-limit EVAL_N_LIMIT',
        '--eval-num-workers EVAL_NUM_WORKERS',
@@ -137,7 +191,6 @@ def test_help_message(capsys):
        '--config-file CONFIG_FILE',
        '--no-auto-continue',
        '--selected-repo SELECTED_REPO',
-        '--override-cli-mode OVERRIDE_CLI_MODE',
        '--log-level LOG_LEVEL',
    ]

@@ -145,11 +198,11 @@ def test_help_message(capsys):
        assert element in help_output, f"Expected '{element}' to be in the help message"

    option_count = help_output.count('  -')
-    assert option_count == 21, f'Expected 21 options, found {option_count}'
+    assert option_count == 20, f'Expected 20 options, found {option_count}'


 def test_selected_repo_format():
    """Test that the selected-repo argument accepts owner/repo format."""
-    parser = get_parser()
+    parser = get_headless_parser()
    args = parser.parse_args(['--selected-repo', 'owner/repo'])
    assert args.selected_repo == 'owner/repo'
--- a/tests/unit/test_cli.py
+++ b/tests/unit/test_cli.py
@@ -325,7 +325,6 @@ async def test_run_session_with_initial_action(


@pytest.mark.asyncio
-@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -345,7 +344,6 @@ async def test_main_without_task(
    mock_check_security,
    mock_get_settings_store,
    mock_setup_config,
-    mock_parse_args,
 ):
    """Test main function without a task."""
    loop = asyncio.get_running_loop()
@@ -359,7 +357,10 @@ async def test_main_without_task(
    mock_args.llm_config = None
    mock_args.name = None
    mock_args.file = None
-    mock_parse_args.return_value = mock_args
+    mock_args.conversation = None
+    mock_args.log_level = None
+    mock_args.config_file = 'config.toml'
+    mock_args.override_cli_mode = None

    # Mock config
    mock_config = MagicMock()
@@ -393,10 +394,9 @@ async def test_main_without_task(
    mock_run_session.return_value = False

    # Run the function
-    await cli.main_with_loop(loop)
+    await cli.main_with_loop(loop, mock_args)

    # Assertions
-    mock_parse_args.assert_called_once()
    mock_setup_config.assert_called_once_with(mock_args)
    mock_get_settings_store.assert_called_once()
    mock_settings_store.load.assert_called_once()
@@ -412,11 +412,11 @@ async def test_main_without_task(
        None,
        session_name=None,
        skip_banner=False,
+        conversation_id=None,
    )


@pytest.mark.asyncio
-@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -436,7 +436,6 @@ async def test_main_with_task(
    mock_check_security,
    mock_get_settings_store,
    mock_setup_config,
-    mock_parse_args,
 ):
    """Test main function with a task."""
    loop = asyncio.get_running_loop()
@@ -449,7 +448,11 @@ async def test_main_with_task(
    mock_args.agent_cls = 'custom-agent'
    mock_args.llm_config = 'custom-config'
    mock_args.file = None
-    mock_parse_args.return_value = mock_args
+    mock_args.name = None
+    mock_args.conversation = None
+    mock_args.log_level = None
+    mock_args.config_file = 'config.toml'
+    mock_args.override_cli_mode = None

    # Mock config
    mock_config = MagicMock()
@@ -484,10 +487,9 @@ async def test_main_with_task(
    mock_run_session.side_effect = [True, False]

    # Run the function
-    await cli.main_with_loop(loop)
+    await cli.main_with_loop(loop, mock_args)

    # Assertions
-    mock_parse_args.assert_called_once()
    mock_setup_config.assert_called_once_with(mock_args)
    mock_get_settings_store.assert_called_once()
    mock_settings_store.load.assert_called_once()
@@ -518,7 +520,6 @@ async def test_main_with_task(


@pytest.mark.asyncio
-@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -538,7 +539,6 @@ async def test_main_with_session_name_passes_name_to_run_session(
    mock_check_security,
    mock_get_settings_store,
    mock_setup_config,
-    mock_parse_args,
 ):
    """Test main function with a session name passes it to run_session."""
    loop = asyncio.get_running_loop()
@@ -553,7 +553,10 @@ async def test_main_with_session_name_passes_name_to_run_session(
    mock_args.llm_config = None
    mock_args.name = test_session_name  # Set the session name
    mock_args.file = None
-    mock_parse_args.return_value = mock_args
+    mock_args.conversation = None
+    mock_args.log_level = None
+    mock_args.config_file = 'config.toml'
+    mock_args.override_cli_mode = None

    # Mock config
    mock_config = MagicMock()
@@ -587,10 +590,9 @@ async def test_main_with_session_name_passes_name_to_run_session(
    mock_run_session.return_value = False

    # Run the function
-    await cli.main_with_loop(loop)
+    await cli.main_with_loop(loop, mock_args)

    # Assertions
-    mock_parse_args.assert_called_once()
    mock_setup_config.assert_called_once_with(mock_args)
    mock_get_settings_store.assert_called_once()
    mock_settings_store.load.assert_called_once()
@@ -606,6 +608,7 @@ async def test_main_with_session_name_passes_name_to_run_session(
        None,
        session_name=test_session_name,
        skip_banner=False,
+        conversation_id=None,
    )


@@ -709,7 +712,6 @@ async def test_run_session_with_name_attempts_state_restore(


@pytest.mark.asyncio
-@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -729,7 +731,6 @@ async def test_main_security_check_fails(
    mock_check_security,
    mock_get_settings_store,
    mock_setup_config,
-    mock_parse_args,
 ):
    """Test main function when security check fails."""
    loop = asyncio.get_running_loop()
@@ -739,7 +740,14 @@ async def test_main_security_check_fails(

    # Mock arguments
    mock_args = MagicMock()
-    mock_parse_args.return_value = mock_args
+    mock_args.agent_cls = None
+    mock_args.llm_config = None
+    mock_args.name = None
+    mock_args.file = None
+    mock_args.conversation = None
+    mock_args.log_level = None
+    mock_args.config_file = 'config.toml'
+    mock_args.override_cli_mode = None

    # Mock config
    mock_config = MagicMock()
@@ -761,10 +769,9 @@ async def test_main_security_check_fails(
    mock_check_security.return_value = False

    # Run the function
-    await cli.main_with_loop(loop)
+    await cli.main_with_loop(loop, mock_args)

    # Assertions
-    mock_parse_args.assert_called_once()
    mock_setup_config.assert_called_once_with(mock_args)
    mock_get_settings_store.assert_called_once()
    mock_settings_store.load.assert_called_once()
@@ -775,7 +782,6 @@ async def test_main_security_check_fails(


@pytest.mark.asyncio
-@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -795,7 +801,6 @@ async def test_config_loading_order(
    mock_check_security,
    mock_get_settings_store,
    mock_setup_config,
-    mock_parse_args,
 ):
    """Test the order of configuration loading in the main function.

@@ -816,7 +821,10 @@ async def test_config_loading_order(
    # Add a file property to avoid file I/O errors
    mock_args.file = None
    mock_args.log_level = 'INFO'
-    mock_parse_args.return_value = mock_args
+    mock_args.name = None
+    mock_args.conversation = None
+    mock_args.config_file = 'config.toml'
+    mock_args.override_cli_mode = None

    # Mock read_task to return a dummy task
    mock_read_task.return_value = 'Test task'
@@ -859,10 +867,9 @@ async def test_config_loading_order(
    mock_run_session.return_value = False  # No new session requested

    # Run the function
-    await cli.main_with_loop(loop)
+    await cli.main_with_loop(loop, mock_args)

    # Assertions for argument parsing and config setup
-    mock_parse_args.assert_called_once()
    mock_setup_config.assert_called_once_with(mock_args)
    mock_get_settings_store.assert_called_once()
    mock_settings_store.load.assert_called_once()
@@ -892,7 +899,6 @@ async def test_config_loading_order(


@pytest.mark.asyncio
-@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -914,7 +920,6 @@ async def test_main_with_file_option(
    mock_check_security,
    mock_get_settings_store,
    mock_setup_config,
-    mock_parse_args,
 ):
    """Test main function with a file option."""
    loop = asyncio.get_running_loop()
@@ -929,7 +934,10 @@ async def test_main_with_file_option(
    mock_args.name = None
    mock_args.file = '/path/to/test/file.txt'
    mock_args.task = None
-    mock_parse_args.return_value = mock_args
+    mock_args.conversation = None
+    mock_args.log_level = None
+    mock_args.config_file = 'config.toml'
+    mock_args.override_cli_mode = None

    # Mock config
    mock_config = MagicMock()
@@ -965,10 +973,9 @@ async def test_main_with_file_option(
    mock_run_session.return_value = False

    # Run the function
-    await cli.main_with_loop(loop)
+    await cli.main_with_loop(loop, mock_args)

    # Assertions
-    mock_parse_args.assert_called_once()
    mock_setup_config.assert_called_once_with(mock_args)
    mock_get_settings_store.assert_called_once()
    mock_settings_store.load.assert_called_once()
--- a/tests/unit/test_cli_thought_order.py
+++ b/tests/unit/test_cli_thought_order.py
@@ -145,8 +145,8 @@ class TestThoughtDisplayOrder:
        # Verify that final thought is displayed
        mock_display_message.assert_called_once_with('This is a final thought.')

-    @patch('openhands.cli.tui.display_message')
-    def test_message_action_from_agent(self, mock_display_message):
+    @patch('openhands.cli.tui.display_agent_message')
+    def test_message_action_from_agent(self, mock_display_agent_message):
        """Test that MessageAction from agent is displayed."""
        config = MagicMock(spec=OpenHandsConfig)

@@ -156,8 +156,8 @@ class TestThoughtDisplayOrder:

        display_event(message_action, config)

-        # Verify that message is displayed
-        mock_display_message.assert_called_once_with('Hello from agent')
+        # Verify that agent message is displayed
+        mock_display_agent_message.assert_called_once_with('Hello from agent')

    @patch('openhands.cli.tui.display_message')
    def test_message_action_from_user_not_displayed(self, mock_display_message):
--- a/tests/unit/test_cli_tui.py
+++ b/tests/unit/test_cli_tui.py
@@ -6,6 +6,7 @@ from openhands.cli.tui import (
    CustomDiffLexer,
    UsageMetrics,
    UserCancelledError,
+    display_agent_message,
    display_banner,
    display_command,
    display_event,
@@ -26,6 +27,7 @@ from openhands.events import EventSource
 from openhands.events.action import (
    Action,
    ActionConfirmationStatus,
+    AgentFinishAction,
    CmdRunAction,
    MCPAction,
    MessageAction,
@@ -107,15 +109,15 @@ class TestDisplayFunctions:
        assert 'What do you want to build?' in message_text
        assert 'Type /help for help' in message_text

-    @patch('openhands.cli.tui.display_message')
-    def test_display_event_message_action(self, mock_display_message):
+    @patch('openhands.cli.tui.display_agent_message')
+    def test_display_event_message_action(self, mock_display_agent_message):
        config = MagicMock(spec=OpenHandsConfig)
        message = MessageAction(content='Test message')
        message._source = EventSource.AGENT

        display_event(message, config)

-        mock_display_message.assert_called_once_with('Test message')
+        mock_display_agent_message.assert_called_once_with('Test message')

    @patch('openhands.cli.tui.display_command')
    def test_display_event_cmd_action(self, mock_display_command):
@@ -181,6 +183,15 @@ class TestDisplayFunctions:
        display_event(action, config)

        mock_display_message.assert_called_once_with('Thinking about this...')
+        
+    @patch('openhands.cli.tui.display_agent_message')
+    def test_display_event_agent_finish(self, mock_display_agent_message):
+        """An AgentFinishAction is shown via display_agent_message with is_finish=True."""
+        finish_event = AgentFinishAction(final_thought='Task completed')
+        cfg = MagicMock(spec=OpenHandsConfig)
+
+        display_event(finish_event, cfg)
+
+        # The final thought is forwarded verbatim and flagged as a finish message
+        mock_display_agent_message.assert_called_once_with(
+            'Task completed', is_finish=True
+        )

    @patch('openhands.cli.tui.display_mcp_action')
    def test_display_event_mcp_action(self, mock_display_mcp_action):
@@ -255,6 +266,37 @@ class TestDisplayFunctions:
        mock_print.assert_called_once()
        args, kwargs = mock_print.call_args
        assert message in str(args[0])
+        
+    @patch('openhands.cli.tui.shutil.get_terminal_size')
+    @patch('openhands.cli.tui.print_formatted_text')
+    def test_display_agent_message(self, mock_print_formatted, mock_terminal_size):
+        """A plain-text agent message results in at least three print calls."""
+        import os
+
+        # shutil.get_terminal_size() returns an os.terminal_size, so mock it with
+        # the real type (columns=80, lines=24) rather than a look-alike namedtuple.
+        mock_terminal_size.return_value = os.terminal_size((80, 24))
+
+        message = 'Agent message'
+        display_agent_message(message)
+
+        # Should be called multiple times now (header, separator, content)
+        assert mock_print_formatted.call_count >= 3
+        
+    @patch('openhands.cli.tui.shutil.get_terminal_size')
+    @patch('openhands.cli.tui.print_formatted_text')
+    def test_display_agent_message_with_markdown(self, mock_print_formatted, mock_terminal_size):
+        """An agent message containing markdown results in at least three print calls."""
+        import os
+
+        # shutil.get_terminal_size() returns an os.terminal_size, so mock it with
+        # the real type (columns=80, lines=24) rather than a look-alike namedtuple.
+        mock_terminal_size.return_value = os.terminal_size((80, 24))
+
+        # Test with markdown content (a heading plus bold text)
+        message = '# Heading\n\nThis is **bold** text.'
+        display_agent_message(message)
+
+        # Should be called multiple times now (header, separator, content)
+        assert mock_print_formatted.call_count >= 3

    @patch('openhands.cli.tui.print_container')
    def test_display_command_awaiting_confirmation(self, mock_print_container):
--- a/tests/unit/test_config_precedence.py
+++ b/tests/unit/test_config_precedence.py
@@ -3,6 +3,8 @@ from unittest.mock import MagicMock, patch
 import pytest

 from openhands.core.config import (
+    OH_DEFAULT_AGENT,
+    OH_MAX_ITERATIONS,
    OpenHandsConfig,
    get_llm_config_arg,
    setup_config_from_args,
@@ -308,3 +310,74 @@ def test_cli_settings_json_not_override_config_toml(
    # Verify that settings.json did not override config.toml
    assert test_llm_config.model == 'config-toml-model'
    assert test_llm_config.api_key == 'config-toml-api-key'
+
+
+def test_default_values_applied_when_none():
+    """Defaults are used when agent_cls and max_iterations are None on the CLI."""
+
+    # CLI namespace whose agent_cls and max_iterations are left as None
+    cli_args = MagicMock(
+        config_file=None,
+        llm_config=None,
+        agent_cls=None,
+        max_iterations=None,
+    )
+
+    # Make the loader hand back a freshly constructed OpenHandsConfig
+    loader_patch = patch(
+        'openhands.core.config.utils.load_openhands_config',
+        return_value=OpenHandsConfig(),
+    )
+    with loader_patch:
+        resolved = setup_config_from_args(cli_args)
+
+    # The resolved config must carry the exported default constants
+    assert resolved.default_agent == OH_DEFAULT_AGENT
+    assert resolved.max_iterations == OH_MAX_ITERATIONS
+
+
+def test_cli_args_override_defaults():
+    """Explicit agent_cls and max_iterations CLI values end up in the config."""
+
+    # CLI namespace carrying non-default agent and iteration settings
+    cli_args = MagicMock(
+        config_file=None,
+        llm_config=None,
+        agent_cls='CustomAgent',
+        max_iterations=50,
+    )
+
+    # Make the loader hand back a freshly constructed OpenHandsConfig
+    loader_patch = patch(
+        'openhands.core.config.utils.load_openhands_config',
+        return_value=OpenHandsConfig(),
+    )
+    with loader_patch:
+        resolved = setup_config_from_args(cli_args)
+
+    # The CLI-provided values must be reflected in the resolved config
+    assert resolved.default_agent == 'CustomAgent'
+    assert resolved.max_iterations == 50
+
+
+def test_cli_args_none_uses_config_toml_values():
+    """Loaded config values are kept when agent_cls and max_iterations are None."""
+
+    # Stand-in for what the loader would return after reading config.toml
+    loaded_config = OpenHandsConfig()
+    loaded_config.default_agent = 'ConfigTomlAgent'
+    loaded_config.max_iterations = 100
+
+    # CLI namespace whose agent_cls and max_iterations are left as None
+    cli_args = MagicMock(
+        config_file=None,
+        llm_config=None,
+        agent_cls=None,
+        max_iterations=None,
+    )
+
+    loader_patch = patch(
+        'openhands.core.config.utils.load_openhands_config',
+        return_value=loaded_config,
+    )
+    with loader_patch:
+        resolved = setup_config_from_args(cli_args)
+
+    # None-valued CLI args must not overwrite the loaded values
+    assert resolved.default_agent == 'ConfigTomlAgent'
+    assert resolved.max_iterations == 100
--- a/tests/unit/test_get_repository_microagents.py
+++ b/tests/unit/test_get_repository_microagents.py
@@ -13,6 +13,7 @@ from openhands.integrations.service_types import (
    Repository,
 )
 from openhands.microagent.types import MicroagentContentResponse
+from openhands.server.dependencies import check_session_api_key
 from openhands.server.routes.git import app as git_app
 from openhands.server.user_auth import (
    get_access_token,
@@ -49,10 +50,15 @@ def test_client():
    def mock_get_user_id():
        return 'test_user'

+    def mock_check_session_api_key():
+        # Mock session API key check to always pass for tests
+        return None
+
    # Override the dependencies in the app
    app.dependency_overrides[get_provider_tokens] = mock_get_provider_tokens
    app.dependency_overrides[get_access_token] = mock_get_access_token
    app.dependency_overrides[get_user_id] = mock_get_user_id
+    app.dependency_overrides[check_session_api_key] = mock_check_session_api_key

    yield TestClient(app)

--- a/tests/unit/test_middleware.py
+++ b/tests/unit/test_middleware.py
@@ -46,24 +46,32 @@ def test_localhost_cors_middleware_init_without_env_var():


 def test_localhost_cors_middleware_is_allowed_origin_localhost(app):
-    """Test that localhost origins are allowed regardless of port."""
-    app.add_middleware(LocalhostCORSMiddleware)
-    client = TestClient(app)
+    """Test that localhost origins are allowed regardless of port when no specific origins are configured."""
+    # Test without setting PERMITTED_CORS_ORIGINS to trigger localhost behavior
+    with patch.dict(os.environ, {}, clear=True):
+        app.add_middleware(LocalhostCORSMiddleware)
+        client = TestClient(app)

-    # Test with localhost
-    response = client.get('/test', headers={'Origin': 'http://localhost:8000'})
-    assert response.status_code == 200
-    assert response.headers['access-control-allow-origin'] == 'http://localhost:8000'
+        # Test with localhost
+        response = client.get('/test', headers={'Origin': 'http://localhost:8000'})
+        assert response.status_code == 200
+        assert (
+            response.headers['access-control-allow-origin'] == 'http://localhost:8000'
+        )

-    # Test with different port
-    response = client.get('/test', headers={'Origin': 'http://localhost:3000'})
-    assert response.status_code == 200
-    assert response.headers['access-control-allow-origin'] == 'http://localhost:3000'
+        # Test with different port
+        response = client.get('/test', headers={'Origin': 'http://localhost:3000'})
+        assert response.status_code == 200
+        assert (
+            response.headers['access-control-allow-origin'] == 'http://localhost:3000'
+        )

-    # Test with 127.0.0.1
-    response = client.get('/test', headers={'Origin': 'http://127.0.0.1:8000'})
-    assert response.status_code == 200
-    assert response.headers['access-control-allow-origin'] == 'http://127.0.0.1:8000'
+        # Test with 127.0.0.1
+        response = client.get('/test', headers={'Origin': 'http://127.0.0.1:8000'})
+        assert response.status_code == 200
+        assert (
+            response.headers['access-control-allow-origin'] == 'http://127.0.0.1:8000'
+        )


 def test_localhost_cors_middleware_is_allowed_origin_non_localhost(app):
@@ -87,14 +95,15 @@ def test_localhost_cors_middleware_is_allowed_origin_non_localhost(app):

 def test_localhost_cors_middleware_missing_origin(app):
    """Test behavior when Origin header is missing."""
-    app.add_middleware(LocalhostCORSMiddleware)
-    client = TestClient(app)
+    with patch.dict(os.environ, {}, clear=True):
+        app.add_middleware(LocalhostCORSMiddleware)
+        client = TestClient(app)

-    # Test without Origin header
-    response = client.get('/test')
-    assert response.status_code == 200
-    # There should be no access-control-allow-origin header
-    assert 'access-control-allow-origin' not in response.headers
+        # Test without Origin header
+        response = client.get('/test')
+        assert response.status_code == 200
+        # There should be no access-control-allow-origin header
+        assert 'access-control-allow-origin' not in response.headers


 def test_localhost_cors_middleware_inheritance():
--- a/tests/unit/test_windows_prompt_refinement.py
+++ b/tests/unit/test_windows_prompt_refinement.py
@@ -0,0 +1,179 @@
+import sys
+from unittest.mock import patch
+
+import pytest
+
+from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
+from openhands.core.config import AgentConfig
+from openhands.llm.llm import LLM
+
+# Skip all tests in this module if not running on Windows
+pytestmark = pytest.mark.skipif(
+    sys.platform != 'win32', reason='Windows prompt refinement tests require Windows'
+)
+
+
+@pytest.fixture
+def mock_llm():
+    """Provide an LLM built from a minimal placeholder config (model and API key)."""
+    return LLM(config={'model': 'gpt-4', 'api_key': 'test'})
+
+
+@pytest.fixture
+def agent_config():
+    """Provide an AgentConfig constructed with no arguments."""
+    default_config = AgentConfig()
+    return default_config
+
+
+def test_codeact_agent_system_prompt_no_bash_on_windows(mock_llm, agent_config):
+    """The CodeActAgent system message mentions 'powershell' and never 'bash'."""
+    # Build the agent and fetch its system message through the prompt manager
+    agent = CodeActAgent(llm=mock_llm, config=agent_config)
+    system_prompt = agent.prompt_manager.get_system_message()
+    lowered_prompt = system_prompt.lower()
+
+    # Case-insensitive check: 'bash' must not appear anywhere
+    assert 'bash' not in lowered_prompt, (
+        f"System prompt contains 'bash' on Windows platform. "
+        f"It should be replaced with 'powershell'. "
+        f'System prompt: {system_prompt}'
+    )
+
+    # Case-insensitive check: 'powershell' must appear
+    assert 'powershell' in lowered_prompt, (
+        f"System prompt should contain 'powershell' on Windows platform. "
+        f'System prompt: {system_prompt}'
+    )
+
+
+def test_codeact_agent_tool_descriptions_no_bash_on_windows(mock_llm, agent_config):
+    """No 'function' tool of CodeActAgent mentions 'bash' in any description."""
+    agent = CodeActAgent(llm=mock_llm, config=agent_config)
+
+    for tool in agent.tools:
+        # Only entries whose type is 'function' are inspected
+        if tool['type'] != 'function':
+            continue
+
+        function_info = tool['function']
+
+        # The tool's own description (case-insensitive)
+        description = function_info.get('description', '')
+        assert 'bash' not in description.lower(), (
+            f"Tool '{function_info['name']}' description contains 'bash' on Windows. "
+            f'Description: {description}'
+        )
+
+        # The description of every declared parameter (case-insensitive)
+        properties = function_info.get('parameters', {}).get('properties', {})
+        for param_name, param_info in properties.items():
+            param_description = param_info.get('description', '')
+            assert 'bash' not in param_description.lower(), (
+                f"Tool '{function_info['name']}' parameter '{param_name}' "
+                f"description contains 'bash' on Windows. "
+                f'Parameter description: {param_description}'
+            )
+
+
+def test_in_context_learning_example_no_bash_on_windows():
+    """The example built for a sample tool set uses 'powershell', never 'bash'."""
+    from openhands.agenthub.codeact_agent.tools.bash import create_cmd_run_tool
+    from openhands.agenthub.codeact_agent.tools.finish import FinishTool
+    from openhands.agenthub.codeact_agent.tools.str_replace_editor import (
+        create_str_replace_editor_tool,
+    )
+    from openhands.llm.fn_call_converter import get_example_for_tools
+
+    # Build the example for a command tool, an editor tool and the finish tool
+    sample_tools = [
+        create_cmd_run_tool(),
+        create_str_replace_editor_tool(),
+        FinishTool,
+    ]
+    example = get_example_for_tools(sample_tools)
+    lowered_example = example.lower()
+
+    # Case-insensitive check: 'bash' must not appear anywhere
+    assert 'bash' not in lowered_example, (
+        f"In-context learning example contains 'bash' on Windows platform. "
+        f"It should be replaced with 'powershell'. "
+        f'Example: {example}'
+    )
+
+    # An empty example has nothing further to verify
+    if not example:
+        return
+
+    # Case-insensitive check: 'powershell' must appear
+    assert 'powershell' in lowered_example, (
+        f"In-context learning example should contain 'powershell' on Windows platform. "
+        f'Example: {example}'
+    )
+
+
+def test_refine_prompt_function_works():
+    """refine_prompt swaps the word 'bash' for 'powershell' in several scenarios."""
+    from openhands.agenthub.codeact_agent.tools.bash import refine_prompt
+
+    # Scenario 1: a single occurrence
+    single = refine_prompt('Execute a bash command to list files')
+    single_lower = single.lower()
+
+    assert 'bash' not in single_lower
+    assert 'powershell' in single_lower
+    assert single == 'Execute a powershell command to list files'
+
+    # Scenario 2: several occurrences in one prompt
+    repeated = refine_prompt('Use bash to run bash commands in the bash shell')
+
+    assert 'bash' not in repeated.lower()
+    assert (
+        repeated == 'Use powershell to run powershell commands in the powershell shell'
+    )
+
+    # Scenario 3: upper-, title- and lower-case spellings
+    mixed_case = refine_prompt('BASH and Bash and bash should all be replaced')
+
+    assert 'bash' not in mixed_case.lower()
+    assert (
+        mixed_case == 'powershell and powershell and powershell should all be replaced'
+    )
+
+    # Scenario 4: the execute_bash tool name
+    tool_name = refine_prompt('Use the execute_bash tool to run commands')
+    tool_name_lower = tool_name.lower()
+
+    assert 'execute_bash' not in tool_name_lower
+    assert 'execute_powershell' in tool_name_lower
+    assert tool_name == 'Use the execute_powershell tool to run commands'
+
+    # Scenario 5: 'bashful' is left alone while 'bash-like' becomes
+    # 'powershell-like'
+    embedded = refine_prompt('The bashful person likes bash-like syntax')
+
+    assert 'bashful' in embedded
+    assert 'powershell-like' in embedded
+    assert embedded == 'The bashful person likes powershell-like syntax'
+
+
+def test_refine_prompt_function_on_non_windows():
+    """refine_prompt returns its input unchanged when sys.platform is 'linux'."""
+    from openhands.agenthub.codeact_agent.tools.bash import refine_prompt
+
+    original_prompt = 'Execute a bash command to list files'
+
+    # Pretend to run on Linux by patching the platform seen by the bash tool module
+    linux_platform = patch(
+        'openhands.agenthub.codeact_agent.tools.bash.sys.platform', 'linux'
+    )
+    with linux_platform:
+        result = refine_prompt(original_prompt)
+
+        # The prompt must come back untouched, 'bash' included
+        assert result == original_prompt
+        assert 'bash' in result.lower()
Author	SHA1	Message	Date
openhands	f0de6f9699	test(cli): Update tests for markdown rendering and agent message display	2025-08-10 18:21:38 +00:00
openhands	cc4b663cf7	refactor(cli): Combine agent message and finish display functions	2025-08-10 18:20:23 +00:00
openhands	7f9a43e217	feat(cli): Add markdown rendering for agent messages	2025-08-10 18:17:33 +00:00
Xingyao Wang	116ba199d1	feat(agent): stop using short tool description for gpt-5 (#10184 )	2025-08-09 17:56:52 -04:00
Boxuan Li	803bdced9c	Fix Windows prompt refinement: ensure 'bash' is replaced with 'powershell' in all prompts (#10179 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-08 20:28:36 -07:00
Xingyao Wang	3eecac2003	docs: Add GPT-5 model recommendation and fix pricing display issue (#10177 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-08 19:19:59 +00:00
mamoodi	c02e09fc2d	Hide Git Settings section from Application settings (#10176 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-08 19:06:40 +00:00
Tim O'Farrell	18f8661770	feat: add mcp_shttp_servers override to conversation initialization (#10171 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-08 18:05:44 +00:00
Xingyao Wang	04ff4a025b	feat(cli): Use CLI to launch OpenHands UI server via Docker (#9783 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-09 02:04:07 +08:00
mamoodi	81ef363658	Increase stale bot inactivity time and better messaging (#10167 ) Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>	2025-08-08 16:41:15 +00:00
Xingyao Wang	1474c5bc1c	Support gpt-5-2025-08-07 and add it to OpenHands provider (#10172 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-08 16:05:51 +00:00
sp.wack	9b0a5da839	Use EventStore directly in remember prompt; merge client services (#10143 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-08 18:03:03 +04:00
Graham Neubig	7ab2ad2c1b	Fix authentication setup issues in unit tests (#10118 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-07 22:12:21 -04:00
Graham Neubig	8416a019cb	Fix unit test failures by prioritizing current directory in PYTHONPATH (#10105 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-07 22:12:02 -04:00
Engel Nyst	73a7c7786d	Load previous conversation by id (CLI) (#10156 )	2025-08-07 23:09:20 +02:00
aeft	11d12c5a01	fix: prevent CLI argument parser defaults from overriding config file values (#10140 )	2025-08-08 04:48:04 +08:00
Xingyao Wang	c4f303a07b	chore(eval): Remove eval_infer_remote.sh script and related references (#10157 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-07 20:46:59 +00:00
Kenny Dizi	3a629cdf08	Add support model `claude-opus-4-1-20250805` (#10120 )	2025-08-07 18:48:34 +00:00
sp.wack	6ea33b657d	chore(frontend): Remove some dead code (#10121 )	2025-08-08 02:40:35 +08:00
Xingyao Wang	a526f53181	Add uvx CLI command to PR descriptions (#10142 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-08 01:51:55 +08:00