Compare commits

..

20 Commits

Author SHA1 Message Date
openhands
f0de6f9699 test(cli): Update tests for markdown rendering and agent message display 2025-08-10 18:21:38 +00:00
openhands
cc4b663cf7 refactor(cli): Combine agent message and finish display functions 2025-08-10 18:20:23 +00:00
openhands
7f9a43e217 feat(cli): Add markdown rendering for agent messages 2025-08-10 18:17:33 +00:00
Xingyao Wang
116ba199d1 feat(agent): stop using short tool description for gpt-5 (#10184) 2025-08-09 17:56:52 -04:00
Boxuan Li
803bdced9c Fix Windows prompt refinement: ensure 'bash' is replaced with 'powershell' in all prompts (#10179)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-08 20:28:36 -07:00
Xingyao Wang
3eecac2003 docs: Add GPT-5 model recommendation and fix pricing display issue (#10177)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-08 19:19:59 +00:00
mamoodi
c02e09fc2d Hide Git Settings section from Application settings (#10176)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-08 19:06:40 +00:00
Tim O'Farrell
18f8661770 feat: add mcp_shttp_servers override to conversation initialization (#10171)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-08 18:05:44 +00:00
Xingyao Wang
04ff4a025b feat(cli): Use CLI to launch OpenHands UI server via Docker (#9783)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-09 02:04:07 +08:00
mamoodi
81ef363658 Increase stale bot inactivity time and better messaging (#10167)
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-08-08 16:41:15 +00:00
Xingyao Wang
1474c5bc1c Support gpt-5-2025-08-07 and add it to OpenHands provider (#10172)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-08 16:05:51 +00:00
sp.wack
9b0a5da839 Use EventStore directly in remember prompt; merge client services (#10143)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-08 18:03:03 +04:00
Graham Neubig
7ab2ad2c1b Fix authentication setup issues in unit tests (#10118)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-07 22:12:21 -04:00
Graham Neubig
8416a019cb Fix unit test failures by prioritizing current directory in PYTHONPATH (#10105)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-07 22:12:02 -04:00
Engel Nyst
73a7c7786d Load previous conversation by id (CLI) (#10156) 2025-08-07 23:09:20 +02:00
aeft
11d12c5a01 fix: prevent CLI argument parser defaults from overriding config file values (#10140) 2025-08-08 04:48:04 +08:00
Xingyao Wang
c4f303a07b chore(eval): Remove eval_infer_remote.sh script and related references (#10157)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-07 20:46:59 +00:00
Kenny Dizi
3a629cdf08 Add support model claude-opus-4-1-20250805 (#10120) 2025-08-07 18:48:34 +00:00
sp.wack
6ea33b657d chore(frontend): Remove some dead code (#10121) 2025-08-08 02:40:35 +08:00
Xingyao Wang
a526f53181 Add uvx CLI command to PR descriptions (#10142)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-08 01:51:55 +08:00
84 changed files with 1533 additions and 817 deletions

71
.github/scripts/update_pr_description.sh vendored Executable file
View File

@@ -0,0 +1,71 @@
#!/bin/bash
set -euxo pipefail

# Updates the PR description with commands to run the PR locally.
# It adds both a Docker command (GUI) and a uvx command (CLI).
#
# Required environment variables:
#   PR_NUMBER - pull request passed to `gh pr view` / `gh pr edit`
#   SHORT_SHA - short commit SHA used in the image tags and container name

# Fail early with a readable message instead of a bare "unbound variable".
: "${PR_NUMBER:?PR_NUMBER must be set}"
: "${SHORT_SHA:?SHORT_SHA must be set}"

# Heading that introduces the generated section. It is also used to detect a
# section that an earlier run of this script already wrote.
readonly SECTION_MARKER="To run this PR locally, use the following command:"

# Get the branch name for the PR
BRANCH_NAME=$(gh pr view "$PR_NUMBER" --json headRefName --jq .headRefName)

# Define the Docker command.
# Continuation lines start at column 0 on purpose: inside double quotes the
# backslash-newline pair is removed, so any indentation on the next line would
# become part of the command string.
DOCKER_RUN_COMMAND="docker run -it --rm \
-p 3000:3000 \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:${SHORT_SHA}-nikolaik \
--name openhands-app-${SHORT_SHA} \
docker.all-hands.dev/all-hands-ai/openhands:${SHORT_SHA}"

# Define the uvx command
UVX_RUN_COMMAND="uvx --python 3.12 --from git+https://github.com/All-Hands-AI/OpenHands@${BRANCH_NAME} openhands"

# Get the current PR body
PR_BODY=$(gh pr view "$PR_NUMBER" --json body --jq .body)

# Work out what to keep in front of the generated section.
# The substring test replaces `echo ... | grep -q`: under `pipefail` that
# pipeline can fail spuriously when grep exits on the first match while echo is
# still writing, which would append a second copy of the section.
if [[ "$PR_BODY" == *"$SECTION_MARKER"* ]]; then
  # Existing section: keep only the lines before the first line containing the
  # marker, so everything from the marker onwards is regenerated below.
  BEFORE_SECTION=$(awk -v marker="$SECTION_MARKER" \
    'index($0, marker) { exit } { print }' <<<"$PR_BODY")
else
  # No section yet: keep the whole body and add a separator line.
  BEFORE_SECTION="${PR_BODY}
---"
fi

# Prepare the new PR body with both commands
NEW_PR_BODY=$(cat <<EOF
${BEFORE_SECTION}
${SECTION_MARKER}
GUI with Docker:
\`\`\`
${DOCKER_RUN_COMMAND}
\`\`\`
CLI with uvx:
\`\`\`
${UVX_RUN_COMMAND}
\`\`\`
EOF
)

# Update the PR description
echo "Updating PR description with Docker and uvx commands"
gh pr edit "$PR_NUMBER" --body "$NEW_PR_BODY"

View File

@@ -332,29 +332,5 @@ jobs:
SHORT_SHA: ${{ steps.short_sha.outputs.SHORT_SHA }}
shell: bash
run: |
echo "updating PR description"
DOCKER_RUN_COMMAND="docker run -it --rm \
-p 3000:3000 \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:$SHORT_SHA-nikolaik \
--name openhands-app-$SHORT_SHA \
docker.all-hands.dev/all-hands-ai/openhands:$SHORT_SHA"
PR_BODY=$(gh pr view $PR_NUMBER --json body --jq .body)
if echo "$PR_BODY" | grep -q "To run this PR locally, use the following command:"; then
UPDATED_PR_BODY=$(echo "${PR_BODY}" | sed -E "s|docker run -it --rm.*|$DOCKER_RUN_COMMAND|")
else
UPDATED_PR_BODY="${PR_BODY}
---
To run this PR locally, use the following command:
\`\`\`
$DOCKER_RUN_COMMAND
\`\`\`"
fi
echo "updated body: $UPDATED_PR_BODY"
gh pr edit $PR_NUMBER --body "$UPDATED_PR_BODY"
echo "Updating PR description with Docker and uvx commands"
bash ${GITHUB_WORKSPACE}/.github/scripts/update_pr_description.sh

View File

@@ -48,11 +48,11 @@ jobs:
- name: Build Environment
run: make build
- name: Run Unit Tests
run: poetry run pytest --forked -n auto -svv ./tests/unit
run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest --forked -n auto -svv ./tests/unit
- name: Run Runtime Tests with CLIRuntime
run: TEST_RUNTIME=cli poetry run pytest -svv tests/runtime/test_bash.py
run: PYTHONPATH=".:$PYTHONPATH" TEST_RUNTIME=cli poetry run pytest -svv tests/runtime/test_bash.py
- name: Run E2E Tests
run: poetry run pytest -svv tests/e2e
run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest -svv tests/e2e
# Run specific Windows python tests
test-on-windows:
@@ -77,9 +77,11 @@ jobs:
- name: Run Windows unit tests
run: poetry run pytest -svv tests/unit/test_windows_bash.py
env:
PYTHONPATH: ".;$env:PYTHONPATH"
DEBUG: "1"
- name: Run Windows runtime tests with LocalRuntime
run: $env:TEST_RUNTIME="local"; poetry run pytest -svv tests/runtime/test_bash.py
env:
PYTHONPATH: ".;$env:PYTHONPATH"
TEST_RUNTIME: local
DEBUG: "1"

View File

@@ -12,11 +12,11 @@ jobs:
steps:
- uses: actions/stale@v9
with:
stale-issue-message: 'This issue is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
stale-pr-message: 'This PR is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
days-before-stale: 30
stale-issue-message: 'This issue is stale because it has been open for 40 days with no activity. Remove the stale label or leave a comment, otherwise it will be closed in 10 days.'
stale-pr-message: 'This PR is stale because it has been open for 40 days with no activity. Remove the stale label or leave a comment, otherwise it will be closed in 10 days.'
days-before-stale: 40
exempt-issue-labels: 'roadmap'
close-issue-message: 'This issue was closed because it has been stalled for over 30 days with no activity.'
close-pr-message: 'This PR was closed because it has been stalled for over 30 days with no activity.'
days-before-close: 7
close-issue-message: 'This issue was automatically closed due to 50 days of inactivity. We do this to help keep the issues somewhat manageable and focus on active issues.'
close-pr-message: 'This PR was closed because it had no activity for 50 days. If you feel this was closed in error, and you would like to continue the PR, please resubmit or let us know.'
days-before-close: 10
operations-per-run: 150

View File

@@ -58,34 +58,34 @@ RUN sed -i 's/^UID_MIN.*/UID_MIN 499/' /etc/login.defs
# Default is 60000, but we've seen up to 200000
RUN sed -i 's/^UID_MAX.*/UID_MAX 1000000/' /etc/login.defs
RUN groupadd --gid $OPENHANDS_USER_ID openhands
RUN groupadd --gid $OPENHANDS_USER_ID app
RUN useradd -l -m -u $OPENHANDS_USER_ID --gid $OPENHANDS_USER_ID -s /bin/bash openhands && \
usermod -aG openhands openhands && \
usermod -aG app openhands && \
usermod -aG sudo openhands && \
echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
RUN chown -R openhands:openhands /app && chmod -R 770 /app
RUN sudo chown -R openhands:openhands $WORKSPACE_BASE && sudo chmod -R 770 $WORKSPACE_BASE
RUN chown -R openhands:app /app && chmod -R 770 /app
RUN sudo chown -R openhands:app $WORKSPACE_BASE && sudo chmod -R 770 $WORKSPACE_BASE
USER openhands
ENV VIRTUAL_ENV=/app/.venv \
PATH="/app/.venv/bin:$PATH" \
PYTHONPATH='/app'
COPY --chown=openhands:openhands --chmod=770 --from=backend-builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
COPY --chown=openhands:app --chmod=770 --from=backend-builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
COPY --chown=openhands:openhands --chmod=770 ./microagents ./microagents
COPY --chown=openhands:openhands --chmod=770 ./openhands ./openhands
COPY --chown=openhands:openhands --chmod=777 ./openhands/runtime/plugins ./openhands/runtime/plugins
COPY --chown=openhands:openhands pyproject.toml poetry.lock README.md MANIFEST.in LICENSE ./
COPY --chown=openhands:app --chmod=770 ./microagents ./microagents
COPY --chown=openhands:app --chmod=770 ./openhands ./openhands
COPY --chown=openhands:app --chmod=777 ./openhands/runtime/plugins ./openhands/runtime/plugins
COPY --chown=openhands:app pyproject.toml poetry.lock README.md MANIFEST.in LICENSE ./
# This is run as "openhands" user, and will create __pycache__ with openhands:openhands ownership
RUN python openhands/core/download.py # No-op to download assets
# Add this line to set group ownership of all files/directories not already in "app" group
# openhands:openhands -> openhands:openhands
RUN find /app \! -group openhands -exec chgrp openhands {} +
# openhands:openhands -> openhands:app
RUN find /app \! -group app -exec chgrp app {} +
COPY --chown=openhands:openhands --chmod=770 --from=frontend-builder /app/build ./frontend/build
COPY --chown=openhands:openhands --chmod=770 ./containers/app/entrypoint.sh /app/entrypoint.sh
COPY --chown=openhands:app --chmod=770 --from=frontend-builder /app/build ./frontend/build
COPY --chown=openhands:app --chmod=770 ./containers/app/entrypoint.sh /app/entrypoint.sh
USER root

View File

@@ -7,6 +7,67 @@ description: High level overview of the Graphical User Interface (GUI) in OpenHa
- [OpenHands is running](/usage/local-setup)
## Launching the GUI Server
### Using the CLI Command
You can launch the OpenHands GUI server directly from the command line using the `serve` command:
<Callout type="info">
**Prerequisites**: You need to have the [OpenHands CLI installed](/usage/how-to/cli-mode) first, OR have `uv` installed and run `uvx --python 3.12 --from openhands-ai openhands serve`. Otherwise, you'll need to use Docker directly (see the [Docker section](#using-docker-directly) below).
</Callout>
```bash
openhands serve
```
This command will:
- Check that Docker is installed and running
- Pull the required Docker images
- Launch the OpenHands GUI server at http://localhost:3000
- Use the same configuration directory (`~/.openhands`) as the CLI mode
#### Mounting Your Current Directory
To mount your current working directory into the GUI server container, use the `--mount-cwd` flag:
```bash
openhands serve --mount-cwd
```
This is useful when you want to work on files in your current directory through the GUI. The directory will be mounted at `/workspace` inside the container.
#### Using GPU Support
If you have NVIDIA GPUs and want to make them available to the OpenHands container, use the `--gpu` flag:
```bash
openhands serve --gpu
```
This will enable GPU support via nvidia-docker, mounting all available GPUs into the container. You can combine this with other flags:
```bash
openhands serve --gpu --mount-cwd
```
**Prerequisites for GPU support:**
- NVIDIA GPU drivers must be installed on your host system
- [NVIDIA Container Toolkit (nvidia-docker2)](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) must be installed and configured
#### Requirements
Before using the `openhands serve` command, ensure that:
- Docker is installed and running on your system
- You have internet access to pull the required Docker images
- Port 3000 is available on your system
The CLI will automatically check these requirements and provide helpful error messages if anything is missing.
### Using Docker Directly
Alternatively, you can run the GUI server using Docker directly. See the [local setup guide](/usage/local-setup) for detailed Docker instructions.
## Overview
### Initial Setup

View File

@@ -18,7 +18,7 @@ Based on these findings and community feedback, these are the latest models that
### Cloud / API-Based Models
- [anthropic/claude-sonnet-4-20250514](https://www.anthropic.com/api) (recommended)
- [openai/o4-mini](https://openai.com/index/introducing-o3-and-o4-mini/)
- [openai/gpt-5-2025-08-07](https://openai.com/api/) (recommended)
- [gemini/gemini-2.5-pro](https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/)
- [deepseek/deepseek-chat](https://api-docs.deepseek.com/)
- [moonshot/kimi-k2-0711-preview](https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2)

View File

@@ -32,4 +32,4 @@ When running OpenHands, you'll need to set the following in the OpenHands UI thr
Pricing follows official API provider rates. [You can view model prices here.](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json)
For `qwen3-coder-480b`, we charge the cheapest FP8 rate available on openrouter: $0.4 per million input tokens and $1.6 per million output tokens.
For `qwen3-coder-480b`, we charge the cheapest FP8 rate available on openrouter: \$0.4 per million input tokens and \$1.6 per million output tokens.

View File

@@ -66,6 +66,30 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to
### Start the App
#### Option 1: Using the CLI Launcher (Recommended)
If you have Python 3.12+ installed, you can use the CLI launcher for a simpler experience:
```bash
# Install OpenHands
pip install openhands-ai
# Launch the GUI server
openhands serve
# Or with GPU support (requires nvidia-docker)
openhands serve --gpu
# Or with current directory mounted
openhands serve --mount-cwd
```
Alternatively, if you have [uv](https://docs.astral.sh/uv/) installed, you can run `uvx --python 3.12 --from openhands-ai openhands serve` instead.
This will automatically handle Docker requirements checking, image pulling, and launching the GUI server. The `--gpu` flag enables GPU support via nvidia-docker, and `--mount-cwd` mounts your current directory into the container.
#### Option 2: Using Docker Directly
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.51-nikolaik

View File

@@ -18,8 +18,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -172,7 +172,7 @@ def process_instance(
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--answerer_model', '-a', default='gpt-3.5-turbo', help='answerer model'
)

View File

@@ -26,8 +26,8 @@ from openhands.controller.state.state import State
from openhands.core.config import (
AgentConfig,
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -525,7 +525,7 @@ def commit0_setup(dataset: pd.DataFrame, repo_split: str) -> pd.DataFrame:
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -31,8 +31,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
load_from_toml,
)
from openhands.core.config.utils import get_agent_config_arg
@@ -294,7 +294,7 @@ Here is the task:
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--level',
type=str,

View File

@@ -20,8 +20,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -134,7 +134,7 @@ def process_instance(
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--hubs',
type=str,

View File

@@ -38,8 +38,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -312,7 +312,7 @@ Ok now its time to start solving the question. Good luck!
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
# data split must be one of 'gpqa_main', 'gpqa_diamond', 'gpqa_experts', 'gpqa_extended'
parser.add_argument(
'--data-split',

View File

@@ -21,7 +21,7 @@ from evaluation.utils.shared import (
from openhands.core.config import (
LLMConfig,
OpenHandsConfig,
get_parser,
get_evaluation_parser,
load_openhands_config,
)
from openhands.core.logger import openhands_logger as logger
@@ -167,7 +167,7 @@ def process_predictions(predictions_path: str):
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'-s',
'--eval-split',

View File

@@ -30,8 +30,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
load_openhands_config,
)
from openhands.core.logger import openhands_logger as logger
@@ -358,7 +358,7 @@ Be thorough in your exploration, testing, and reasoning. It's fine if your think
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'-s',
'--eval-split',

View File

@@ -18,8 +18,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -267,7 +267,7 @@ def process_instance(
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -23,8 +23,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -229,7 +229,7 @@ def process_instance(
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
SUBSETS = [
# Eurus subset: https://arxiv.org/abs/2404.02078

View File

@@ -4,7 +4,11 @@ import pprint
import tqdm
from openhands.core.config import get_llm_config_arg, get_parser, load_openhands_config
from openhands.core.config import (
get_evaluation_parser,
get_llm_config_arg,
load_openhands_config,
)
from openhands.core.logger import openhands_logger as logger
from openhands.llm.llm import LLM
@@ -111,7 +115,7 @@ def classify_error(llm: LLM, failed_case: dict) -> str:
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--json_file_path',
type=str,

View File

@@ -34,8 +34,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
load_openhands_config,
)
from openhands.core.logger import openhands_logger as logger
@@ -273,7 +273,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'-s',
'--eval-split',

View File

@@ -30,7 +30,7 @@ from evaluation.utils.shared import (
from openhands.core.config import (
LLMConfig,
OpenHandsConfig,
get_parser,
get_evaluation_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime
@@ -323,7 +323,7 @@ def process_instance(
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--input-file',
type=str,

View File

@@ -32,8 +32,8 @@ from openhands.controller.state.state import State
from openhands.core.config import (
AgentConfig,
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -772,7 +772,7 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
if __name__ == '__main__':
# pdb.set_trace()
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -21,8 +21,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -239,7 +239,7 @@ If the program uses some packages that are incompatible, please figure out alter
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--use-knowledge',
type=str,

View File

@@ -183,24 +183,7 @@ The final results will be saved to `evaluation/evaluation_outputs/outputs/swe_be
- `report.json`: a JSON file that contains keys like `"resolved_ids"` pointing to instance IDs that are resolved by the agent.
- `logs/`: a directory of test logs
### Run evaluation with `RemoteRuntime`
OpenHands Remote Runtime is currently in beta (read [here](https://runtime.all-hands.dev/) for more details). It allows you to run rollouts in parallel in the cloud, so you don't need a powerful machine to run evaluation.
Fill out [this form](https://docs.google.com/forms/d/e/1FAIpQLSckVz_JFwg2_mOxNZjCtr7aoBFI2Mwdan3f75J_TrdMS1JV2g/viewform) to apply if you want to try this out!
```bash
./evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh [output.jsonl filepath] [num_workers]
# Example - This evaluates patches generated by CodeActAgent on Llama-3.1-70B-Instruct-Turbo on the test set of "princeton-nlp/SWE-bench_Lite", with 16 workers running in parallel
ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.eval.all-hands.dev" EVAL_DOCKER_IMAGE_PREFIX="us-central1-docker.pkg.dev/evaluation-092424/swe-bench-images" \
evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh evaluation/evaluation_outputs/outputs/swe-bench-lite/CodeActAgent/Llama-3.1-70B-Instruct-Turbo_maxiter_100_N_v1.9-no-hint/output.jsonl 16 "princeton-nlp/SWE-bench_Lite" "test"
```
To clean-up all existing runtimes that you've already started, run:
```bash
ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/utils/scripts/cleanup_remote_runtime.sh
```
## SWT-Bench Evaluation

View File

@@ -26,7 +26,7 @@ from evaluation.utils.shared import (
from openhands.core.config import (
LLMConfig,
OpenHandsConfig,
get_parser,
get_evaluation_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime
@@ -353,7 +353,7 @@ def process_instance(
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--input-file',
type=str,

View File

@@ -43,8 +43,8 @@ from openhands.controller.state.state import State
from openhands.core.config import (
AgentConfig,
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.config.condenser_config import NoOpCondenserConfig
from openhands.core.config.utils import get_condenser_config_arg
@@ -732,7 +732,7 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -28,8 +28,8 @@ from evaluation.utils.shared import (
)
from openhands.controller.state.state import State
from openhands.core.config import (
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.config.condenser_config import NoOpCondenserConfig
from openhands.core.config.utils import get_condenser_config_arg
@@ -201,7 +201,7 @@ def process_instance(
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -31,8 +31,8 @@ from openhands.controller.state.state import State
from openhands.core.config import (
AgentConfig,
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -644,7 +644,7 @@ SWEGYM_EXCLUDE_IDS = [
]
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -1,46 +0,0 @@
#!/usr/bin/env bash
set -eo pipefail

# Runs the SWE-bench eval_infer.py evaluation on an inference output file and
# then updates that file with the evaluation results.
#
# Usage: <this script> INPUT_FILE [NUM_WORKERS] [DATASET] [SPLIT]
#   INPUT_FILE   path to a jsonl file (required)
#   NUM_WORKERS  number of eval workers (default: 1)
#   DATASET      dataset name (default: princeton-nlp/SWE-bench_Lite)
#   SPLIT        dataset split (default: test)
# Environment:
#   EVAL_LIMIT   optional; when non-empty it is passed as --eval-n-limit

INPUT_FILE=${1:-}
NUM_WORKERS=${2:-}
DATASET=${3:-}
SPLIT=${4:-}

if [[ -z "$INPUT_FILE" ]]; then
  echo "INPUT_FILE not specified (should be a path to a jsonl file)"
  exit 1
fi

if [[ -z "$DATASET" ]]; then
  echo "DATASET not specified, use default princeton-nlp/SWE-bench_Lite"
  DATASET="princeton-nlp/SWE-bench_Lite"
fi

if [[ -z "$SPLIT" ]]; then
  echo "SPLIT not specified, use default test"
  SPLIT="test"
fi

if [[ -z "$NUM_WORKERS" ]]; then
  echo "NUM_WORKERS not specified, use default 1"
  NUM_WORKERS=1
fi

echo "... Evaluating on $INPUT_FILE ..."

# Build the command as an argv array rather than a string passed to `eval`, so
# each value stays one argument even if it contains spaces, globs or shell
# metacharacters.
COMMAND=(
  poetry run python evaluation/benchmarks/swe_bench/eval_infer.py
  --eval-num-workers "$NUM_WORKERS"
  --input-file "$INPUT_FILE"
  --dataset "$DATASET"
  --split "$SPLIT"
)

if [[ -n "${EVAL_LIMIT:-}" ]]; then
  echo "EVAL_LIMIT: $EVAL_LIMIT"
  COMMAND+=(--eval-n-limit "$EVAL_LIMIT")
fi

# Run the command
"${COMMAND[@]}"

# update the output with evaluation results
poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py "$INPUT_FILE"

View File

@@ -5,8 +5,7 @@ pynguin_ids = ['pydata__xarray-6548-16541', 'pydata__xarray-7003-16557', 'pydata
ids = ['pydata__xarray-3114-16452', 'pydata__xarray-3151-16453', 'pydata__xarray-3156-16454', 'pydata__xarray-3239-16456', 'pydata__xarray-3239-16457', 'pydata__xarray-3239-16458', 'pydata__xarray-3302-16459', 'pydata__xarray-3364-16461', 'pydata__xarray-3677-16471', 'pydata__xarray-3905-16478', 'pydata__xarray-4182-16484', 'pydata__xarray-4248-16486', 'pydata__xarray-4339-16487', 'pydata__xarray-4419-16488', 'pydata__xarray-4629-16492', 'pydata__xarray-4750-16496', 'pydata__xarray-4802-16505', 'pydata__xarray-4966-16515', 'pydata__xarray-4994-16516', 'pydata__xarray-5033-16517', 'pydata__xarray-5126-16518', 'pydata__xarray-5126-16519', 'pydata__xarray-5131-16520', 'pydata__xarray-5365-16529', 'pydata__xarray-5455-16530', 'pydata__xarray-5662-16532', 'pydata__xarray-5731-16534', 'pydata__xarray-6135-16535', 'pydata__xarray-6135-16536', 'pydata__xarray-6386-16537', 'pydata__xarray-6394-16538', 'pydata__xarray-6400-16539', 'pydata__xarray-6461-16540', 'pydata__xarray-6548-16541', 'pydata__xarray-6599-16543', 'pydata__xarray-6601-16544', 'pydata__xarray-6882-16548', 'pydata__xarray-6889-16549', 'pydata__xarray-7003-16557', 'pydata__xarray-7147-16571', 'pydata__xarray-7150-16572', 'pydata__xarray-7203-16577', 'pydata__xarray-7229-16578', 'pydata__xarray-7393-16581', 'pydata__xarray-7400-16582']
Command eval (our approach):
poetry run ./evaluation/benchmarks/testgeneval/scripts/eval_infer_remote.sh evaluation/evaluation_outputs/outputs/kjain14__testgeneval-test/CodeActAgent/gpt-4o_maxiter_25_N_v0.20.0-no-hint-run_1/output.jsonl 10 kjain14/testgeneval test true
Command run (our approach):
./evaluation/benchmarks/testgeneval/scripts/run_infer.sh llm.eval_gpt HEAD CodeActAgent -1 25 10 kjain14/testgeneval test 1 ../TestGenEval/results/testgeneval/preds/gpt-4o-2024-08-06__testgeneval__0.2__test.jsonl

View File

@@ -41,7 +41,7 @@ from evaluation.utils.shared import (
reset_logger_for_multiprocessing,
run_evaluation,
)
from openhands.core.config import OpenHandsConfig, SandboxConfig, get_parser
from openhands.core.config import OpenHandsConfig, SandboxConfig, get_evaluation_parser
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime
from openhands.events.action import CmdRunAction
@@ -484,7 +484,7 @@ def count_and_log_fields(evaluated_predictions, fields, key):
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--input-file', type=str, required=True, help='Path to input predictions file'
)

View File

@@ -37,8 +37,8 @@ from openhands.core.config import (
AgentConfig,
OpenHandsConfig,
SandboxConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -491,7 +491,7 @@ def prepare_dataset_pre(dataset: pd.DataFrame, filter_column: str) -> pd.DataFra
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -18,8 +18,8 @@ from openhands.core.config import (
LLMConfig,
OpenHandsConfig,
get_agent_config_arg,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.config.agent_config import AgentConfig
from openhands.core.logger import openhands_logger as logger
@@ -197,7 +197,7 @@ def run_evaluator(
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--task-image-name',
type=str,

View File

@@ -19,8 +19,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -157,7 +157,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -31,8 +31,8 @@ from openhands.controller.state.state import State
from openhands.core.config import (
AgentConfig,
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -565,7 +565,7 @@ SWEGYM_EXCLUDE_IDS = [
]
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -1,5 +1,5 @@
import { describe, expect, it } from "vitest";
import { FileService } from "#/api/file-service/file-service.api";
import OpenHands from "#/api/open-hands";
import {
FILE_VARIANTS_1,
FILE_VARIANTS_2,
@@ -10,20 +10,20 @@ import {
* You can find the mock handlers in `frontend/src/mocks/file-service-handlers.ts`.
*/
describe("FileService", () => {
describe("OpenHands File API", () => {
it("should get a list of files", async () => {
await expect(FileService.getFiles("test-conversation-id")).resolves.toEqual(
await expect(OpenHands.getFiles("test-conversation-id")).resolves.toEqual(
FILE_VARIANTS_1,
);
await expect(
FileService.getFiles("test-conversation-id-2"),
OpenHands.getFiles("test-conversation-id-2"),
).resolves.toEqual(FILE_VARIANTS_2);
});
it("should get content of a file", async () => {
await expect(
FileService.getFile("test-conversation-id", "file1.txt"),
OpenHands.getFile("test-conversation-id", "file1.txt"),
).resolves.toEqual("Content of file1.txt");
});
});

View File

@@ -3,8 +3,6 @@ import { afterEach, describe, expect, it, vi } from "vitest";
import userEvent from "@testing-library/user-event";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import { LaunchMicroagentModal } from "#/components/features/chat/microagent/launch-microagent-modal";
import { MemoryService } from "#/api/memory-service/memory-service.api";
import { FileService } from "#/api/file-service/file-service.api";
import { I18nKey } from "#/i18n/declaration";
vi.mock("react-router", async () => ({

View File

@@ -82,5 +82,11 @@ describe("extractModelAndProvider", () => {
model: "claude-opus-4-20250514",
separator: "/",
});
expect(extractModelAndProvider("claude-opus-4-1-20250805")).toEqual({
provider: "anthropic",
model: "claude-opus-4-1-20250805",
separator: "/",
});
});
});

View File

@@ -1,44 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">
<link rel="manifest" href="/site.webmanifest">
<link rel="mask-icon" href="/safari-pinned-tab.svg" color="#5bbad5">
<meta name="msapplication-TileColor" content="#da532c">
<meta name="theme-color" content="#ffffff">
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="theme-color" content="#000000" />
<meta
name="description"
content="OpenHands: Code Less, Make More"
/>
<!--
Notice the use of %PUBLIC_URL% in the tags above.
It will be replaced with the URL of the `public` folder during the build.
Only files inside the `public` folder can be referenced from the HTML.
Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
work correctly both with client-side routing and a non-root public URL.
Learn how to configure a non-root public URL by running `npm run build`.
-->
<title>OpenHands</title>
</head>
<body>
<noscript>You need to enable JavaScript to run this app.</noscript>
<div id="root"></div>
<!--
This HTML file is a template.
If you open it directly in the browser, you will see an empty page.
You can add webfonts, meta tags, or analytics to this file.
The build step will place the bundled scripts into the <body> tag.
To begin the development, run `npm start` or `yarn start`.
To create a production bundle, use `npm run build` or `yarn build`.
-->
<script type="module" src="/src/index.tsx"></script>
</body>
</html>

View File

@@ -1,66 +0,0 @@
import { openHands } from "../open-hands-axios";
import { GetFilesResponse, GetFileResponse } from "./file-service.types";
import { getConversationUrl } from "../conversation.utils";
import { FileUploadSuccessResponse } from "../open-hands.types";
/**
 * Static helpers for the per-conversation file endpoints
 * (`list-files`, `select-file`, `upload-files`).
 */
export class FileService {
  /**
   * List files in the workspace of a conversation.
   * @param conversationId ID of the conversation
   * @param path Optional path to list; sent as the `path` query parameter
   * @returns The response payload (list of file paths)
   */
  static async getFiles(
    conversationId: string,
    path?: string,
  ): Promise<GetFilesResponse> {
    const response = await openHands.get<GetFilesResponse>(
      `${getConversationUrl(conversationId)}/list-files`,
      { params: { path } },
    );
    return response.data;
  }

  /**
   * Fetch the content of a single file.
   * @param conversationId ID of the conversation
   * @param path Full path of the file; sent as the `file` query parameter
   * @returns The `code` field of the response
   */
  static async getFile(conversationId: string, path: string): Promise<string> {
    const response = await openHands.get<GetFileResponse>(
      `${getConversationUrl(conversationId)}/select-file`,
      { params: { file: path } },
    );
    return response.data.code;
  }

  /**
   * Upload several files to the workspace as one multipart request.
   * @param conversationId ID of the conversation
   * @param files Files to upload; each is appended under the `files` field
   * @returns The response payload describing the upload result
   */
  static async uploadFiles(
    conversationId: string,
    files: File[],
  ): Promise<FileUploadSuccessResponse> {
    const payload = new FormData();
    files.forEach((file) => {
      payload.append("files", file);
    });

    const endpoint = `${getConversationUrl(conversationId)}/upload-files`;
    const { data } = await openHands.post<FileUploadSuccessResponse>(
      endpoint,
      payload,
      { headers: { "Content-Type": "multipart/form-data" } },
    );
    return data;
  }
}

View File

@@ -1,5 +0,0 @@
export type GetFilesResponse = string[];
export interface GetFileResponse {
code: string;
}

View File

@@ -1,21 +0,0 @@
import { openHands } from "../open-hands-axios";
interface GetPromptResponse {
status: string;
prompt: string;
}
/** Static helper for the conversation `remember_prompt` endpoint. */
export class MemoryService {
  /**
   * Fetch the prompt associated with an event of a conversation.
   * @param conversationId ID of the conversation
   * @param eventId ID of the event; sent as the `event_id` query parameter
   * @returns The `prompt` field of the response
   */
  static async getPrompt(
    conversationId: string,
    eventId: number,
  ): Promise<string> {
    const endpoint = `/api/conversations/${conversationId}/remember_prompt`;
    const response = await openHands.get<GetPromptResponse>(endpoint, {
      params: { event_id: eventId },
    });
    return response.data.prompt;
  }
}

View File

@@ -15,6 +15,9 @@ import {
GetMicroagentPromptResponse,
CreateMicroagent,
MicroagentContentResponse,
FileUploadSuccessResponse,
GetFilesResponse,
GetFileResponse,
} from "./open-hands.types";
import { openHands } from "./open-hands-axios";
import { ApiSettings, PostApiSettings, Provider } from "#/types/settings";
@@ -618,12 +621,11 @@ class OpenHands {
conversationId: string,
eventId: number,
): Promise<string> {
const { data } = await openHands.get<GetMicroagentPromptResponse>(
`/api/conversations/${conversationId}/remember_prompt`,
{
params: { event_id: eventId },
},
);
const url = `${this.getConversationUrl(conversationId)}/remember-prompt`;
const { data } = await openHands.get<GetMicroagentPromptResponse>(url, {
params: { event_id: eventId },
headers: this.getConversationHeaders(),
});
return data.prompt;
}
@@ -640,6 +642,69 @@ class OpenHands {
return data;
}
/**
* Retrieve the list of files available in the workspace
* @param conversationId ID of the conversation
* @param path Path to list files from. If provided, it lists all the files in the given path
* @returns List of files available in the given path. If path is not provided, it lists all the files in the workspace
*/
static async getFiles(
  conversationId: string,
  path?: string,
): Promise<GetFilesResponse> {
  // GET <conversation url>/list-files with the optional `path` query
  // parameter and the conversation headers; hand back the payload as-is.
  const response = await openHands.get<GetFilesResponse>(
    `${this.getConversationUrl(conversationId)}/list-files`,
    {
      params: { path },
      headers: this.getConversationHeaders(),
    },
  );
  return response.data;
}
/**
* Retrieve the content of a file
* @param conversationId ID of the conversation
* @param path Full path of the file to retrieve
* @returns Code content of the file
*/
static async getFile(conversationId: string, path: string): Promise<string> {
  // GET <conversation url>/select-file; the requested path travels as the
  // `file` query parameter and the `code` field of the response is returned.
  const response = await openHands.get<GetFileResponse>(
    `${this.getConversationUrl(conversationId)}/select-file`,
    {
      params: { file: path },
      headers: this.getConversationHeaders(),
    },
  );
  return response.data.code;
}
/**
* Upload multiple files to the workspace
* @param conversationId ID of the conversation
* @param files List of files.
* @returns list of uploaded files, list of skipped files
*/
static async uploadFiles(
  conversationId: string,
  files: File[],
): Promise<FileUploadSuccessResponse> {
  // Every file is appended under the same multipart field name, "files".
  const payload = new FormData();
  files.forEach((file) => {
    payload.append("files", file);
  });

  const endpoint = `${this.getConversationUrl(conversationId)}/upload-files`;
  const { data } = await openHands.post<FileUploadSuccessResponse>(
    endpoint,
    payload,
    {
      headers: {
        "Content-Type": "multipart/form-data",
        // Conversation headers are spread last, so they win on key clashes.
        ...this.getConversationHeaders(),
      },
    },
  );
  return data;
}
/**
* Get the user installation IDs
* @param provider The provider to get installation IDs for (github, bitbucket, etc.)

View File

@@ -158,3 +158,9 @@ export interface MicroagentContentResponse {
git_provider: Provider;
triggers: string[];
}
export type GetFilesResponse = string[];
export interface GetFileResponse {
code: string;
}

View File

@@ -1,11 +1,11 @@
import { useMutation } from "@tanstack/react-query";
import { FileService } from "#/api/file-service/file-service.api";
import OpenHands from "#/api/open-hands";
export const useUploadFiles = () =>
useMutation({
mutationKey: ["upload-files"],
mutationFn: (variables: { conversationId: string; files: File[] }) =>
FileService.uploadFiles(variables.conversationId!, variables.files),
OpenHands.uploadFiles(variables.conversationId!, variables.files),
onSuccess: async () => {},
meta: {
disableToast: true,

View File

@@ -1,13 +1,13 @@
import { useQuery } from "@tanstack/react-query";
import { useConversationId } from "../use-conversation-id";
import { FileService } from "#/api/file-service/file-service.api";
import OpenHands from "#/api/open-hands";
export const useGetMicroagents = (microagentDirectory: string) => {
const { conversationId } = useConversationId();
return useQuery({
queryKey: ["files", "microagents", conversationId, microagentDirectory],
queryFn: () => FileService.getFiles(conversationId!, microagentDirectory),
queryFn: () => OpenHands.getFiles(conversationId!, microagentDirectory),
enabled: !!conversationId,
select: (data) =>
data.map((fileName) => fileName.replace(microagentDirectory, "")),

View File

@@ -1,5 +1,5 @@
import { useQuery } from "@tanstack/react-query";
import { MemoryService } from "#/api/memory-service/memory-service.api";
import OpenHands from "#/api/open-hands";
import { useConversationId } from "../use-conversation-id";
export const useMicroagentPrompt = (eventId: number) => {
@@ -7,7 +7,7 @@ export const useMicroagentPrompt = (eventId: number) => {
return useQuery({
queryKey: ["memory", "prompt", conversationId, eventId],
queryFn: () => MemoryService.getPrompt(conversationId!, eventId),
queryFn: () => OpenHands.getMicroagentPrompt(conversationId!, eventId),
enabled: !!conversationId,
staleTime: 1000 * 60 * 5, // 5 minutes
gcTime: 1000 * 60 * 15, // 15 minutes

View File

@@ -222,7 +222,7 @@ function AppSettingsScreen() {
className="w-full max-w-[680px]" // Match the width of the language field
/>
<div className="border-t border-t-tertiary pt-6 mt-2">
<div className="border-t border-t-tertiary pt-6 mt-2 hidden">
<h3 className="text-lg font-medium mb-4">
{t(I18nKey.SETTINGS$GIT_SETTINGS)}
</h3>

View File

@@ -14,6 +14,7 @@ export const VERIFIED_MODELS = [
"claude-3-7-sonnet-20250219",
"claude-sonnet-4-20250514",
"claude-opus-4-20250514",
"claude-opus-4-1-20250805",
"gemini-2.5-pro",
"o4-mini",
"deepseek-chat",
@@ -22,11 +23,13 @@ export const VERIFIED_MODELS = [
"devstral-medium-2507",
"kimi-k2-0711-preview",
"qwen3-coder-480b",
"gpt-5-2025-08-07",
];
// LiteLLM does not return OpenAI models with the provider, so we list them here to set them ourselves for consistency
// (e.g., they return `gpt-4o` instead of `openai/gpt-4o`)
export const VERIFIED_OPENAI_MODELS = [
"gpt-5-2025-08-07",
"gpt-4o",
"gpt-4o-mini",
"gpt-4.1",
@@ -47,6 +50,7 @@ export const VERIFIED_ANTHROPIC_MODELS = [
"claude-3-7-sonnet-20250219",
"claude-sonnet-4-20250514",
"claude-opus-4-20250514",
"claude-opus-4-1-20250805",
];
// LiteLLM does not return the compatible Mistral models with the provider, so we list them here to set them ourselves
@@ -61,7 +65,9 @@ export const VERIFIED_MISTRAL_MODELS = [
// (e.g., they return `claude-sonnet-4-20250514` instead of `openhands/claude-sonnet-4-20250514`)
export const VERIFIED_OPENHANDS_MODELS = [
"claude-sonnet-4-20250514",
"gpt-5-2025-08-07",
"claude-opus-4-20250514",
"claude-opus-4-1-20250805",
"gemini-2.5-pro",
"o3",
"o4-mini",

View File

@@ -106,10 +106,15 @@ class CodeActAgent(Agent):
def _get_tools(self) -> list['ChatCompletionToolParam']:
# For these models, we use short tool descriptions ( < 1024 tokens)
# to avoid hitting the OpenAI token limit for tool descriptions.
SHORT_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-', 'o3', 'o1', 'o4']
SHORT_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-4', 'o3', 'o1', 'o4']
use_short_tool_desc = False
if self.llm is not None:
# For historical reasons, previously OpenAI enforces max function description length of 1k characters
# https://community.openai.com/t/function-call-description-max-length/529902
# But it no longer seems to be an issue recently
# https://community.openai.com/t/was-the-character-limit-for-schema-descriptions-upgraded/1225975
# Tested on GPT-5 and longer description still works. But we still keep the logic to be safe for older models.
use_short_tool_desc = any(
model_substr in self.llm.config.model
for model_substr in SHORT_TOOL_DESCRIPTION_LLM_SUBSTRS

View File

@@ -1,3 +1,4 @@
import re
import sys
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
@@ -37,7 +38,16 @@ _SHORT_BASH_DESCRIPTION = """Execute a bash command in the terminal.
def refine_prompt(prompt: str):
if sys.platform == 'win32':
return prompt.replace('bash', 'powershell')
# Replace 'bash' with 'powershell' including tool names like 'execute_bash'
# First replace 'execute_bash' with 'execute_powershell' to handle tool names
result = re.sub(
r'\bexecute_bash\b', 'execute_powershell', prompt, flags=re.IGNORECASE
)
# Then replace standalone 'bash' with 'powershell'
result = re.sub(
r'(?<!execute_)(?<!_)\bbash\b', 'powershell', result, flags=re.IGNORECASE
)
return result
return prompt

View File

@@ -0,0 +1 @@
"""OpenHands CLI module."""

54
openhands/cli/entry.py Normal file
View File

@@ -0,0 +1,54 @@
"""Main entry point for OpenHands CLI with subcommand support."""
import sys
import openhands
import openhands.cli.suppress_warnings # noqa: F401
from openhands.cli.gui_launcher import launch_gui_server
from openhands.cli.main import run_cli_command
from openhands.core.config import get_cli_parser
from openhands.core.config.arg_utils import get_subparser
def main():
    """Dispatch the `openhands` command line to the `cli` or `serve` subcommand.

    Behaviour, in order:
    * A lone `--help` / `-h` prints the top-level help followed by the help of
      the `cli` subcommand, then exits with status 0.
    * When the first argument is not a known subcommand (or there are no
      arguments), `cli` is injected into `sys.argv` as the default subcommand.
    * A truthy `version` attribute on the parsed args prints the version and
      exits with status 0.
    * `serve` launches the GUI server; `cli` (or no command) runs the CLI.
      Anything else prints the help and exits with status 1.
    """
    parser = get_cli_parser()
    argv = sys.argv  # same list object: the insert below mutates sys.argv
    argc = len(argv)

    # Bare help request without a subcommand.
    if argc == 2 and argv[1] in ('--help', '-h'):
        print(parser.format_help())
        print('\n' + '=' * 80)
        print('CLI command help:\n')
        print(get_subparser(parser, 'cli').format_help())
        sys.exit(0)

    # Backward compatibility: treat a missing subcommand as "openhands cli".
    known_commands = ['cli', 'serve']
    if argc == 1 or (argc > 1 and argv[1] not in known_commands):
        argv.insert(1, 'cli')

    args = parser.parse_args()

    if getattr(args, 'version', False):
        print(f'OpenHands CLI version: {openhands.get_version()}')
        sys.exit(0)

    command = args.command
    if command == 'serve':
        launch_gui_server(mount_cwd=args.mount_cwd, gpu=args.gpu)
    elif command in ('cli', None):
        run_cli_command(args)
    else:
        parser.print_help()
        sys.exit(1)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,219 @@
"""GUI launcher for OpenHands CLI."""
import os
import shutil
import subprocess
import sys
from pathlib import Path
from prompt_toolkit import print_formatted_text
from prompt_toolkit.formatted_text import HTML
from openhands import __version__
def _format_docker_command_for_logging(cmd: list[str]) -> str:
"""Format a Docker command for logging with grey color.
Args:
cmd (list[str]): The Docker command as a list of strings
Returns:
str: The formatted command string in grey HTML color
"""
cmd_str = ' '.join(cmd)
return f'<grey>Running Docker command: {cmd_str}</grey>'
def check_docker_requirements() -> bool:
    """Check if Docker is installed and running.

    Two checks are performed: the `docker` executable must be found on PATH,
    and `docker info` must exit with status 0 within 10 seconds. A message
    explaining the failure is printed for each negative outcome.

    Returns:
        bool: True if Docker is available and running, False otherwise.
    """
    # Function-scope import keeps the fix self-contained in this block.
    from html import escape

    # Check if Docker is installed
    if not shutil.which('docker'):
        print_formatted_text(
            HTML('<ansired>❌ Docker is not installed or not in PATH.</ansired>')
        )
        print_formatted_text(
            HTML(
                '<grey>Please install Docker first: https://docs.docker.com/get-docker/</grey>'
            )
        )
        return False

    # Check if Docker daemon is running
    try:
        result = subprocess.run(
            ['docker', 'info'], capture_output=True, text=True, timeout=10
        )
        if result.returncode != 0:
            print_formatted_text(
                HTML('<ansired>❌ Docker daemon is not running.</ansired>')
            )
            print_formatted_text(
                HTML('<grey>Please start Docker and try again.</grey>')
            )
            return False
    except (subprocess.SubprocessError, OSError) as e:
        # SubprocessError already covers TimeoutExpired. OSError covers a
        # docker binary that was found but cannot be executed, so the failure
        # is reported instead of surfacing as a traceback.
        print_formatted_text(
            HTML('<ansired>❌ Failed to check Docker status.</ansired>')
        )
        # Escape the error text: it is embedded in HTML markup.
        print_formatted_text(
            HTML(f'<grey>Error: {escape(str(e), quote=False)}</grey>')
        )
        return False

    return True
def ensure_config_dir_exists() -> Path:
    """Create `~/.openhands` if it is missing and return its path.

    An already existing directory is left untouched.
    """
    openhands_home = Path.home().joinpath('.openhands')
    openhands_home.mkdir(exist_ok=True)
    return openhands_home
def launch_gui_server(mount_cwd: bool = False, gpu: bool = False) -> None:
    """Launch the OpenHands GUI server using Docker.

    Steps: verify Docker is usable, make sure the config directory exists,
    pull the runtime image (5 minute timeout), then run the app image in the
    foreground with `docker run`. The process exits with status 1 when a
    requirement or Docker step fails and with status 0 on Ctrl+C.

    Args:
        mount_cwd: If True, mount the current working directory into the container.
        gpu: If True, enable GPU support by mounting all GPUs into the container via nvidia-docker.
    """
    # Function-scope import keeps the fix self-contained in this block.
    from html import escape

    print_formatted_text(
        HTML('<ansiblue>🚀 Launching OpenHands GUI server...</ansiblue>')
    )
    print_formatted_text('')

    # Check Docker requirements
    if not check_docker_requirements():
        sys.exit(1)

    # Ensure config directory exists
    config_dir = ensure_config_dir_exists()

    # Get the current version for the Docker image
    version = __version__
    runtime_image = f'docker.all-hands.dev/all-hands-ai/runtime:{version}-nikolaik'
    app_image = f'docker.all-hands.dev/all-hands-ai/openhands:{version}'

    print_formatted_text(HTML('<grey>Pulling required Docker images...</grey>'))

    # Pull the runtime image first
    pull_cmd = ['docker', 'pull', runtime_image]
    print_formatted_text(HTML(_format_docker_command_for_logging(pull_cmd)))
    try:
        subprocess.run(
            pull_cmd,
            check=True,
            timeout=300,  # 5 minutes timeout
        )
    except subprocess.CalledProcessError:
        print_formatted_text(
            HTML('<ansired>❌ Failed to pull runtime image.</ansired>')
        )
        sys.exit(1)
    except subprocess.TimeoutExpired:
        print_formatted_text(
            HTML('<ansired>❌ Timeout while pulling runtime image.</ansired>')
        )
        sys.exit(1)

    print_formatted_text('')
    print_formatted_text(
        HTML('<ansigreen>✅ Starting OpenHands GUI server...</ansigreen>')
    )
    print_formatted_text(
        HTML('<grey>The server will be available at: http://localhost:3000</grey>')
    )
    print_formatted_text(HTML('<grey>Press Ctrl+C to stop the server.</grey>'))
    print_formatted_text('')

    # Build the Docker command
    docker_cmd = [
        'docker',
        'run',
        '-it',
        '--rm',
        '--pull=always',
        '-e',
        f'SANDBOX_RUNTIME_CONTAINER_IMAGE={runtime_image}',
        '-e',
        'LOG_ALL_EVENTS=true',
        '-v',
        '/var/run/docker.sock:/var/run/docker.sock',
        '-v',
        f'{config_dir}:/.openhands',
    ]

    # Add GPU support if requested
    if gpu:
        print_formatted_text(
            HTML('<ansigreen>🖥️ Enabling GPU support via nvidia-docker...</ansigreen>')
        )
        # Add the --gpus all flag to enable all GPUs (placed right after `docker run`)
        docker_cmd.insert(2, '--gpus')
        docker_cmd.insert(3, 'all')
        # Add environment variable to pass GPU support to sandbox containers
        docker_cmd.extend(
            [
                '-e',
                'SANDBOX_ENABLE_GPU=true',
            ]
        )

    # Add current working directory mount if requested
    if mount_cwd:
        cwd = Path.cwd()
        # Following the documentation at https://docs.all-hands.dev/usage/runtimes/docker#connecting-to-your-filesystem
        docker_cmd.extend(
            [
                '-e',
                f'SANDBOX_VOLUMES={cwd}:/workspace:rw',
            ]
        )

        # Set user ID for Unix-like systems only
        if os.name != 'nt':  # Not Windows
            try:
                user_id = subprocess.check_output(['id', '-u'], text=True).strip()
                docker_cmd.extend(['-e', f'SANDBOX_USER_ID={user_id}'])
            except (subprocess.CalledProcessError, FileNotFoundError):
                # If 'id' command fails or doesn't exist, skip setting user ID
                pass

        # Print the folder that will be mounted to inform the user.
        # The path is escaped because it is embedded in HTML markup and may
        # contain characters such as '&' or '<'.
        cwd_markup = escape(str(cwd), quote=False)
        print_formatted_text(
            HTML(
                f'<ansigreen>📂 Mounting current directory:</ansigreen> <ansiyellow>{cwd_markup}</ansiyellow> <ansigreen>to</ansigreen> <ansiyellow>/workspace</ansiyellow>'
            )
        )

    docker_cmd.extend(
        [
            '-p',
            '3000:3000',
            '--add-host',
            'host.docker.internal:host-gateway',
            '--name',
            'openhands-app',
            app_image,
        ]
    )

    try:
        # Log and run the Docker command
        print_formatted_text(HTML(_format_docker_command_for_logging(docker_cmd)))
        subprocess.run(docker_cmd, check=True)
    except subprocess.CalledProcessError as e:
        print_formatted_text('')
        print_formatted_text(
            HTML('<ansired>❌ Failed to start OpenHands GUI server.</ansired>')
        )
        # str(e) embeds the whole command (including any mounted path), so it
        # must be escaped before being placed into HTML markup.
        print_formatted_text(
            HTML(f'<grey>Error: {escape(str(e), quote=False)}</grey>')
        )
        sys.exit(1)
    except KeyboardInterrupt:
        print_formatted_text('')
        print_formatted_text(
            HTML('<ansigreen>✓ OpenHands GUI server stopped successfully.</ansigreen>')
        )
        sys.exit(0)

View File

@@ -45,7 +45,6 @@ from openhands.controller import AgentController
from openhands.controller.agent import Agent
from openhands.core.config import (
OpenHandsConfig,
parse_arguments,
setup_config_from_args,
)
from openhands.core.config.condenser_config import NoOpCondenserConfig
@@ -129,12 +128,13 @@ async def run_session(
conversation_instructions: str | None = None,
session_name: str | None = None,
skip_banner: bool = False,
conversation_id: str | None = None,
) -> bool:
reload_microagents = False
new_session_requested = False
exit_reason = ExitReason.INTENTIONAL
sid = generate_sid(config, session_name)
sid = conversation_id or generate_sid(config, session_name)
is_loaded = asyncio.Event()
is_paused = asyncio.Event() # Event to track agent pause requests
always_confirm_mode = False # Flag to enable always confirm mode
@@ -523,10 +523,8 @@ def run_alias_setup_flow(config: OpenHandsConfig) -> None:
print_formatted_text('')
async def main_with_loop(loop: asyncio.AbstractEventLoop) -> None:
async def main_with_loop(loop: asyncio.AbstractEventLoop, args) -> None:
"""Runs the agent in CLI mode."""
args = parse_arguments()
# Set log level from command line argument if provided
if args.log_level and isinstance(args.log_level, str):
log_level = getattr(logging, str(args.log_level).upper())
@@ -574,13 +572,9 @@ async def main_with_loop(loop: asyncio.AbstractEventLoop) -> None:
# Use settings from settings store if available and override with command line arguments
if settings:
# Handle agent configuration
if args.agent_cls:
config.default_agent = str(args.agent_cls)
else:
# settings.agent is not None because we check for it in setup_config_from_args
assert settings.agent is not None
config.default_agent = settings.agent
# settings.agent is not None because we check for it in setup_config_from_args
assert settings.agent is not None
config.default_agent = settings.agent
# Handle LLM configuration with proper precedence:
# 1. CLI parameters (-l) have highest precedence (already handled in setup_config_from_args)
@@ -705,6 +699,7 @@ After reviewing the file, please ask the user what they would like to do with it
task_str,
session_name=args.name,
skip_banner=banner_shown,
conversation_id=args.conversation,
)
# If a new session was requested, run it
@@ -717,18 +712,19 @@ After reviewing the file, please ask the user what they would like to do with it
get_runtime_cls(config.runtime).teardown(config)
def main():
def run_cli_command(args):
"""Run the CLI command with proper error handling and cleanup."""
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
loop.run_until_complete(main_with_loop(loop))
loop.run_until_complete(main_with_loop(loop, args))
except KeyboardInterrupt:
print_formatted_text('⚠️ Session was interrupted: interrupted\n')
except ConnectionRefusedError as e:
print(f'Connection refused: {e}')
print_formatted_text(f'Connection refused: {e}')
sys.exit(1)
except Exception as e:
print(f'An error occurred: {e}')
print_formatted_text(f'An error occurred: {e}')
sys.exit(1)
finally:
try:
@@ -741,9 +737,5 @@ def main():
loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
loop.close()
except Exception as e:
print(f'Error during cleanup: {e}')
print_formatted_text(f'Error during cleanup: {e}')
sys.exit(1)
if __name__ == '__main__':
main()

View File

@@ -27,7 +27,7 @@ from openhands.core.config.condenser_config import (
CondenserPipelineConfig,
ConversationWindowCondenserConfig,
)
from openhands.core.config.utils import OH_DEFAULT_AGENT
from openhands.core.config.config_utils import OH_DEFAULT_AGENT
from openhands.memory.condenser.impl.llm_summarizing_condenser import (
LLMSummarizingCondenserConfig,
)

View File

@@ -5,7 +5,9 @@
import asyncio
import contextlib
import datetime
import io
import json
import shutil
import sys
import threading
import time
@@ -28,6 +30,8 @@ from prompt_toolkit.patch_stdout import patch_stdout
from prompt_toolkit.shortcuts import print_container
from prompt_toolkit.styles import Style
from prompt_toolkit.widgets import Frame, TextArea
from rich.console import Console
from rich.markdown import Markdown
from openhands import __version__
from openhands.core.config import OpenHandsConfig
@@ -36,6 +40,7 @@ from openhands.events import EventSource, EventStream
from openhands.events.action import (
Action,
ActionConfirmationStatus,
AgentFinishAction,
ChangeAgentStateAction,
CmdRunAction,
MCPAction,
@@ -65,10 +70,12 @@ MAX_RECENT_THOUGHTS = 5
# Color and styling constants
COLOR_GOLD = '#FFD700'
COLOR_GREY = '#808080'
COLOR_AGENT_BLUE = '#5FAFFF' # Soft blue for all agent outputs
DEFAULT_STYLE = Style.from_dict(
{
'gold': COLOR_GOLD,
'grey': COLOR_GREY,
'agent-blue': COLOR_AGENT_BLUE,
'prompt': f'{COLOR_GOLD} bold',
}
)
@@ -252,7 +259,19 @@ def display_thought_if_new(thought: str) -> None:
def display_event(event: Event, config: OpenHandsConfig) -> None:
global streaming_output_text_area
with print_lock:
if isinstance(event, CmdRunAction):
if isinstance(event, AgentFinishAction):
# Handle agent finish actions with special styling
# Determine the message to display
if event.final_thought:
message = event.final_thought
elif event.thought:
message = event.thought
else:
message = "All done! What's next on the agenda?"
# Display with finish styling
display_agent_message(message, is_finish=True)
elif isinstance(event, CmdRunAction):
# For CmdRunAction, display thought first, then command
if hasattr(event, 'thought') and event.thought:
display_message(event.thought)
@@ -275,8 +294,8 @@ def display_event(event: Event, config: OpenHandsConfig) -> None:
if isinstance(event, MessageAction):
if event.source == EventSource.AGENT:
# Check if this message content is a duplicate thought
display_thought_if_new(event.content)
# Display agent messages with distinctive styling
display_agent_message(event.content)
elif isinstance(event, CmdOutputObservation):
display_command_output(event.content)
elif isinstance(event, FileEditObservation):
@@ -291,6 +310,24 @@ def display_event(event: Event, config: OpenHandsConfig) -> None:
display_error(event.content)
def process_markdown_for_terminal(text: str) -> str:
    """Render markdown `text` into a plain string using Rich.

    Falsy input (empty string, None) is returned unchanged. Otherwise the
    markdown is printed to an in-memory console and the captured output is
    returned with leading and trailing whitespace stripped.
    """
    if text:
        # Print into a StringIO-backed console so the output can be captured.
        # No explicit width is passed (width=None).
        console = Console(file=io.StringIO(), highlight=False, width=None)
        console.print(Markdown(text))
        captured = console.file.getvalue()  # type: ignore
        return captured.strip()
    return text
def display_message(message: str) -> None:
message = message.strip()
@@ -298,6 +335,38 @@ def display_message(message: str) -> None:
print_formatted_text(f'\n{message}')
def display_agent_message(message: str, is_finish: bool = False) -> None:
    """Print an agent message with a coloured header and rendered markdown.

    Nothing is printed when the message is empty after stripping.

    Args:
        message: The message content to display
        is_finish: Whether this is a finish message (changes the icon and header)
    """
    body = message.strip()
    if not body:
        return

    # Render markdown; on any failure fall back to the stripped raw text.
    try:
        rendered = process_markdown_for_terminal(body)
    except Exception:
        rendered = body

    # Icon and header depend on the message type.
    if is_finish:
        icon, header_text = '🎯', 'Agent Finished'
    else:
        icon, header_text = '🔹', 'Agent Message'

    agent_style = 'fg:' + COLOR_AGENT_BLUE

    # Header line, blank line, message content, blank line.
    print_formatted_text(FormattedText([(agent_style, f'\n{icon} {header_text}')]))
    print_formatted_text('')
    print_formatted_text(FormattedText([(agent_style, rendered)]))
    print_formatted_text('')
def display_error(error: str) -> None:
error = error.strip()

View File

@@ -150,6 +150,7 @@ def organize_models_and_providers(
VERIFIED_PROVIDERS = ['openhands', 'anthropic', 'openai', 'mistral']
VERIFIED_OPENAI_MODELS = [
'gpt-5-2025-08-07',
'o4-mini',
'gpt-4o',
'gpt-4o-mini',
@@ -164,6 +165,7 @@ VERIFIED_OPENAI_MODELS = [
VERIFIED_ANTHROPIC_MODELS = [
'claude-sonnet-4-20250514',
'claude-opus-4-20250514',
'claude-opus-4-1-20250805',
'claude-3-7-sonnet-20250219',
'claude-3-sonnet-20240229',
'claude-3-opus-20240229',
@@ -183,7 +185,9 @@ VERIFIED_MISTRAL_MODELS = [
VERIFIED_OPENHANDS_MODELS = [
'claude-sonnet-4-20250514',
'gpt-5-2025-08-07',
'claude-opus-4-20250514',
'claude-opus-4-1-20250805',
'devstral-small-2507',
'devstral-medium-2507',
'o3',

View File

@@ -1,4 +1,9 @@
from openhands.core.config.agent_config import AgentConfig
from openhands.core.config.arg_utils import (
get_cli_parser,
get_evaluation_parser,
get_headless_parser,
)
from openhands.core.config.cli_config import CLIConfig
from openhands.core.config.config_utils import (
OH_DEFAULT_AGENT,
@@ -15,7 +20,6 @@ from openhands.core.config.utils import (
finalize_config,
get_agent_config_arg,
get_llm_config_arg,
get_parser,
load_from_env,
load_from_toml,
load_openhands_config,
@@ -41,7 +45,9 @@ __all__ = [
'get_agent_config_arg',
'get_llm_config_arg',
'get_field_info',
'get_parser',
'get_cli_parser',
'get_headless_parser',
'get_evaluation_parser',
'parse_arguments',
'setup_config_from_args',
]

View File

@@ -0,0 +1,224 @@
"""Centralized command line argument configuration for OpenHands CLI and headless modes."""
import argparse
from argparse import ArgumentParser, _SubParsersAction
def get_subparser(parser: ArgumentParser, name: str) -> ArgumentParser:
    """Return the subcommand parser registered under `name` on `parser`.

    Args:
        parser: Parser whose registered actions are searched for subparser groups.
        name: Subcommand name to look up.

    Returns:
        The parser registered for the subcommand.

    Raises:
        ValueError: If no subparser group of `parser` contains `name`.
    """
    for candidate in parser._actions:
        # Only subparser groups carry a name -> parser mapping in `choices`.
        if not isinstance(candidate, _SubParsersAction):
            continue
        if name in candidate.choices:
            return candidate.choices[name]
    raise ValueError(f"Subparser '{name}' not found")
def add_common_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the options that CLI and headless modes have in common.

    The options are added to ``parser`` in place, in the order of the table
    below: config file, task, task file, session name, log level, LLM config,
    agent config and version flag.

    Args:
        parser: The parser (or sub-parser) that receives the options.
    """
    # Each entry is (option strings, keyword arguments for ``add_argument``).
    shared_options = (
        (
            ('--config-file',),
            {
                'type': str,
                'default': 'config.toml',
                'help': 'Path to the config file (default: config.toml in the current directory)',
            },
        ),
        (
            ('-t', '--task'),
            {
                'type': str,
                'default': '',
                'help': 'The task for the agent to perform',
            },
        ),
        (
            ('-f', '--file'),
            {
                'type': str,
                'help': 'Path to a file containing the task. Overrides -t if both are provided.',
            },
        ),
        (
            ('-n', '--name'),
            {
                'help': 'Session name',
                'type': str,
                'default': '',
            },
        ),
        (
            ('--log-level',),
            {
                'help': 'Set the log level',
                'type': str,
                'default': None,
            },
        ),
        (
            ('-l', '--llm-config'),
            {
                'default': None,
                'type': str,
                'help': 'Replace default LLM ([llm] section in config.toml) config with the specified LLM config, e.g. "llama3" for [llm.llama3] section in config.toml',
            },
        ),
        (
            ('--agent-config',),
            {
                'default': None,
                'type': str,
                'help': 'Replace default Agent ([agent] section in config.toml) config with the specified Agent config, e.g. "CodeAct" for [agent.CodeAct] section in config.toml',
            },
        ),
        (
            ('-v', '--version'),
            {
                'action': 'store_true',
                'help': 'Show version information',
            },
        ),
    )
    for option_strings, settings in shared_options:
        parser.add_argument(*option_strings, **settings)
def add_evaluation_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the ``--eval-*`` options used by evaluation mode.

    The options are added to ``parser`` in place, in table order.

    Args:
        parser: The parser that receives the evaluation options.
    """
    # Columns: option string, default value, value type, help text.
    evaluation_options = (
        (
            '--eval-output-dir',
            'evaluation/evaluation_outputs/outputs',
            str,
            'The directory to save evaluation output',
        ),
        (
            '--eval-n-limit',
            None,
            int,
            'The number of instances to evaluate',
        ),
        (
            '--eval-num-workers',
            4,
            int,
            'The number of workers to use for evaluation',
        ),
        (
            '--eval-note',
            None,
            str,
            'The note to add to the evaluation directory',
        ),
        (
            '--eval-ids',
            None,
            str,
            'The comma-separated list (in quotes) of IDs of the instances to evaluate',
        ),
    )
    for option_string, default_value, value_type, help_text in evaluation_options:
        parser.add_argument(
            option_string,
            default=default_value,
            type=value_type,
            help=help_text,
        )
def add_headless_specific_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the options that only headless mode accepts.

    Adds, in order: working directory, agent class, iteration limit, budget
    limit, the ``--no-auto-continue`` switch and the repository to clone.

    Args:
        parser: The parser that receives the headless-only options.
    """
    # Each entry is (option strings, keyword arguments for ``add_argument``).
    headless_options = (
        (
            ('-d', '--directory'),
            {
                'type': str,
                'help': 'The working directory for the agent',
            },
        ),
        (
            ('-c', '--agent-cls'),
            {
                'default': None,
                'type': str,
                'help': 'Name of the default agent to use',
            },
        ),
        (
            ('-i', '--max-iterations'),
            {
                'default': None,
                'type': int,
                'help': 'The maximum number of iterations to run the agent',
            },
        ),
        (
            ('-b', '--max-budget-per-task'),
            {
                'type': float,
                'help': 'The maximum budget allowed per task, beyond which the agent will stop.',
            },
        ),
        (
            ('--no-auto-continue',),
            {
                'help': 'Disable auto-continue responses in headless mode (i.e. headless will read from stdin instead of auto-continuing)',
                'action': 'store_true',
                'default': False,
            },
        ),
        (
            ('--selected-repo',),
            {
                'help': 'GitHub repository to clone (format: owner/repo)',
                'type': str,
                'default': None,
            },
        ),
    )
    for option_strings, settings in headless_options:
        parser.add_argument(*option_strings, **settings)
def _parse_bool_flag(value: str) -> bool:
    """Convert a command-line token such as ``true`` or ``0`` into a bool.

    ``type=bool`` must not be used with argparse: ``bool('false')`` is
    ``True`` because every non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not a recognised spelling.
    """
    normalized = value.strip().lower()
    if normalized in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    # The empty string stays False, as it was under ``bool('')``.
    if normalized in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError(
        f'Expected a boolean value (true/false), got {value!r}'
    )
def get_cli_parser() -> argparse.ArgumentParser:
    """Create the ``openhands`` parser with its ``serve`` and ``cli`` commands.

    Layout of the returned parser:
      * top level: ``--conversation`` plus the sub-command selector, stored
        under ``command``;
      * ``serve``: ``--mount-cwd`` and ``--gpu`` switches;
      * ``cli``: the common arguments plus ``--override-cli-mode``.

    Returns:
        The configured top-level parser.
    """
    # Welcome text shown by ``openhands --help``.
    description = (
        'Welcome to OpenHands: Code Less, Make More\n\n'
        'OpenHands supports two main commands:\n'
        '  serve - Launch the OpenHands GUI server (web interface)\n'
        '  cli   - Run OpenHands in CLI mode (terminal interface)\n\n'
        'Running "openhands" without a command is the same as "openhands cli"'
    )

    parser = argparse.ArgumentParser(
        description=description,
        prog='openhands',
        # Keep the line breaks of the description as written above.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='For more information about a command, run: openhands COMMAND --help',
    )

    subparsers = parser.add_subparsers(
        dest='command',
        title='commands',
        description='OpenHands supports two main commands:',
        metavar='COMMAND',
    )

    # 'serve' sub-command
    serve_parser = subparsers.add_parser(
        'serve', help='Launch the OpenHands GUI server using Docker (web interface)'
    )
    serve_parser.add_argument(
        '--mount-cwd',
        help='Mount the current working directory into the GUI server container',
        action='store_true',
        default=False,
    )
    serve_parser.add_argument(
        '--gpu',
        help='Enable GPU support by mounting all GPUs into the Docker container via nvidia-docker',
        action='store_true',
        default=False,
    )

    # 'cli' sub-command: the shared arguments plus the CLI-mode override
    cli_parser = subparsers.add_parser(
        'cli', help='Run OpenHands in CLI mode (terminal interface)'
    )
    add_common_arguments(cli_parser)

    cli_parser.add_argument(
        '--override-cli-mode',
        help='Override the default settings for CLI mode',
        # Was ``type=bool``, which parsed "false" (and any other non-empty
        # value) as True.
        type=_parse_bool_flag,
        default=False,
    )

    # Registered on the top-level parser, not on the 'cli' sub-parser.
    parser.add_argument(
        '--conversation',
        help='The conversation id to continue',
        type=str,
        default=None,
    )

    return parser
def get_headless_parser() -> argparse.ArgumentParser:
    """Build the headless-mode parser: common plus headless-only arguments.

    Returns:
        A new parser described as 'Run the agent via CLI'.
    """
    headless_parser = argparse.ArgumentParser(description='Run the agent via CLI')
    # Registration order decides the order of the options in ``--help``.
    for register in (add_common_arguments, add_headless_specific_arguments):
        register(headless_parser)
    return headless_parser
def get_evaluation_parser() -> argparse.ArgumentParser:
    """Build the evaluation-mode parser.

    It carries the common arguments, then the headless-only arguments, then
    the ``--eval-*`` arguments.

    Returns:
        A new parser described as 'Run OpenHands in evaluation mode'.
    """
    evaluation_parser = argparse.ArgumentParser(
        description='Run OpenHands in evaluation mode'
    )
    registration_steps = (
        add_common_arguments,
        add_headless_specific_arguments,
        add_evaluation_arguments,
    )
    for register in registration_steps:
        register(evaluation_parser)
    return evaluation_parser

View File

@@ -1,3 +1,5 @@
from __future__ import annotations
import os
import re
import shlex
@@ -302,6 +304,13 @@ class MCPConfig(BaseModel):
raise ValueError(f'Invalid MCP configuration: {e}')
return mcp_mapping
def merge(self, other: MCPConfig):
    """Return a new ``MCPConfig`` combining this config with ``other``.

    For each of the SSE, stdio and SHTTP server collections the result holds
    this config's entries followed by ``other``'s, joined with ``+``.

    Args:
        other: The configuration whose servers are appended after this one's.
    """
    combined_sse = self.sse_servers + other.sse_servers
    combined_stdio = self.stdio_servers + other.stdio_servers
    combined_shttp = self.shttp_servers + other.shttp_servers
    return MCPConfig(
        sse_servers=combined_sse,
        stdio_servers=combined_stdio,
        shttp_servers=combined_shttp,
    )
class OpenHandsMCPConfig:
@staticmethod

View File

@@ -15,15 +15,12 @@ from pydantic import BaseModel, SecretStr, ValidationError
from openhands import __version__
from openhands.core import logger
from openhands.core.config.agent_config import AgentConfig
from openhands.core.config.arg_utils import get_headless_parser
from openhands.core.config.condenser_config import (
CondenserConfig,
condenser_config_from_toml_section,
create_condenser_config,
)
from openhands.core.config.config_utils import (
OH_DEFAULT_AGENT,
OH_MAX_ITERATIONS,
)
from openhands.core.config.extended_config import ExtendedConfig
from openhands.core.config.kubernetes_config import KubernetesConfig
from openhands.core.config.llm_config import LLMConfig
@@ -674,142 +671,9 @@ def get_condenser_config_arg(
return None
# Command line arguments
def _str_to_bool(value: str) -> bool:
    """Convert a command-line token such as ``true`` or ``0`` into a bool.

    ``type=bool`` must not be used with argparse: ``bool('false')`` is
    ``True`` because every non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not a recognised spelling.
    """
    normalized = value.strip().lower()
    if normalized in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    # The empty string stays False, as it was under ``bool('')``.
    if normalized in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError(
        f'Expected a boolean value (true/false), got {value!r}'
    )
def get_parser() -> argparse.ArgumentParser:
    """Get the argument parser.

    Builds one flat parser that carries the general, evaluation (``--eval-*``)
    and headless/CLI options together.

    Returns:
        A new parser described as 'Run the agent via CLI'.
    """
    parser = argparse.ArgumentParser(description='Run the agent via CLI')

    # Add version argument
    parser.add_argument(
        '-v', '--version', action='store_true', help='Show version information'
    )

    parser.add_argument(
        '--config-file',
        type=str,
        default='config.toml',
        help='Path to the config file (default: config.toml in the current directory)',
    )
    parser.add_argument(
        '-d',
        '--directory',
        type=str,
        help='The working directory for the agent',
    )
    parser.add_argument(
        '-t',
        '--task',
        type=str,
        default='',
        help='The task for the agent to perform',
    )
    parser.add_argument(
        '-f',
        '--file',
        type=str,
        help='Path to a file containing the task. Overrides -t if both are provided.',
    )
    parser.add_argument(
        '-c',
        '--agent-cls',
        default=OH_DEFAULT_AGENT,
        type=str,
        help='Name of the default agent to use',
    )
    parser.add_argument(
        '-i',
        '--max-iterations',
        default=OH_MAX_ITERATIONS,
        type=int,
        help='The maximum number of iterations to run the agent',
    )
    parser.add_argument(
        '-b',
        '--max-budget-per-task',
        type=float,
        help='The maximum budget allowed per task, beyond which the agent will stop.',
    )
    # --eval configs are for evaluations only
    parser.add_argument(
        '--eval-output-dir',
        default='evaluation/evaluation_outputs/outputs',
        type=str,
        help='The directory to save evaluation output',
    )
    parser.add_argument(
        '--eval-n-limit',
        default=None,
        type=int,
        help='The number of instances to evaluate',
    )
    parser.add_argument(
        '--eval-num-workers',
        default=4,
        type=int,
        help='The number of workers to use for evaluation',
    )
    parser.add_argument(
        '--eval-note',
        default=None,
        type=str,
        help='The note to add to the evaluation directory',
    )
    parser.add_argument(
        '-l',
        '--llm-config',
        default=None,
        type=str,
        help='Replace default LLM ([llm] section in config.toml) config with the specified LLM config, e.g. "llama3" for [llm.llama3] section in config.toml',
    )
    parser.add_argument(
        '--agent-config',
        default=None,
        type=str,
        help='Replace default Agent ([agent] section in config.toml) config with the specified Agent config, e.g. "CodeAct" for [agent.CodeAct] section in config.toml',
    )
    parser.add_argument(
        '-n',
        '--name',
        help='Session name',
        type=str,
        default='',
    )
    parser.add_argument(
        '--eval-ids',
        default=None,
        type=str,
        help='The comma-separated list (in quotes) of IDs of the instances to evaluate',
    )
    parser.add_argument(
        '--no-auto-continue',
        help='Disable auto-continue responses in headless mode (i.e. headless will read from stdin instead of auto-continuing)',
        action='store_true',
        default=False,
    )
    parser.add_argument(
        '--selected-repo',
        help='GitHub repository to clone (format: owner/repo)',
        type=str,
        default=None,
    )
    parser.add_argument(
        '--override-cli-mode',
        help='Override the default settings for CLI mode',
        # Was ``type=bool``, which parsed "false" (and any other non-empty
        # value) as True.
        type=_str_to_bool,
        default=False,
    )
    parser.add_argument(
        '--log-level',
        help='Set the log level',
        type=str,
        default=None,
    )
    return parser
def parse_arguments() -> argparse.Namespace:
"""Parse command line arguments."""
parser = get_parser()
parser = get_headless_parser()
args = parser.parse_args()
if args.version:
@@ -914,17 +778,17 @@ def setup_config_from_args(args: argparse.Namespace) -> OpenHandsConfig:
)
# Override default agent if provided
if args.agent_cls:
if hasattr(args, 'agent_cls') and args.agent_cls:
config.default_agent = args.agent_cls
# Set max iterations and max budget per task if provided, otherwise fall back to config values
if args.max_iterations is not None:
if hasattr(args, 'max_iterations') and args.max_iterations is not None:
config.max_iterations = args.max_iterations
if args.max_budget_per_task is not None:
if hasattr(args, 'max_budget_per_task') and args.max_budget_per_task is not None:
config.max_budget_per_task = args.max_budget_per_task
# Read selected repository in config for use by CLI and main.py
if args.selected_repo is not None:
if hasattr(args, 'selected_repo') and args.selected_repo is not None:
config.sandbox.selected_repo = args.selected_repo
return config

View File

@@ -383,7 +383,7 @@ Do NOT assume the environment is the same as in the example above.
"""
example = example.lstrip()
return example
return refine_prompt(example)
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX = get_example_for_tools

View File

@@ -63,6 +63,7 @@ CACHE_PROMPT_SUPPORTED_MODELS = [
'claude-sonnet-4-20250514',
'claude-sonnet-4',
'claude-opus-4-20250514',
'claude-opus-4-1-20250805',
]
# function calling supporting models
@@ -77,6 +78,7 @@ FUNCTION_CALLING_SUPPORTED_MODELS = [
'claude-sonnet-4-20250514',
'claude-sonnet-4',
'claude-opus-4-20250514',
'claude-opus-4-1-20250805',
'gpt-4o-mini',
'gpt-4o',
'o1-2024-12-17',
@@ -92,6 +94,7 @@ FUNCTION_CALLING_SUPPORTED_MODELS = [
'kimi-k2-instruct',
'Qwen3-Coder-480B-A35B-Instruct',
'qwen3-coder', # this will match both qwen3-coder-480b (openhands provider) and qwen3-coder (for openrouter)
'gpt-5-2025-08-07',
]
REASONING_EFFORT_SUPPORTED_MODELS = [
@@ -105,6 +108,7 @@ REASONING_EFFORT_SUPPORTED_MODELS = [
'o4-mini-2025-04-16',
'gemini-2.5-flash',
'gemini-2.5-pro',
'gpt-5-2025-08-07',
]
MODELS_WITHOUT_STOP_WORDS = [

View File

@@ -676,9 +676,7 @@ class ActionExecutor:
if __name__ == '__main__':
logger.warning('Starting Action Execution Server')
logger.warning('Arguments passed to script:')
for i, arg in enumerate(sys.argv):
logger.warning(f'Argument {i}: {arg}')
parser = argparse.ArgumentParser()
parser.add_argument('port', type=int, help='Port to listen on')
parser.add_argument('--working-dir', type=str, help='Working directory')

View File

@@ -49,124 +49,72 @@ def init_user_and_working_directory(
if username == os.getenv('USER') and username not in ['root', 'openhands']:
return None
# Skip root since it is already created
if username != 'root':
# Check if the username already exists
logger.info(f'Attempting to create user `{username}` with UID {user_id}.')
existing_user_id = -1
try:
result = subprocess.run(
f'id -u {username}', shell=True, check=True, capture_output=True
)
existing_user_id = int(result.stdout.decode().strip())
# The user ID already exists, skip setup
if existing_user_id == user_id:
logger.debug(
f'User `{username}` already has the provided UID {user_id}. Skipping user setup.'
)
else:
logger.warning(
f'User `{username}` already exists with UID {existing_user_id}. Skipping user setup.'
)
return existing_user_id
return None
except subprocess.CalledProcessError as e:
# Returncode 1 indicates, that the user does not exist yet
if e.returncode == 1:
logger.info(
f'User `{username}` does not exist. Proceeding with user creation.'
)
else:
logger.error(
f'Error checking user `{username}`, skipping setup:\n{e}\n'
)
raise
# Add sudoer
sudoer_line = r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers"
output = subprocess.run(sudoer_line, shell=True, capture_output=True)
if output.returncode != 0:
raise RuntimeError(f'Failed to add sudoer: {output.stderr.decode()}')
logger.debug(f'Added sudoer successfully. Output: [{output.stdout.decode()}]')
command = (
f'useradd -rm -d /home/{username} -s /bin/bash '
f'-g root -G sudo -u {user_id} {username}'
)
output = subprocess.run(command, shell=True, capture_output=True)
if output.returncode == 0:
logger.debug(
f'Added user `{username}` successfully with UID {user_id}. Output: [{output.stdout.decode()}]'
)
else:
raise RuntimeError(
f'Failed to create user `{username}` with UID {user_id}. Output: [{output.stderr.decode()}]'
)
# First create the working directory, independent of the user
logger.debug(f'Client working directory: {initial_cwd}')
command = f'umask 002; mkdir -p {initial_cwd}'
output = subprocess.run(command, shell=True, capture_output=True)
out_str = output.stdout.decode()
logger.debug(f'mkdir command result: returncode={output.returncode}, stdout=[{out_str}], stderr=[{output.stderr.decode()}]')
# Check current ownership before changing it
check_cmd = f'ls -la {initial_cwd}'
check_output = subprocess.run(check_cmd, shell=True, capture_output=True)
logger.debug(f'Current ownership: {check_output.stdout.decode()}')
# Check if we're running as root
whoami_output = subprocess.run('whoami', shell=True, capture_output=True)
current_user = whoami_output.stdout.decode().strip()
logger.debug(f'Current user: {current_user}')
# Use sudo only if not running as root
sudo_prefix = '' if current_user == 'root' else 'sudo '
command = f'{sudo_prefix}chown -R {username}:{username} {initial_cwd}'
logger.debug(f'Executing chown command: {command}')
command = f'chown -R {username}:root {initial_cwd}'
output = subprocess.run(command, shell=True, capture_output=True)
out_str += output.stdout.decode()
logger.debug(f'chown command result: returncode={output.returncode}, stdout=[{output.stdout.decode()}], stderr=[{output.stderr.decode()}]')
if output.returncode != 0 or output.stderr:
err_str = output.stderr.decode()
logger.error(f'chown command failed: returncode={output.returncode}, stderr: {err_str}')
out_str += f' [stderr: {err_str}]'
command = f'{sudo_prefix}chmod g+rw {initial_cwd}'
logger.debug(f'Executing chmod command: {command}')
command = f'chmod g+rw {initial_cwd}'
output = subprocess.run(command, shell=True, capture_output=True)
out_str += output.stdout.decode()
logger.debug(f'chmod command result: returncode={output.returncode}, stdout=[{output.stdout.decode()}], stderr=[{output.stderr.decode()}]')
if output.returncode != 0 or output.stderr:
err_str = output.stderr.decode()
logger.error(f'chmod command failed: returncode={output.returncode}, stderr: {err_str}')
out_str += f' [stderr: {err_str}]'
# Verify final ownership
check_cmd = f'ls -la {initial_cwd}'
check_output = subprocess.run(check_cmd, shell=True, capture_output=True)
final_ownership = check_output.stdout.decode()
logger.debug(f'Final ownership: {final_ownership}')
# If chown failed and directory is still owned by root, try alternative approaches
if 'root root' in final_ownership and username != 'root':
logger.warning(f'Directory {initial_cwd} is still owned by root, trying alternative approaches')
# Try to make it writable for the user's group
alt_command = f'{sudo_prefix}chmod -R g+rwx {initial_cwd}'
logger.debug(f'Executing alternative chmod command: {alt_command}')
alt_output = subprocess.run(alt_command, shell=True, capture_output=True)
logger.debug(f'Alternative chmod result: returncode={alt_output.returncode}, stderr=[{alt_output.stderr.decode()}]')
# Try to add the user to the root group (as a last resort)
if alt_output.returncode != 0:
group_command = f'{sudo_prefix}usermod -aG root {username}'
logger.debug(f'Executing usermod command: {group_command}')
group_output = subprocess.run(group_command, shell=True, capture_output=True)
logger.debug(f'Usermod result: returncode={group_output.returncode}, stderr=[{group_output.stderr.decode()}]')
logger.debug(f'Created working directory. Output: [{out_str}]')
# Skip root since it is already created
if username == 'root':
return None
# Check if the username already exists
existing_user_id = -1
try:
result = subprocess.run(
f'id -u {username}', shell=True, check=True, capture_output=True
)
existing_user_id = int(result.stdout.decode().strip())
# The user ID already exists, skip setup
if existing_user_id == user_id:
logger.debug(
f'User `{username}` already has the provided UID {user_id}. Skipping user setup.'
)
else:
logger.warning(
f'User `{username}` already exists with UID {existing_user_id}. Skipping user setup.'
)
return existing_user_id
return None
except subprocess.CalledProcessError as e:
# Returncode 1 indicates, that the user does not exist yet
if e.returncode == 1:
logger.debug(
f'User `{username}` does not exist. Proceeding with user creation.'
)
else:
logger.error(f'Error checking user `{username}`, skipping setup:\n{e}\n')
raise
# Add sudoer
sudoer_line = r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers"
output = subprocess.run(sudoer_line, shell=True, capture_output=True)
if output.returncode != 0:
raise RuntimeError(f'Failed to add sudoer: {output.stderr.decode()}')
logger.debug(f'Added sudoer successfully. Output: [{output.stdout.decode()}]')
command = (
f'useradd -rm -d /home/{username} -s /bin/bash '
f'-g root -G sudo -u {user_id} {username}'
)
output = subprocess.run(command, shell=True, capture_output=True)
if output.returncode == 0:
logger.debug(
f'Added user `{username}` successfully with UID {user_id}. Output: [{output.stdout.decode()}]'
)
else:
raise RuntimeError(
f'Failed to create user `{username}` with UID {user_id}. Output: [{output.stderr.decode()}]'
)
return None

View File

@@ -56,8 +56,16 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | env UV_INSTALL_DIR="/openhands/
# Add /openhands/bin to PATH
ENV PATH="/openhands/bin:${PATH}"
# Remove UID 1000 named pn or ubuntu, so the 'openhands' user can be created from ubuntu hosts
RUN (if getent passwd 1000 | grep -q pn; then userdel pn; fi) && \
(if getent passwd 1000 | grep -q ubuntu; then userdel ubuntu; fi)
# Create necessary directories
RUN mkdir -p /openhands && \
mkdir -p /openhands/logs && \
mkdir -p /openhands/poetry
# ================================================================
# Define Docker installation macro
@@ -103,29 +111,6 @@ RUN \
# Configure Docker daemon with MTU 1450 to prevent packet fragmentation issues
RUN mkdir -p /etc/docker && \
echo '{"mtu": 1450}' > /etc/docker/daemon.json
# Remove UID 1000 and GID 1000 users/groups that might conflict with openhands user
RUN (if getent passwd 1000 | grep -q pn; then userdel pn; fi) && \
(if getent passwd 1000 | grep -q ubuntu; then userdel ubuntu; fi) && \
(if getent group 1000 | grep -q pn; then groupdel pn; fi) && \
(if getent group 1000 | grep -q ubuntu; then groupdel ubuntu; fi)
# Create openhands group and user
RUN groupadd -g 1000 openhands && \
useradd -u 1000 -g 1000 -m -s /bin/bash openhands && \
usermod -aG sudo openhands && \
echo 'openhands ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
# Create necessary directories
RUN mkdir -p /openhands && \
mkdir -p /openhands/logs && \
mkdir -p /openhands/poetry && \
mkdir -p /workspace && \
mkdir -p /workspace/.openhands && \
mkdir -p /home/openhands/.openhands && \
chown -R openhands:openhands /openhands && \
chown -R openhands:openhands /workspace && \
chown -R openhands:openhands /home/openhands
{% endmacro %}
# Install Docker only if not a swebench or mswebench image
@@ -165,8 +150,7 @@ RUN if [ -z "${RELEASE_TAG}" ]; then \
if [ -d "${OPENVSCODE_SERVER_ROOT}" ]; then rm -rf "${OPENVSCODE_SERVER_ROOT}"; fi && \
mv ${RELEASE_TAG}-linux-${arch} ${OPENVSCODE_SERVER_ROOT} && \
cp ${OPENVSCODE_SERVER_ROOT}/bin/remote-cli/openvscode-server ${OPENVSCODE_SERVER_ROOT}/bin/remote-cli/code && \
rm -f ${RELEASE_TAG}-linux-${arch}.tar.gz && \
chown -R openhands:openhands ${OPENVSCODE_SERVER_ROOT}
rm -f ${RELEASE_TAG}-linux-${arch}.tar.gz
@@ -175,12 +159,10 @@ RUN if [ -z "${RELEASE_TAG}" ]; then \
{% macro install_vscode_extensions() %}
# Install our custom extension
RUN mkdir -p ${OPENVSCODE_SERVER_ROOT}/extensions/openhands-hello-world && \
cp -r /openhands/code/openhands/runtime/utils/vscode-extensions/hello-world/* ${OPENVSCODE_SERVER_ROOT}/extensions/openhands-hello-world/ && \
chown -R openhands:openhands ${OPENVSCODE_SERVER_ROOT}/extensions/openhands-hello-world
cp -r /openhands/code/openhands/runtime/utils/vscode-extensions/hello-world/* ${OPENVSCODE_SERVER_ROOT}/extensions/openhands-hello-world/
RUN mkdir -p ${OPENVSCODE_SERVER_ROOT}/extensions/openhands-memory-monitor && \
cp -r /openhands/code/openhands/runtime/utils/vscode-extensions/memory-monitor/* ${OPENVSCODE_SERVER_ROOT}/extensions/openhands-memory-monitor/ && \
chown -R openhands:openhands ${OPENVSCODE_SERVER_ROOT}/extensions/openhands-memory-monitor
cp -r /openhands/code/openhands/runtime/utils/vscode-extensions/memory-monitor/* ${OPENVSCODE_SERVER_ROOT}/extensions/openhands-memory-monitor/
# Some extension dirs are removed because they trigger false positives in vulnerability scans.
RUN rm -rf ${OPENVSCODE_SERVER_ROOT}/extensions/{handlebars,pug,json,diff,grunt,ini,npm}
@@ -203,12 +185,9 @@ RUN \
{% endif %}
# Set environment variables
/openhands/micromamba/bin/micromamba run -n openhands poetry run python -c "import sys; print('OH_INTERPRETER_PATH=' + sys.executable)" >> /etc/environment && \
# Set permissions and ownership
# Set permissions
chmod -R g+rws /openhands/poetry && \
chown -R openhands:openhands /openhands/poetry && \
mkdir -p /openhands/workspace && chmod -R g+rws,o+rw /openhands/workspace && \
chown -R openhands:openhands /openhands/workspace && \
chown -R openhands:openhands /openhands/micromamba && \
# Clean up
/openhands/micromamba/bin/micromamba run -n openhands poetry cache clear --all . -n && \
apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
@@ -229,8 +208,7 @@ RUN \
RUN mkdir -p /openhands/micromamba/bin && \
/bin/bash -c "PREFIX_LOCATION=/openhands/micromamba BIN_FOLDER=/openhands/micromamba/bin INIT_YES=no CONDA_FORGE_YES=yes $(curl -L https://micro.mamba.pm/install.sh)" && \
/openhands/micromamba/bin/micromamba config remove channels defaults && \
/openhands/micromamba/bin/micromamba config list && \
chown -R openhands:openhands /openhands/micromamba
/openhands/micromamba/bin/micromamba config list
# Create the openhands virtual environment and install poetry and python
RUN /openhands/micromamba/bin/micromamba create -n openhands -y && \
@@ -241,12 +219,11 @@ RUN \
if [ -d /openhands/code ]; then rm -rf /openhands/code; fi && \
mkdir -p /openhands/code/openhands && \
touch /openhands/code/openhands/__init__.py && \
chown -R openhands:openhands /openhands/code && \
# Set global git configuration to ensure proper author/committer information
git config --global user.name "openhands" && \
git config --global user.email "openhands@all-hands.dev"
COPY --chown=openhands:openhands ./code/pyproject.toml ./code/poetry.lock /openhands/code/
COPY ./code/pyproject.toml ./code/poetry.lock /openhands/code/
{{ install_dependencies() }}
@@ -257,43 +234,14 @@ COPY --chown=openhands:openhands ./code/pyproject.toml ./code/poetry.lock /openh
{{ setup_vscode_server() }}
# ================================================================
# Ensure openhands user and directories exist (for non-scratch builds)
# ================================================================
{% if not build_from_scratch %}
# Remove UID 1000 and GID 1000 users/groups that might conflict with openhands user
RUN (if getent passwd 1000 | grep -q pn; then userdel pn; fi) && \
(if getent passwd 1000 | grep -q ubuntu; then userdel ubuntu; fi) && \
(if getent group 1000 | grep -q pn; then groupdel pn; fi) && \
(if getent group 1000 | grep -q ubuntu; then groupdel ubuntu; fi)
# Create openhands group and user if they don't exist
RUN (getent group openhands || groupadd -g 1000 openhands) && \
(getent passwd openhands || useradd -u 1000 -g 1000 -m -s /bin/bash openhands) && \
usermod -aG sudo openhands && \
echo 'openhands ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
# Create necessary directories and set ownership
RUN mkdir -p /openhands && \
mkdir -p /openhands/logs && \
mkdir -p /openhands/poetry && \
mkdir -p /workspace && \
mkdir -p /workspace/.openhands && \
mkdir -p /home/openhands/.openhands && \
chown -R openhands:openhands /openhands && \
chown -R openhands:openhands /workspace && \
chown -R openhands:openhands /home/openhands
{% endif %}
# ================================================================
# Copy Project source files
# ================================================================
RUN if [ -d /openhands/code/openhands ]; then rm -rf /openhands/code/openhands; fi
COPY --chown=openhands:openhands ./code/pyproject.toml ./code/poetry.lock /openhands/code/
COPY ./code/pyproject.toml ./code/poetry.lock /openhands/code/
COPY --chown=openhands:openhands ./code/openhands /openhands/code/openhands
RUN chmod a+rwx /openhands/code/openhands/__init__.py && \
chown -R openhands:openhands /openhands/code
COPY ./code/openhands /openhands/code/openhands
RUN chmod a+rwx /openhands/code/openhands/__init__.py
@@ -307,12 +255,3 @@ RUN chmod a+rwx /openhands/code/openhands/__init__.py && \
# Install extra dependencies if specified
{% if extra_deps %}RUN {{ extra_deps }} {% endif %}
# Copy entrypoint script and make it executable
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh
# Set the entrypoint to run as root first, then switch to openhands
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
# Note: We don't set USER openhands here because the entrypoint handles the user switch

View File

@@ -1,32 +0,0 @@
#!/bin/bash
#
# OpenHands runtime entrypoint.
#
# Runs as root: hands /workspace over to the openhands user, then replaces
# itself with either the given command or a login shell for that user.
#
# Usage: entrypoint.sh [command [args...]]
set -e

# This entrypoint script runs as root to fix workspace ownership before switching to openhands user
echo "🔧 OpenHands Runtime Entrypoint - Fixing workspace ownership..."

# Check if /workspace exists and fix ownership
if [ -d "/workspace" ]; then
  echo "📁 Found /workspace directory, checking ownership..."
  ls -la /workspace

  # Fix ownership to openhands:openhands
  echo "🔧 Changing ownership to openhands:openhands..."
  chown -R openhands:openhands /workspace
  chmod -R g+rw /workspace

  echo "✅ Ownership fixed:"
  ls -la /workspace
else
  echo "⚠️ /workspace directory not found, will be created later"
fi

# If arguments are provided, execute them as the openhands user
if [ $# -gt 0 ]; then
  # "$*" joins the arguments into one word for the log line; "$@" inside a
  # larger string splits into several words (ShellCheck SC2145).
  echo "🚀 Switching to openhands user and executing: $*"
  # Use exec to replace the current process and preserve all arguments.
  # NOTE(review): how `su` hands the words after `--` to the shell differs
  # between su implementations; confirm the first argument is not consumed
  # as $0 of the `-c` command.
  exec su openhands -c 'exec "$@"' -- "$@"
else
  echo "🚀 Switching to openhands user with bash shell"
  exec su - openhands
fi

View File

@@ -10,17 +10,18 @@ from jinja2 import Environment, FileSystemLoader
from pydantic import BaseModel, ConfigDict, Field
from openhands.core.config.llm_config import LLMConfig
from openhands.core.config.mcp_config import MCPConfig
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import (
ChangeAgentStateAction,
NullAction,
)
from openhands.events.event_filter import EventFilter
from openhands.events.event_store import EventStore
from openhands.events.observation import (
AgentStateChangedObservation,
NullObservation,
)
from openhands.events.stream import EventStream
from openhands.integrations.provider import (
PROVIDER_TOKEN_TYPE,
ProviderHandler,
@@ -44,11 +45,11 @@ from openhands.server.services.conversation_service import (
create_new_conversation,
setup_init_convo_settings,
)
from openhands.server.session.conversation import ServerConversation
from openhands.server.shared import (
ConversationStoreImpl,
config,
conversation_manager,
file_store,
)
from openhands.server.types import LLMAuthenticationError, MissingSettingsError
from openhands.server.user_auth import (
@@ -60,7 +61,7 @@ from openhands.server.user_auth import (
get_user_settings_store,
)
from openhands.server.user_auth.user_auth import AuthType
from openhands.server.utils import get_conversation as get_conversation_object
from openhands.server.utils import get_conversation as get_conversation_metadata
from openhands.server.utils import get_conversation_store
from openhands.storage.conversation.conversation_store import ConversationStore
from openhands.storage.data_models.conversation_metadata import (
@@ -87,6 +88,7 @@ class InitSessionRequest(BaseModel):
suggested_task: SuggestedTask | None = None
create_microagent: CreateMicroagent | None = None
conversation_instructions: str | None = None
mcp_config: MCPConfig | None = None
# Only nested runtimes require the ability to specify a conversation id, and it could be a security risk
if os.getenv('ALLOW_SET_CONVERSATION_ID', '0') == '1':
conversation_id: str = Field(default_factory=lambda: uuid.uuid4().hex)
@@ -178,6 +180,7 @@ async def new_conversation(
conversation_instructions=conversation_instructions,
git_provider=git_provider,
conversation_id=conversation_id,
mcp_config=data.mcp_config,
)
return ConversationResponse(
@@ -331,23 +334,20 @@ async def delete_conversation(
return True
@app.get('/conversations/{conversation_id}/remember_prompt')
@app.get('/conversations/{conversation_id}/remember-prompt')
async def get_prompt(
conversation_id: str,
event_id: int,
user_settings: SettingsStore = Depends(get_user_settings_store),
conversation: ServerConversation | None = Depends(get_conversation_object),
metadata: ConversationMetadata = Depends(get_conversation_metadata),
):
if conversation is None:
return JSONResponse(
status_code=404,
content={'error': 'Conversation not found.'},
)
# get event stream for the conversation
event_stream = conversation.event_stream
# get event store for the conversation
event_store = EventStore(
sid=conversation_id, file_store=file_store, user_id=metadata.user_id
)
# retrieve the relevant events
stringified_events = _get_contextual_events(event_stream, event_id)
stringified_events = _get_contextual_events(event_store, event_id)
# generate a prompt
settings = await user_settings.load()
@@ -551,7 +551,7 @@ async def stop_conversation(
)
def _get_contextual_events(event_stream: EventStream, event_id: int) -> str:
def _get_contextual_events(event_store: EventStore, event_id: int) -> str:
# find the specified events to learn from
# Get X events around the target event
context_size = 4
@@ -567,7 +567,7 @@ def _get_contextual_events(event_stream: EventStream, event_id: int) -> str:
) # the types of events that can be in an agent's history
# from event_id - context_size to event_id..
context_before = event_stream.search_events(
context_before = event_store.search_events(
start_id=event_id,
filter=agent_event_filter,
reverse=True,
@@ -575,7 +575,7 @@ def _get_contextual_events(event_stream: EventStream, event_id: int) -> str:
)
# from event_id to event_id + context_size + 1
context_after = event_stream.search_events(
context_after = event_store.search_events(
start_id=event_id + 1,
filter=agent_event_filter,
limit=context_size + 1,

View File

@@ -2,6 +2,7 @@ import uuid
from types import MappingProxyType
from typing import Any
from openhands.core.config.mcp_config import MCPConfig
from openhands.core.logger import openhands_logger as logger
from openhands.events.action.message import MessageAction
from openhands.experiments.experiment_manager import ExperimentManagerImpl
@@ -44,6 +45,7 @@ async def create_new_conversation(
attach_convo_id: bool = False,
git_provider: ProviderType | None = None,
conversation_id: str | None = None,
mcp_config: MCPConfig | None = None,
) -> AgentLoopInfo:
logger.info(
'Creating conversation',
@@ -82,6 +84,9 @@ async def create_new_conversation(
session_init_args['selected_branch'] = selected_branch
session_init_args['git_provider'] = git_provider
session_init_args['conversation_instructions'] = conversation_instructions
if mcp_config:
session_init_args['mcp_config'] = mcp_config
conversation_init_data = ConversationInitData(**session_init_args)
logger.info('Loading conversation store')

View File

@@ -124,10 +124,12 @@ class Session:
)
# Set Git user configuration if provided in settings
if hasattr(settings, 'git_user_name') and settings.git_user_name:
self.config.git_user_name = settings.git_user_name
if hasattr(settings, 'git_user_email') and settings.git_user_email:
self.config.git_user_email = settings.git_user_email
git_user_name = getattr(settings, 'git_user_name', None)
if git_user_name is not None:
self.config.git_user_name = git_user_name
git_user_email = getattr(settings, 'git_user_email', None)
if git_user_email is not None:
self.config.git_user_email = git_user_email
max_iterations = settings.max_iterations or self.config.max_iterations
# Prioritize settings over config for max_budget_per_task
@@ -152,6 +154,14 @@ class Session:
self.logger.debug(
f'MCP configuration before setup - self.config.mcp_config: {self.config.mcp}'
)
# Check if settings has custom mcp_config
mcp_config = getattr(settings, 'mcp_config', None)
if mcp_config is not None:
# Use the provided MCP SHTTP servers instead of default setup
self.config.mcp = self.config.mcp.merge(mcp_config)
self.logger.debug(f'Merged custom MCP Config: {mcp_config}')
# Add OpenHands' MCP server by default
openhands_mcp_server, openhands_mcp_stdio_servers = (
OpenHandsMCPConfigImpl.create_default_mcp_server_config(
@@ -163,7 +173,7 @@ class Session:
self.config.mcp.shttp_servers.append(openhands_mcp_server)
self.logger.debug('Added default MCP HTTP server to config')
self.config.mcp.stdio_servers.extend(openhands_mcp_stdio_servers)
self.config.mcp.stdio_servers.extend(openhands_mcp_stdio_servers)
self.logger.debug(
f'MCP configuration after setup - self.config.mcp: {self.config.mcp}'

View File

@@ -56,6 +56,7 @@ def get_supported_llm_models(config: OpenHandsConfig) -> list[str]:
# Add OpenHands provider models
openhands_models = [
'openhands/claude-sonnet-4-20250514',
'openhands/gpt-5-2025-08-07',
'openhands/claude-opus-4-20250514',
'openhands/gemini-2.5-pro',
'openhands/o3',

View File

@@ -4,6 +4,7 @@ from itertools import islice
from jinja2 import Template
from openhands.agenthub.codeact_agent.tools.bash import refine_prompt
from openhands.controller.state.state import State
from openhands.core.message import Message, TextContent
from openhands.events.observation.agent import MicroagentKnowledge
@@ -91,7 +92,8 @@ class PromptManager:
return Template(file.read())
def get_system_message(self) -> str:
return self.system_template.render().strip()
system_message = self.system_template.render().strip()
return refine_prompt(system_message)
def get_example_user_message(self) -> str:
"""This is an initial user message that can be provided to the agent

View File

@@ -10,6 +10,7 @@ class TermColor(Enum):
SUCCESS = 'green'
ERROR = 'red'
INFO = 'blue'
GREY = 'dark_grey'
def colorize(text: str, color: TermColor = TermColor.WARNING) -> str:

6
package-lock.json generated
View File

@@ -1,6 +0,0 @@
{
"name": "OpenHands",
"lockfileVersion": 3,
"requires": true,
"packages": {}
}

View File

@@ -42,6 +42,7 @@ numpy = "*"
json-repair = "*"
browsergym-core = "0.13.3" # integrate browsergym-core as the browsing interface
html2text = "*"
rich = "*" # For terminal formatting and markdown rendering
deprecated = "*"
pexpect = "*"
jinja2 = "^3.1.3"
@@ -166,7 +167,7 @@ joblib = "*"
swebench = { git = "https://github.com/ryanhoangt/SWE-bench.git", rev = "fix-modal-patch-eval" }
[tool.poetry.scripts]
openhands = "openhands.cli.main:main"
openhands = "openhands.cli.entry:main"
[tool.poetry.group.testgeneval.dependencies]
fuzzywuzzy = "^0.18.0"

View File

@@ -1,17 +1,36 @@
import pytest
from openhands.core.config import OH_DEFAULT_AGENT, OH_MAX_ITERATIONS, get_parser
from openhands.core.config import (
get_evaluation_parser,
get_headless_parser,
)
def test_parser_default_values():
parser = get_parser()
def test_headless_parser_default_values():
parser = get_headless_parser()
args = parser.parse_args([])
assert args.directory is None
assert args.task == ''
assert args.file is None
assert args.agent_cls == OH_DEFAULT_AGENT
assert args.max_iterations == OH_MAX_ITERATIONS
assert args.agent_cls is None
assert args.max_iterations is None
assert args.max_budget_per_task is None
assert args.llm_config is None
assert args.name == ''
assert not args.no_auto_continue
assert args.selected_repo is None
def test_evaluation_parser_default_values():
parser = get_evaluation_parser()
args = parser.parse_args([])
assert args.directory is None
assert args.task == ''
assert args.file is None
assert args.agent_cls is None
assert args.max_iterations is None
assert args.max_budget_per_task is None
assert args.eval_output_dir == 'evaluation/evaluation_outputs/outputs'
assert args.eval_n_limit is None
@@ -23,8 +42,8 @@ def test_parser_default_values():
assert args.selected_repo is None
def test_parser_custom_values():
parser = get_parser()
def test_evaluation_parser_custom_values():
parser = get_evaluation_parser()
args = parser.parse_args(
[
'-v',
@@ -76,7 +95,7 @@ def test_parser_custom_values():
def test_parser_file_overrides_task():
parser = get_parser()
parser = get_headless_parser()
args = parser.parse_args(['-t', 'task from command', '-f', 'task_file.txt'])
assert args.task == 'task from command'
@@ -84,31 +103,31 @@ def test_parser_file_overrides_task():
def test_parser_invalid_max_iterations():
parser = get_parser()
parser = get_headless_parser()
with pytest.raises(SystemExit):
parser.parse_args(['-i', 'not_a_number'])
def test_parser_invalid_max_budget():
parser = get_parser()
parser = get_headless_parser()
with pytest.raises(SystemExit):
parser.parse_args(['-b', 'not_a_number'])
def test_parser_invalid_eval_n_limit():
parser = get_parser()
def test_evaluation_parser_invalid_eval_n_limit():
parser = get_evaluation_parser()
with pytest.raises(SystemExit):
parser.parse_args(['--eval-n-limit', 'not_a_number'])
def test_parser_invalid_eval_num_workers():
parser = get_parser()
def test_evaluation_parser_invalid_eval_num_workers():
parser = get_evaluation_parser()
with pytest.raises(SystemExit):
parser.parse_args(['--eval-num-workers', 'not_a_number'])
def test_help_message(capsys):
parser = get_parser()
def test_headless_parser_help_message(capsys):
parser = get_headless_parser()
with pytest.raises(SystemExit):
parser.parse_args(['--help'])
captured = capsys.readouterr()
@@ -126,6 +145,41 @@ def test_help_message(capsys):
'-c AGENT_CLS, --agent-cls AGENT_CLS',
'-i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS',
'-b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK',
'-l LLM_CONFIG, --llm-config LLM_CONFIG',
'--agent-config AGENT_CONFIG',
'-n NAME, --name NAME',
'--config-file CONFIG_FILE',
'--no-auto-continue',
'--selected-repo SELECTED_REPO',
'--log-level LOG_LEVEL',
]
for element in expected_elements:
assert element in help_output, f"Expected '{element}' to be in the help message"
option_count = help_output.count(' -')
assert option_count == 15, f'Expected 15 options, found {option_count}'
def test_evaluation_parser_help_message(capsys):
parser = get_evaluation_parser()
with pytest.raises(SystemExit):
parser.parse_args(['--help'])
captured = capsys.readouterr()
help_output = captured.out
print(help_output)
expected_elements = [
'usage:',
'Run OpenHands in evaluation mode',
'options:',
'-v, --version',
'-h, --help',
'-d DIRECTORY, --directory DIRECTORY',
'-t TASK, --task TASK',
'-f FILE, --file FILE',
'-c AGENT_CLS, --agent-cls AGENT_CLS',
'-i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS',
'-b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK',
'--eval-output-dir EVAL_OUTPUT_DIR',
'--eval-n-limit EVAL_N_LIMIT',
'--eval-num-workers EVAL_NUM_WORKERS',
@@ -137,7 +191,6 @@ def test_help_message(capsys):
'--config-file CONFIG_FILE',
'--no-auto-continue',
'--selected-repo SELECTED_REPO',
'--override-cli-mode OVERRIDE_CLI_MODE',
'--log-level LOG_LEVEL',
]
@@ -145,11 +198,11 @@ def test_help_message(capsys):
assert element in help_output, f"Expected '{element}' to be in the help message"
option_count = help_output.count(' -')
assert option_count == 21, f'Expected 21 options, found {option_count}'
assert option_count == 20, f'Expected 20 options, found {option_count}'
def test_selected_repo_format():
"""Test that the selected-repo argument accepts owner/repo format."""
parser = get_parser()
parser = get_headless_parser()
args = parser.parse_args(['--selected-repo', 'owner/repo'])
assert args.selected_repo == 'owner/repo'

View File

@@ -325,7 +325,6 @@ async def test_run_session_with_initial_action(
@pytest.mark.asyncio
@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -345,7 +344,6 @@ async def test_main_without_task(
mock_check_security,
mock_get_settings_store,
mock_setup_config,
mock_parse_args,
):
"""Test main function without a task."""
loop = asyncio.get_running_loop()
@@ -359,7 +357,10 @@ async def test_main_without_task(
mock_args.llm_config = None
mock_args.name = None
mock_args.file = None
mock_parse_args.return_value = mock_args
mock_args.conversation = None
mock_args.log_level = None
mock_args.config_file = 'config.toml'
mock_args.override_cli_mode = None
# Mock config
mock_config = MagicMock()
@@ -393,10 +394,9 @@ async def test_main_without_task(
mock_run_session.return_value = False
# Run the function
await cli.main_with_loop(loop)
await cli.main_with_loop(loop, mock_args)
# Assertions
mock_parse_args.assert_called_once()
mock_setup_config.assert_called_once_with(mock_args)
mock_get_settings_store.assert_called_once()
mock_settings_store.load.assert_called_once()
@@ -412,11 +412,11 @@ async def test_main_without_task(
None,
session_name=None,
skip_banner=False,
conversation_id=None,
)
@pytest.mark.asyncio
@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -436,7 +436,6 @@ async def test_main_with_task(
mock_check_security,
mock_get_settings_store,
mock_setup_config,
mock_parse_args,
):
"""Test main function with a task."""
loop = asyncio.get_running_loop()
@@ -449,7 +448,11 @@ async def test_main_with_task(
mock_args.agent_cls = 'custom-agent'
mock_args.llm_config = 'custom-config'
mock_args.file = None
mock_parse_args.return_value = mock_args
mock_args.name = None
mock_args.conversation = None
mock_args.log_level = None
mock_args.config_file = 'config.toml'
mock_args.override_cli_mode = None
# Mock config
mock_config = MagicMock()
@@ -484,10 +487,9 @@ async def test_main_with_task(
mock_run_session.side_effect = [True, False]
# Run the function
await cli.main_with_loop(loop)
await cli.main_with_loop(loop, mock_args)
# Assertions
mock_parse_args.assert_called_once()
mock_setup_config.assert_called_once_with(mock_args)
mock_get_settings_store.assert_called_once()
mock_settings_store.load.assert_called_once()
@@ -518,7 +520,6 @@ async def test_main_with_task(
@pytest.mark.asyncio
@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -538,7 +539,6 @@ async def test_main_with_session_name_passes_name_to_run_session(
mock_check_security,
mock_get_settings_store,
mock_setup_config,
mock_parse_args,
):
"""Test main function with a session name passes it to run_session."""
loop = asyncio.get_running_loop()
@@ -553,7 +553,10 @@ async def test_main_with_session_name_passes_name_to_run_session(
mock_args.llm_config = None
mock_args.name = test_session_name # Set the session name
mock_args.file = None
mock_parse_args.return_value = mock_args
mock_args.conversation = None
mock_args.log_level = None
mock_args.config_file = 'config.toml'
mock_args.override_cli_mode = None
# Mock config
mock_config = MagicMock()
@@ -587,10 +590,9 @@ async def test_main_with_session_name_passes_name_to_run_session(
mock_run_session.return_value = False
# Run the function
await cli.main_with_loop(loop)
await cli.main_with_loop(loop, mock_args)
# Assertions
mock_parse_args.assert_called_once()
mock_setup_config.assert_called_once_with(mock_args)
mock_get_settings_store.assert_called_once()
mock_settings_store.load.assert_called_once()
@@ -606,6 +608,7 @@ async def test_main_with_session_name_passes_name_to_run_session(
None,
session_name=test_session_name,
skip_banner=False,
conversation_id=None,
)
@@ -709,7 +712,6 @@ async def test_run_session_with_name_attempts_state_restore(
@pytest.mark.asyncio
@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -729,7 +731,6 @@ async def test_main_security_check_fails(
mock_check_security,
mock_get_settings_store,
mock_setup_config,
mock_parse_args,
):
"""Test main function when security check fails."""
loop = asyncio.get_running_loop()
@@ -739,7 +740,14 @@ async def test_main_security_check_fails(
# Mock arguments
mock_args = MagicMock()
mock_parse_args.return_value = mock_args
mock_args.agent_cls = None
mock_args.llm_config = None
mock_args.name = None
mock_args.file = None
mock_args.conversation = None
mock_args.log_level = None
mock_args.config_file = 'config.toml'
mock_args.override_cli_mode = None
# Mock config
mock_config = MagicMock()
@@ -761,10 +769,9 @@ async def test_main_security_check_fails(
mock_check_security.return_value = False
# Run the function
await cli.main_with_loop(loop)
await cli.main_with_loop(loop, mock_args)
# Assertions
mock_parse_args.assert_called_once()
mock_setup_config.assert_called_once_with(mock_args)
mock_get_settings_store.assert_called_once()
mock_settings_store.load.assert_called_once()
@@ -775,7 +782,6 @@ async def test_main_security_check_fails(
@pytest.mark.asyncio
@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -795,7 +801,6 @@ async def test_config_loading_order(
mock_check_security,
mock_get_settings_store,
mock_setup_config,
mock_parse_args,
):
"""Test the order of configuration loading in the main function.
@@ -816,7 +821,10 @@ async def test_config_loading_order(
# Add a file property to avoid file I/O errors
mock_args.file = None
mock_args.log_level = 'INFO'
mock_parse_args.return_value = mock_args
mock_args.name = None
mock_args.conversation = None
mock_args.config_file = 'config.toml'
mock_args.override_cli_mode = None
# Mock read_task to return a dummy task
mock_read_task.return_value = 'Test task'
@@ -859,10 +867,9 @@ async def test_config_loading_order(
mock_run_session.return_value = False # No new session requested
# Run the function
await cli.main_with_loop(loop)
await cli.main_with_loop(loop, mock_args)
# Assertions for argument parsing and config setup
mock_parse_args.assert_called_once()
mock_setup_config.assert_called_once_with(mock_args)
mock_get_settings_store.assert_called_once()
mock_settings_store.load.assert_called_once()
@@ -892,7 +899,6 @@ async def test_config_loading_order(
@pytest.mark.asyncio
@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -914,7 +920,6 @@ async def test_main_with_file_option(
mock_check_security,
mock_get_settings_store,
mock_setup_config,
mock_parse_args,
):
"""Test main function with a file option."""
loop = asyncio.get_running_loop()
@@ -929,7 +934,10 @@ async def test_main_with_file_option(
mock_args.name = None
mock_args.file = '/path/to/test/file.txt'
mock_args.task = None
mock_parse_args.return_value = mock_args
mock_args.conversation = None
mock_args.log_level = None
mock_args.config_file = 'config.toml'
mock_args.override_cli_mode = None
# Mock config
mock_config = MagicMock()
@@ -965,10 +973,9 @@ async def test_main_with_file_option(
mock_run_session.return_value = False
# Run the function
await cli.main_with_loop(loop)
await cli.main_with_loop(loop, mock_args)
# Assertions
mock_parse_args.assert_called_once()
mock_setup_config.assert_called_once_with(mock_args)
mock_get_settings_store.assert_called_once()
mock_settings_store.load.assert_called_once()

View File

@@ -145,8 +145,8 @@ class TestThoughtDisplayOrder:
# Verify that final thought is displayed
mock_display_message.assert_called_once_with('This is a final thought.')
@patch('openhands.cli.tui.display_message')
def test_message_action_from_agent(self, mock_display_message):
@patch('openhands.cli.tui.display_agent_message')
def test_message_action_from_agent(self, mock_display_agent_message):
"""Test that MessageAction from agent is displayed."""
config = MagicMock(spec=OpenHandsConfig)
@@ -156,8 +156,8 @@ class TestThoughtDisplayOrder:
display_event(message_action, config)
# Verify that message is displayed
mock_display_message.assert_called_once_with('Hello from agent')
# Verify that agent message is displayed
mock_display_agent_message.assert_called_once_with('Hello from agent')
@patch('openhands.cli.tui.display_message')
def test_message_action_from_user_not_displayed(self, mock_display_message):

View File

@@ -6,6 +6,7 @@ from openhands.cli.tui import (
CustomDiffLexer,
UsageMetrics,
UserCancelledError,
display_agent_message,
display_banner,
display_command,
display_event,
@@ -26,6 +27,7 @@ from openhands.events import EventSource
from openhands.events.action import (
Action,
ActionConfirmationStatus,
AgentFinishAction,
CmdRunAction,
MCPAction,
MessageAction,
@@ -107,15 +109,15 @@ class TestDisplayFunctions:
assert 'What do you want to build?' in message_text
assert 'Type /help for help' in message_text
@patch('openhands.cli.tui.display_message')
def test_display_event_message_action(self, mock_display_message):
@patch('openhands.cli.tui.display_agent_message')
def test_display_event_message_action(self, mock_display_agent_message):
config = MagicMock(spec=OpenHandsConfig)
message = MessageAction(content='Test message')
message._source = EventSource.AGENT
display_event(message, config)
mock_display_message.assert_called_once_with('Test message')
mock_display_agent_message.assert_called_once_with('Test message')
@patch('openhands.cli.tui.display_command')
def test_display_event_cmd_action(self, mock_display_command):
@@ -181,6 +183,15 @@ class TestDisplayFunctions:
display_event(action, config)
mock_display_message.assert_called_once_with('Thinking about this...')
@patch('openhands.cli.tui.display_agent_message')
def test_display_event_agent_finish(self, mock_display_agent_message):
    """An AgentFinishAction is forwarded to display_agent_message with is_finish=True."""
    mocked_config = MagicMock(spec=OpenHandsConfig)
    action = AgentFinishAction(final_thought='Task completed')

    display_event(action, mocked_config)

    # The final thought text is passed through unchanged, flagged as a finish message.
    mock_display_agent_message.assert_called_once_with(
        'Task completed', is_finish=True
    )
@patch('openhands.cli.tui.display_mcp_action')
def test_display_event_mcp_action(self, mock_display_mcp_action):
@@ -255,6 +266,37 @@ class TestDisplayFunctions:
mock_print.assert_called_once()
args, kwargs = mock_print.call_args
assert message in str(args[0])
@patch('openhands.cli.tui.shutil.get_terminal_size')
@patch('openhands.cli.tui.print_formatted_text')
def test_display_agent_message(self, mock_print_formatted, mock_terminal_size):
    """display_agent_message makes at least three print_formatted_text calls for a plain message."""
    import os

    # shutil.get_terminal_size() returns an os.terminal_size, so mock it with the
    # real type (80 columns x 24 lines) rather than a hand-rolled namedtuple.
    mock_terminal_size.return_value = os.terminal_size((80, 24))

    message = 'Agent message'
    display_agent_message(message)

    # Should be called multiple times now (header, separator, content)
    assert mock_print_formatted.call_count >= 3
@patch('openhands.cli.tui.shutil.get_terminal_size')
@patch('openhands.cli.tui.print_formatted_text')
def test_display_agent_message_with_markdown(
    self, mock_print_formatted, mock_terminal_size
):
    """display_agent_message makes at least three print_formatted_text calls for markdown input."""
    import os

    # shutil.get_terminal_size() returns an os.terminal_size, so mock it with the
    # real type (80 columns x 24 lines) rather than a hand-rolled namedtuple.
    mock_terminal_size.return_value = os.terminal_size((80, 24))

    # Test with markdown content
    message = '# Heading\n\nThis is **bold** text.'
    display_agent_message(message)

    # Should be called multiple times now (header, separator, content)
    assert mock_print_formatted.call_count >= 3
@patch('openhands.cli.tui.print_container')
def test_display_command_awaiting_confirmation(self, mock_print_container):

View File

@@ -3,6 +3,8 @@ from unittest.mock import MagicMock, patch
import pytest
from openhands.core.config import (
OH_DEFAULT_AGENT,
OH_MAX_ITERATIONS,
OpenHandsConfig,
get_llm_config_arg,
setup_config_from_args,
@@ -308,3 +310,74 @@ def test_cli_settings_json_not_override_config_toml(
# Verify that settings.json did not override config.toml
assert test_llm_config.model == 'config-toml-model'
assert test_llm_config.api_key == 'config-toml-api-key'
def test_default_values_applied_when_none():
    """With agent_cls and max_iterations unset on the CLI, the config reports the OH_* defaults."""
    # CLI namespace stand-in: nothing is overridden on the command line.
    cli_args = MagicMock(
        config_file=None,
        llm_config=None,
        agent_cls=None,
        max_iterations=None,
    )

    # Have the loader hand back a freshly constructed OpenHandsConfig.
    loader_patch = patch(
        'openhands.core.config.utils.load_openhands_config',
        return_value=OpenHandsConfig(),
    )
    with loader_patch:
        config = setup_config_from_args(cli_args)

    # The resulting config must carry the module-level default constants.
    assert config.default_agent == OH_DEFAULT_AGENT
    assert config.max_iterations == OH_MAX_ITERATIONS
def test_cli_args_override_defaults():
    """Explicit agent_cls / max_iterations CLI values end up on the resulting config."""
    # CLI namespace stand-in carrying custom agent and iteration settings.
    cli_args = MagicMock(
        config_file=None,
        llm_config=None,
        agent_cls='CustomAgent',
        max_iterations=50,
    )

    # Have the loader hand back a freshly constructed OpenHandsConfig.
    loader_patch = patch(
        'openhands.core.config.utils.load_openhands_config',
        return_value=OpenHandsConfig(),
    )
    with loader_patch:
        config = setup_config_from_args(cli_args)

    # The CLI-supplied values must be the ones reported by the config.
    assert config.default_agent == 'CustomAgent'
    assert config.max_iterations == 50
def test_cli_args_none_uses_config_toml_values():
    """Values already set on the loaded config survive when the matching CLI args are None."""
    # CLI namespace stand-in: agent_cls and max_iterations are left unset.
    cli_args = MagicMock(
        config_file=None,
        llm_config=None,
        agent_cls=None,
        max_iterations=None,
    )

    # Stand-in for a config whose values came from config.toml.
    toml_backed_config = OpenHandsConfig()
    toml_backed_config.default_agent = 'ConfigTomlAgent'
    toml_backed_config.max_iterations = 100

    # Have the loader return that pre-populated config.
    loader_patch = patch(
        'openhands.core.config.utils.load_openhands_config',
        return_value=toml_backed_config,
    )
    with loader_patch:
        config = setup_config_from_args(cli_args)

    # None-valued CLI args must not clobber the pre-populated settings.
    assert config.default_agent == 'ConfigTomlAgent'
    assert config.max_iterations == 100

View File

@@ -13,6 +13,7 @@ from openhands.integrations.service_types import (
Repository,
)
from openhands.microagent.types import MicroagentContentResponse
from openhands.server.dependencies import check_session_api_key
from openhands.server.routes.git import app as git_app
from openhands.server.user_auth import (
get_access_token,
@@ -49,10 +50,15 @@ def test_client():
def mock_get_user_id():
return 'test_user'
def mock_check_session_api_key():
# Mock session API key check to always pass for tests
return None
# Override the dependencies in the app
app.dependency_overrides[get_provider_tokens] = mock_get_provider_tokens
app.dependency_overrides[get_access_token] = mock_get_access_token
app.dependency_overrides[get_user_id] = mock_get_user_id
app.dependency_overrides[check_session_api_key] = mock_check_session_api_key
yield TestClient(app)

View File

@@ -46,24 +46,32 @@ def test_localhost_cors_middleware_init_without_env_var():
def test_localhost_cors_middleware_is_allowed_origin_localhost(app):
"""Test that localhost origins are allowed regardless of port."""
app.add_middleware(LocalhostCORSMiddleware)
client = TestClient(app)
"""Test that localhost origins are allowed regardless of port when no specific origins are configured."""
# Test without setting PERMITTED_CORS_ORIGINS to trigger localhost behavior
with patch.dict(os.environ, {}, clear=True):
app.add_middleware(LocalhostCORSMiddleware)
client = TestClient(app)
# Test with localhost
response = client.get('/test', headers={'Origin': 'http://localhost:8000'})
assert response.status_code == 200
assert response.headers['access-control-allow-origin'] == 'http://localhost:8000'
# Test with localhost
response = client.get('/test', headers={'Origin': 'http://localhost:8000'})
assert response.status_code == 200
assert (
response.headers['access-control-allow-origin'] == 'http://localhost:8000'
)
# Test with different port
response = client.get('/test', headers={'Origin': 'http://localhost:3000'})
assert response.status_code == 200
assert response.headers['access-control-allow-origin'] == 'http://localhost:3000'
# Test with different port
response = client.get('/test', headers={'Origin': 'http://localhost:3000'})
assert response.status_code == 200
assert (
response.headers['access-control-allow-origin'] == 'http://localhost:3000'
)
# Test with 127.0.0.1
response = client.get('/test', headers={'Origin': 'http://127.0.0.1:8000'})
assert response.status_code == 200
assert response.headers['access-control-allow-origin'] == 'http://127.0.0.1:8000'
# Test with 127.0.0.1
response = client.get('/test', headers={'Origin': 'http://127.0.0.1:8000'})
assert response.status_code == 200
assert (
response.headers['access-control-allow-origin'] == 'http://127.0.0.1:8000'
)
def test_localhost_cors_middleware_is_allowed_origin_non_localhost(app):
@@ -87,14 +95,15 @@ def test_localhost_cors_middleware_is_allowed_origin_non_localhost(app):
def test_localhost_cors_middleware_missing_origin(app):
"""Test behavior when Origin header is missing."""
app.add_middleware(LocalhostCORSMiddleware)
client = TestClient(app)
with patch.dict(os.environ, {}, clear=True):
app.add_middleware(LocalhostCORSMiddleware)
client = TestClient(app)
# Test without Origin header
response = client.get('/test')
assert response.status_code == 200
# There should be no access-control-allow-origin header
assert 'access-control-allow-origin' not in response.headers
# Test without Origin header
response = client.get('/test')
assert response.status_code == 200
# There should be no access-control-allow-origin header
assert 'access-control-allow-origin' not in response.headers
def test_localhost_cors_middleware_inheritance():

View File

@@ -0,0 +1,179 @@
import sys
from unittest.mock import patch
import pytest
from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
from openhands.core.config import AgentConfig
from openhands.llm.llm import LLM
# Module-level skip marker: pytest applies `pytestmark` to every test in this
# module, so all tests below are skipped unless sys.platform is 'win32'.
pytestmark = pytest.mark.skipif(
sys.platform != 'win32', reason='Windows prompt refinement tests require Windows'
)
@pytest.fixture
def mock_llm():
    """Provide an LLM built from a minimal placeholder config (model 'gpt-4', dummy API key)."""
    return LLM(config={'model': 'gpt-4', 'api_key': 'test'})
@pytest.fixture
def agent_config():
    """Provide an AgentConfig constructed with no arguments."""
    default_config = AgentConfig()
    return default_config
def test_codeact_agent_system_prompt_no_bash_on_windows(mock_llm, agent_config):
    """The CodeActAgent system prompt must mention 'powershell' and never 'bash' (case-insensitive)."""
    # Build the agent and pull its rendered system prompt via the prompt manager.
    codeact_agent = CodeActAgent(llm=mock_llm, config=agent_config)
    system_prompt = codeact_agent.prompt_manager.get_system_message()

    # Compare case-insensitively by lowercasing once up front.
    lowered_prompt = system_prompt.lower()

    # No trace of 'bash' may remain in the prompt.
    assert 'bash' not in lowered_prompt, (
        f"System prompt contains 'bash' on Windows platform. "
        f"It should be replaced with 'powershell'. "
        f'System prompt: {system_prompt}'
    )

    # 'powershell' must appear in its place.
    assert 'powershell' in lowered_prompt, (
        f"System prompt should contain 'powershell' on Windows platform. "
        f'System prompt: {system_prompt}'
    )
def test_codeact_agent_tool_descriptions_no_bash_on_windows(mock_llm, agent_config):
    """No function tool exposed by CodeActAgent may mention 'bash' in any description."""
    codeact_agent = CodeActAgent(llm=mock_llm, config=agent_config)

    for tool in codeact_agent.tools:
        # Only function-type tools carry the description fields checked here.
        if tool['type'] != 'function':
            continue

        function_info = tool['function']

        # Top-level function description.
        description = function_info.get('description', '')
        assert 'bash' not in description.lower(), (
            f"Tool '{function_info['name']}' description contains 'bash' on Windows. "
            f'Description: {description}'
        )

        # Per-parameter descriptions; missing 'parameters'/'properties' mean nothing to check.
        properties = function_info.get('parameters', {}).get('properties', {})
        for param_name, param_info in properties.items():
            param_description = param_info.get('description', '')
            assert 'bash' not in param_description.lower(), (
                f"Tool '{function_info['name']}' parameter '{param_name}' "
                f"description contains 'bash' on Windows. "
                f'Parameter description: {param_description}'
            )
def test_in_context_learning_example_no_bash_on_windows():
    """The in-context learning example for a sample tool set must not mention 'bash'."""
    from openhands.agenthub.codeact_agent.tools.bash import create_cmd_run_tool
    from openhands.agenthub.codeact_agent.tools.finish import FinishTool
    from openhands.agenthub.codeact_agent.tools.str_replace_editor import (
        create_str_replace_editor_tool,
    )
    from openhands.llm.fn_call_converter import get_example_for_tools

    # Sample tool set: command runner, string-replace editor, and finish tool.
    sample_tools = [
        create_cmd_run_tool(),
        create_str_replace_editor_tool(),
        FinishTool,
    ]
    example = get_example_for_tools(sample_tools)
    lowered_example = example.lower()

    # 'bash' must be gone (case-insensitive).
    assert 'bash' not in lowered_example, (
        f"In-context learning example contains 'bash' on Windows platform. "
        f"It should be replaced with 'powershell'. "
        f'Example: {example}'
    )

    # An empty example has nothing further to verify.
    if not example:
        return

    # A non-empty example must mention 'powershell' instead.
    assert 'powershell' in lowered_example, (
        f"In-context learning example should contain 'powershell' on Windows platform. "
        f'Example: {example}'
    )
def test_refine_prompt_function_works():
    """Check refine_prompt's 'bash' -> 'powershell' rewriting on representative prompts."""
    from openhands.agenthub.codeact_agent.tools.bash import refine_prompt

    # Each case is a tuple of:
    #   (input prompt,
    #    exact expected output,
    #    fragments that must be absent from the lowercased output,
    #    fragments that must be present in the lowercased output,
    #    fragments that must be present in the output as-is)
    cases = [
        # Basic replacement
        (
            'Execute a bash command to list files',
            'Execute a powershell command to list files',
            ['bash'],
            ['powershell'],
            [],
        ),
        # Multiple occurrences
        (
            'Use bash to run bash commands in the bash shell',
            'Use powershell to run powershell commands in the powershell shell',
            ['bash'],
            [],
            [],
        ),
        # Case sensitivity: every casing of 'bash' is replaced
        (
            'BASH and Bash and bash should all be replaced',
            'powershell and powershell and powershell should all be replaced',
            ['bash'],
            [],
            [],
        ),
        # execute_bash tool name replacement
        (
            'Use the execute_bash tool to run commands',
            'Use the execute_powershell tool to run commands',
            ['execute_bash'],
            ['execute_powershell'],
            [],
        ),
        # Words merely containing 'bash' are preserved: 'bashful' stays,
        # while 'bash-like' becomes 'powershell-like'
        (
            'The bashful person likes bash-like syntax',
            'The bashful person likes powershell-like syntax',
            [],
            [],
            ['bashful', 'powershell-like'],
        ),
    ]

    for prompt, expected, absent_lowered, present_lowered, present_exact in cases:
        refined_prompt = refine_prompt(prompt)
        lowered = refined_prompt.lower()

        for fragment in absent_lowered:
            assert fragment not in lowered
        for fragment in present_lowered:
            assert fragment in lowered
        for fragment in present_exact:
            assert fragment in refined_prompt
        assert refined_prompt == expected
def test_refine_prompt_function_on_non_windows():
    """Verify that refine_prompt is a no-op when the platform is not Windows.

    ``sys.platform`` (as seen through the bash tool module) is patched to
    'linux'; the prompt must come back unchanged and still mention 'bash'.
    """
    from openhands.agenthub.codeact_agent.tools.bash import refine_prompt

    platform_attr = 'openhands.agenthub.codeact_agent.tools.bash.sys.platform'
    original_text = 'Execute a bash command to list files'

    # Only the call itself needs the simulated platform; the checks below
    # compare plain strings and are independent of sys.platform.
    with patch(platform_attr, 'linux'):
        result = refine_prompt(original_text)

    # On non-Windows, the prompt should remain unchanged
    assert result == original_text
    assert 'bash' in result.lower()