Compare commits

...

19 Commits

Author SHA1 Message Date
openhands
f10360f416 test: fix unit tests
- Add missing dependency 'markdown' for CLI TUI rendering
- Prevent env var WORKSPACE_MOUNT_PATH_IN_SANDBOX from overriding default when SANDBOX_VOLUMES lacks /workspace

Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-28 18:41:13 +00:00
openhands
bf13354bbd Make setup process more friendly and welcoming
- Add emojis and encouraging language to setup messages
- Replace technical jargon with conversational tone
- Add visual indicators for different setup steps
- Include completion messages that celebrate user progress
- Update setup script, Makefile, and VS Code build script
2025-08-11 18:15:52 +00:00
Robert Brennan
385acded2c Update SECURITY.md 2025-08-11 09:01:17 -04:00
Robert Brennan
ab079488c6 Create SECURITY.md 2025-08-09 14:37:18 -04:00
Boxuan Li
803bdced9c Fix Windows prompt refinement: ensure 'bash' is replaced with 'powershell' in all prompts (#10179)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-08 20:28:36 -07:00
Xingyao Wang
3eecac2003 docs: Add GPT-5 model recommendation and fix pricing display issue (#10177)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-08 19:19:59 +00:00
mamoodi
c02e09fc2d Hide Git Settings section from Application settings (#10176)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-08 19:06:40 +00:00
Tim O'Farrell
18f8661770 feat: add mcp_shttp_servers override to conversation initialization (#10171)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-08 18:05:44 +00:00
Xingyao Wang
04ff4a025b feat(cli): Use CLI to launch OpenHands UI server via Docker (#9783)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-09 02:04:07 +08:00
mamoodi
81ef363658 Increase stale bot inactivity time and better messaging (#10167)
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-08-08 16:41:15 +00:00
Xingyao Wang
1474c5bc1c Support gpt-5-2025-08-07 and add it to OpenHands provider (#10172)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-08 16:05:51 +00:00
sp.wack
9b0a5da839 Use EventStore directly in remember prompt; merge client services (#10143)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-08 18:03:03 +04:00
Graham Neubig
7ab2ad2c1b Fix authentication setup issues in unit tests (#10118)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-07 22:12:21 -04:00
Graham Neubig
8416a019cb Fix unit test failures by prioritizing current directory in PYTHONPATH (#10105)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-07 22:12:02 -04:00
Engel Nyst
73a7c7786d Load previous conversation by id (CLI) (#10156) 2025-08-07 23:09:20 +02:00
aeft
11d12c5a01 fix: prevent CLI argument parser defaults from overriding config file values (#10140) 2025-08-08 04:48:04 +08:00
Xingyao Wang
c4f303a07b chore(eval): Remove eval_infer_remote.sh script and related references (#10157)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-07 20:46:59 +00:00
Kenny Dizi
3a629cdf08 Add support model claude-opus-4-1-20250805 (#10120) 2025-08-07 18:48:34 +00:00
sp.wack
6ea33b657d chore(frontend): Remove some dead code (#10121) 2025-08-08 02:40:35 +08:00
79 changed files with 1375 additions and 604 deletions

View File

@@ -1,33 +1,53 @@
#!/bin/bash
set -euxo pipefail
# This script updates the PR description with commands to run the PR locally
# It adds both Docker and uvx commands
# Get the branch name for the PR
BRANCH_NAME=$(gh pr view $PR_NUMBER --json headRefName --jq .headRefName)
BRANCH_NAME=$(gh pr view "$PR_NUMBER" --json headRefName --jq .headRefName)
# Define the Docker command
DOCKER_RUN_COMMAND="docker run -it --rm \
-p 3000:3000 \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:$SHORT_SHA-nikolaik \
--name openhands-app-$SHORT_SHA \
docker.all-hands.dev/all-hands-ai/openhands:$SHORT_SHA"
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:${SHORT_SHA}-nikolaik \
--name openhands-app-${SHORT_SHA} \
docker.all-hands.dev/all-hands-ai/openhands:${SHORT_SHA}"
# Define the uvx command
UVX_RUN_COMMAND="uvx --python 3.12 --from git+https://github.com/All-Hands-AI/OpenHands@$BRANCH_NAME openhands"
UVX_RUN_COMMAND="uvx --python 3.12 --from git+https://github.com/All-Hands-AI/OpenHands@${BRANCH_NAME} openhands"
# Get the current PR body
PR_BODY=$(gh pr view $PR_NUMBER --json body --jq .body)
PR_BODY=$(gh pr view "$PR_NUMBER" --json body --jq .body)
# Prepare the new PR body with both commands
if echo "$PR_BODY" | grep -q "To run this PR locally, use the following command:"; then
# For existing PR descriptions, replace the command section
NEW_PR_BODY=$(echo "$PR_BODY" | sed "s|To run this PR locally, use the following command:.*\`\`\`|To run this PR locally, use the following command:\n\nGUI with Docker:\n\`\`\`\n$DOCKER_RUN_COMMAND\n\`\`\`\n\nCLI with uvx:\n\`\`\`\n$UVX_RUN_COMMAND\n\`\`\`|s")
# For existing PR descriptions, use a more robust approach
# Split the PR body at the "To run this PR locally" section and replace everything after it
BEFORE_SECTION=$(echo "$PR_BODY" | sed '/To run this PR locally, use the following command:/,$d')
NEW_PR_BODY=$(cat <<EOF
${BEFORE_SECTION}
To run this PR locally, use the following command:
GUI with Docker:
\`\`\`
${DOCKER_RUN_COMMAND}
\`\`\`
CLI with uvx:
\`\`\`
${UVX_RUN_COMMAND}
\`\`\`
EOF
)
else
# For new PR descriptions
NEW_PR_BODY="${PR_BODY}
# For new PR descriptions: use heredoc safely without indentation
NEW_PR_BODY=$(cat <<EOF
$PR_BODY
---
@@ -35,15 +55,17 @@ To run this PR locally, use the following command:
GUI with Docker:
\`\`\`
$DOCKER_RUN_COMMAND
${DOCKER_RUN_COMMAND}
\`\`\`
CLI with uvx:
\`\`\`
$UVX_RUN_COMMAND
\`\`\`"
${UVX_RUN_COMMAND}
\`\`\`
EOF
)
fi
# Update the PR description
echo "Updating PR description with Docker and uvx commands"
gh pr edit $PR_NUMBER --body "$NEW_PR_BODY"
gh pr edit "$PR_NUMBER" --body "$NEW_PR_BODY"

View File

@@ -48,11 +48,11 @@ jobs:
- name: Build Environment
run: make build
- name: Run Unit Tests
run: poetry run pytest --forked -n auto -svv ./tests/unit
run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest --forked -n auto -svv ./tests/unit
- name: Run Runtime Tests with CLIRuntime
run: TEST_RUNTIME=cli poetry run pytest -svv tests/runtime/test_bash.py
run: PYTHONPATH=".:$PYTHONPATH" TEST_RUNTIME=cli poetry run pytest -svv tests/runtime/test_bash.py
- name: Run E2E Tests
run: poetry run pytest -svv tests/e2e
run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest -svv tests/e2e
# Run specific Windows python tests
test-on-windows:
@@ -77,9 +77,11 @@ jobs:
- name: Run Windows unit tests
run: poetry run pytest -svv tests/unit/test_windows_bash.py
env:
PYTHONPATH: ".;$env:PYTHONPATH"
DEBUG: "1"
- name: Run Windows runtime tests with LocalRuntime
run: $env:TEST_RUNTIME="local"; poetry run pytest -svv tests/runtime/test_bash.py
env:
PYTHONPATH: ".;$env:PYTHONPATH"
TEST_RUNTIME: local
DEBUG: "1"

View File

@@ -12,11 +12,11 @@ jobs:
steps:
- uses: actions/stale@v9
with:
stale-issue-message: 'This issue is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
stale-pr-message: 'This PR is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
days-before-stale: 30
stale-issue-message: 'This issue is stale because it has been open for 40 days with no activity. Remove the stale label or leave a comment, otherwise it will be closed in 10 days.'
stale-pr-message: 'This PR is stale because it has been open for 40 days with no activity. Remove the stale label or leave a comment, otherwise it will be closed in 10 days.'
days-before-stale: 40
exempt-issue-labels: 'roadmap'
close-issue-message: 'This issue was closed because it has been stalled for over 30 days with no activity.'
close-pr-message: 'This PR was closed because it has been stalled for over 30 days with no activity.'
days-before-close: 7
close-issue-message: 'This issue was automatically closed due to 50 days of inactivity. We do this to help keep the issues somewhat manageable and focus on active issues.'
close-pr-message: 'This PR was closed because it had no activity for 50 days. If you feel this was closed in error, and you would like to continue the PR, please resubmit or let us know.'
days-before-close: 10
operations-per-run: 150

View File

@@ -1,13 +1,17 @@
#! /bin/bash
echo "Setting up the environment..."
echo "🚀 Welcome to OpenHands! Let's get your development environment ready..."
# Install pre-commit package
echo "📦 Installing pre-commit to help maintain code quality..."
python -m pip install pre-commit
# Install pre-commit hooks if .git directory exists
if [ -d ".git" ]; then
echo "Installing pre-commit hooks..."
echo "🔧 Setting up pre-commit hooks to keep your code clean..."
pre-commit install
make install-pre-commit-hooks
echo ""
echo "🎉 Setup complete! Your OpenHands development environment is ready!"
echo "💡 You can now start contributing to OpenHands. Happy coding! 🚀"
fi

View File

@@ -23,16 +23,16 @@ RESET=$(shell tput -Txterm sgr0)
# Build
build:
@echo "$(GREEN)Building project...$(RESET)"
@echo "$(GREEN)🚀 Building OpenHands project...$(RESET)"
@$(MAKE) -s check-dependencies
@$(MAKE) -s install-python-dependencies
@$(MAKE) -s install-frontend-dependencies
@$(MAKE) -s install-pre-commit-hooks
@$(MAKE) -s build-frontend
@echo "$(GREEN)Build completed successfully.$(RESET)"
@echo "$(GREEN)🎉 Build completed successfully! You're ready to go!$(RESET)"
check-dependencies:
@echo "$(YELLOW)Checking dependencies...$(RESET)"
@echo "$(YELLOW)🔍 Checking your development environment...$(RESET)"
@$(MAKE) -s check-system
@$(MAKE) -s check-python
@$(MAKE) -s check-npm
@@ -42,7 +42,7 @@ ifeq ($(INSTALL_DOCKER),)
endif
@$(MAKE) -s check-poetry
@$(MAKE) -s check-tmux
@echo "$(GREEN)Dependencies checked successfully.$(RESET)"
@echo "$(GREEN)✅ All dependencies look great!$(RESET)"
check-system:
@echo "$(YELLOW)Checking system...$(RESET)"
@@ -62,11 +62,11 @@ check-system:
fi
check-python:
@echo "$(YELLOW)Checking Python installation...$(RESET)"
@echo "$(YELLOW)🐍 Checking Python installation...$(RESET)"
@if command -v python$(PYTHON_VERSION) > /dev/null; then \
echo "$(BLUE)$(shell python$(PYTHON_VERSION) --version) is already installed.$(RESET)"; \
echo "$(BLUE)✅ Great! $(shell python$(PYTHON_VERSION) --version) is ready to go.$(RESET)"; \
else \
echo "$(RED)Python $(PYTHON_VERSION) is not installed. Please install Python $(PYTHON_VERSION) to continue.$(RESET)"; \
echo "$(RED)❌ Oops! Python $(PYTHON_VERSION) is not installed. Please install Python $(PYTHON_VERSION) to continue.$(RESET)"; \
exit 1; \
fi
@@ -117,76 +117,76 @@ check-tmux:
fi
check-poetry:
@echo "$(YELLOW)Checking Poetry installation...$(RESET)"
@echo "$(YELLOW)📝 Checking Poetry installation...$(RESET)"
@if command -v poetry > /dev/null; then \
POETRY_VERSION=$(shell poetry --version 2>&1 | sed -E 's/Poetry \(version ([0-9]+\.[0-9]+\.[0-9]+)\)/\1/'); \
IFS='.' read -r -a POETRY_VERSION_ARRAY <<< "$$POETRY_VERSION"; \
if [ $${POETRY_VERSION_ARRAY[0]} -gt 1 ] || ([ $${POETRY_VERSION_ARRAY[0]} -eq 1 ] && [ $${POETRY_VERSION_ARRAY[1]} -ge 8 ]); then \
echo "$(BLUE)$(shell poetry --version) is already installed.$(RESET)"; \
echo "$(BLUE)✅ Perfect! $(shell poetry --version) is ready to manage your dependencies.$(RESET)"; \
else \
echo "$(RED)Poetry 1.8 or later is required. You can install poetry by running the following command, then adding Poetry to your PATH:"; \
echo "$(RED)❌ We need Poetry 1.8 or later. You can install it by running:"; \
echo "$(RED) curl -sSL https://install.python-poetry.org | python$(PYTHON_VERSION) -$(RESET)"; \
echo "$(RED)More detail here: https://python-poetry.org/docs/#installing-with-the-official-installer$(RESET)"; \
echo "$(RED)📖 More details: https://python-poetry.org/docs/#installing-with-the-official-installer$(RESET)"; \
exit 1; \
fi; \
else \
echo "$(RED)Poetry is not installed. You can install poetry by running the following command, then adding Poetry to your PATH:"; \
echo "$(RED)Poetry is not installed. You can install it by running:"; \
echo "$(RED) curl -sSL https://install.python-poetry.org | python$(PYTHON_VERSION) -$(RESET)"; \
echo "$(RED)More detail here: https://python-poetry.org/docs/#installing-with-the-official-installer$(RESET)"; \
echo "$(RED)📖 More details: https://python-poetry.org/docs/#installing-with-the-official-installer$(RESET)"; \
exit 1; \
fi
install-python-dependencies:
@echo "$(GREEN)Installing Python dependencies...$(RESET)"
@echo "$(GREEN)📦 Installing Python dependencies...$(RESET)"
@if [ -z "${TZ}" ]; then \
echo "Defaulting TZ (timezone) to UTC"; \
echo "🌍 Defaulting timezone to UTC"; \
export TZ="UTC"; \
fi
poetry env use python$(PYTHON_VERSION)
@if [ "$(shell uname)" = "Darwin" ]; then \
echo "$(BLUE)Installing chroma-hnswlib...$(RESET)"; \
echo "$(BLUE)🍎 Installing macOS-specific dependencies...$(RESET)"; \
export HNSWLIB_NO_NATIVE=1; \
poetry run pip install chroma-hnswlib; \
fi
@if [ -n "${POETRY_GROUP}" ]; then \
echo "Installing only POETRY_GROUP=${POETRY_GROUP}"; \
echo "📋 Installing specific dependency group: ${POETRY_GROUP}"; \
poetry install --only $${POETRY_GROUP}; \
else \
poetry install --with dev,test,runtime; \
fi
@if [ "${INSTALL_PLAYWRIGHT}" != "false" ] && [ "${INSTALL_PLAYWRIGHT}" != "0" ]; then \
if [ -f "/etc/manjaro-release" ]; then \
echo "$(BLUE)Detected Manjaro Linux. Installing Playwright dependencies...$(RESET)"; \
echo "$(BLUE)🐧 Detected Manjaro Linux. Installing browser automation tools...$(RESET)"; \
poetry run pip install playwright; \
poetry run playwright install chromium; \
else \
if [ ! -f cache/playwright_chromium_is_installed.txt ]; then \
echo "Running playwright install --with-deps chromium..."; \
echo "🌐 Installing browser automation tools..."; \
poetry run playwright install --with-deps chromium; \
mkdir -p cache; \
touch cache/playwright_chromium_is_installed.txt; \
else \
echo "Setup already done. Skipping playwright installation."; \
echo "✅ Browser tools already set up. Skipping installation."; \
fi \
fi \
else \
echo "Skipping Playwright installation (INSTALL_PLAYWRIGHT=${INSTALL_PLAYWRIGHT})."; \
echo "⏭️ Skipping browser automation setup (INSTALL_PLAYWRIGHT=${INSTALL_PLAYWRIGHT})."; \
fi
@echo "$(GREEN)Python dependencies installed successfully.$(RESET)"
@echo "$(GREEN)🎉 Python dependencies installed successfully!$(RESET)"
install-frontend-dependencies: check-npm check-nodejs
@echo "$(YELLOW)Setting up frontend environment...$(RESET)"
@echo "$(YELLOW)Detect Node.js version...$(RESET)"
@echo "$(YELLOW)🎨 Setting up frontend environment...$(RESET)"
@echo "$(YELLOW)🔍 Detecting Node.js version...$(RESET)"
@cd frontend && node ./scripts/detect-node-version.js
echo "$(BLUE)Installing frontend dependencies with npm...$(RESET)"
echo "$(BLUE)📦 Installing frontend dependencies with npm...$(RESET)"
@cd frontend && npm install
@echo "$(GREEN)Frontend dependencies installed successfully.$(RESET)"
@echo "$(GREEN)Frontend dependencies installed successfully!$(RESET)"
install-pre-commit-hooks: check-python check-poetry install-python-dependencies
@echo "$(YELLOW)Installing pre-commit hooks...$(RESET)"
@echo "$(YELLOW)🔧 Installing pre-commit hooks...$(RESET)"
@git config --unset-all core.hooksPath || true
@poetry run pre-commit install --config $(PRE_COMMIT_CONFIG_PATH)
@echo "$(GREEN)Pre-commit hooks installed successfully.$(RESET)"
@echo "$(GREEN)Pre-commit hooks installed successfully!$(RESET)"
lint-backend: install-pre-commit-hooks
@echo "$(YELLOW)Running linters...$(RESET)"

15
SECURITY.md Normal file
View File

@@ -0,0 +1,15 @@
# Security Policy
**Please send all vulnerability reports to contact@all-hands.dev in addition to opening a security advisory on GitHub.**
## Security/Bugfix Versions
Security and bug fixes are generally provided only for the most recent version of OpenHands. Fixes are released either as part of the next minor version or as an on-demand patch version.
Security fixes are given priority and might be enough to cause a new version to be released.
## Reporting a Vulnerability
We encourage responsible disclosure of security vulnerabilities. If you find something suspicious, we encourage and appreciate your report!
### Ways to report
In order for the vulnerability reports to reach maintainers as soon as possible, the preferred way is to use the "Report a vulnerability" button under the "Security" tab of the associated GitHub project. This creates a private communication channel between the reporter and the maintainers.
In addition, please also reach out to the All Hands AI security team at contact@all-hands.dev.

View File

@@ -55,11 +55,11 @@ def build_vscode_extension():
print(f'--- Using pre-built VS Code extension: {vsix_path} ---')
return
print(f'--- Building VS Code extension in {VSCODE_EXTENSION_DIR} ---')
print(f'🔨 Building VS Code extension in {VSCODE_EXTENSION_DIR}')
try:
# Ensure npm dependencies are installed
print('--- Running npm install for VS Code extension ---')
print('📦 Installing dependencies for VS Code extension...')
subprocess.run(
['npm', 'install'],
cwd=VSCODE_EXTENSION_DIR,
@@ -68,7 +68,7 @@ def build_vscode_extension():
)
# Package the extension
print(f'--- Packaging VS Code extension ({VSIX_FILENAME}) ---')
print(f'📦 Packaging VS Code extension ({VSIX_FILENAME})...')
subprocess.run(
['npm', 'run', 'package-vsix'],
cwd=VSCODE_EXTENSION_DIR,
@@ -82,14 +82,14 @@ def build_vscode_extension():
f'VS Code extension package not found after build: {vsix_path}'
)
print(f'--- VS Code extension built successfully: {vsix_path} ---')
print(f'🎉 VS Code extension built successfully: {vsix_path}')
except subprocess.CalledProcessError as e:
print(f'--- Warning: Failed to build VS Code extension: {e} ---')
print('--- Continuing without building extension ---')
print(f'⚠️ Warning: Failed to build VS Code extension: {e}')
print('⏭️ Continuing without building extension...')
if not vsix_path.exists():
print('--- Warning: No pre-built VS Code extension found ---')
print('--- VS Code extension will not be available ---')
print('⚠️ Warning: No pre-built VS Code extension found')
print(' VS Code extension will not be available')
def build(setup_kwargs):
@@ -97,7 +97,7 @@ def build(setup_kwargs):
This function is called by Poetry during the build process.
`setup_kwargs` is a dictionary that will be passed to `setuptools.setup()`.
"""
print('--- Running custom Poetry build script (build_vscode.py) ---')
print('🔧 Running custom Poetry build script for VS Code extension...')
# Build the VS Code extension and place the .vsix file
build_vscode_extension()
@@ -105,10 +105,10 @@ def build(setup_kwargs):
# Poetry will handle including files based on pyproject.toml `include` patterns.
# Ensure openhands/integrations/vscode/*.vsix is included there.
print('--- Custom Poetry build script (build_vscode.py) finished ---')
print(' Custom Poetry build script completed!')
if __name__ == '__main__':
print('Running build_vscode.py directly for testing VS Code extension packaging...')
print('🧪 Testing VS Code extension packaging...')
build_vscode_extension()
print('Direct execution of build_vscode.py finished.')
print('✅ VS Code extension packaging test completed!')

View File

@@ -7,6 +7,67 @@ description: High level overview of the Graphical User Interface (GUI) in OpenHa
- [OpenHands is running](/usage/local-setup)
## Launching the GUI Server
### Using the CLI Command
You can launch the OpenHands GUI server directly from the command line using the `serve` command:
<Callout type="info">
**Prerequisites**: Either install the [OpenHands CLI](/usage/how-to/cli-mode) first, or, if you have `uv` installed, run `uvx --python 3.12 --from openhands-ai openhands serve` in place of `openhands serve`. If neither option is available, you'll need to use Docker directly (see the [Docker section](#using-docker-directly) below).
</Callout>
```bash
openhands serve
```
This command will:
- Check that Docker is installed and running
- Pull the required Docker images
- Launch the OpenHands GUI server at http://localhost:3000
- Use the same configuration directory (`~/.openhands`) as the CLI mode
#### Mounting Your Current Directory
To mount your current working directory into the GUI server container, use the `--mount-cwd` flag:
```bash
openhands serve --mount-cwd
```
This is useful when you want to work on files in your current directory through the GUI. The directory will be mounted at `/workspace` inside the container.
#### Using GPU Support
If you have NVIDIA GPUs and want to make them available to the OpenHands container, use the `--gpu` flag:
```bash
openhands serve --gpu
```
This will enable GPU support via nvidia-docker, mounting all available GPUs into the container. You can combine this with other flags:
```bash
openhands serve --gpu --mount-cwd
```
**Prerequisites for GPU support:**
- NVIDIA GPU drivers must be installed on your host system
- [NVIDIA Container Toolkit (nvidia-docker2)](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) must be installed and configured
#### Requirements
Before using the `openhands serve` command, ensure that:
- Docker is installed and running on your system
- You have internet access to pull the required Docker images
- Port 3000 is available on your system
The CLI will automatically check these requirements and provide helpful error messages if anything is missing.
### Using Docker Directly
Alternatively, you can run the GUI server using Docker directly. See the [local setup guide](/usage/local-setup) for detailed Docker instructions.
## Overview
### Initial Setup

View File

@@ -18,7 +18,7 @@ Based on these findings and community feedback, these are the latest models that
### Cloud / API-Based Models
- [anthropic/claude-sonnet-4-20250514](https://www.anthropic.com/api) (recommended)
- [openai/o4-mini](https://openai.com/index/introducing-o3-and-o4-mini/)
- [openai/gpt-5-2025-08-07](https://openai.com/api/) (recommended)
- [gemini/gemini-2.5-pro](https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/)
- [deepseek/deepseek-chat](https://api-docs.deepseek.com/)
- [moonshot/kimi-k2-0711-preview](https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2)

View File

@@ -32,4 +32,4 @@ When running OpenHands, you'll need to set the following in the OpenHands UI thr
Pricing follows official API provider rates. [You can view model prices here.](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json)
For `qwen3-coder-480b`, we charge the cheapest FP8 rate available on openrouter: $0.4 per million input tokens and $1.6 per million output tokens.
For `qwen3-coder-480b`, we charge the cheapest FP8 rate available on openrouter: \$0.4 per million input tokens and \$1.6 per million output tokens.

View File

@@ -66,6 +66,30 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to
### Start the App
#### Option 1: Using the CLI Launcher (Recommended)
If you have Python 3.12+ installed, you can use the CLI launcher for a simpler experience:
```bash
# Install OpenHands
pip install openhands-ai
# Launch the GUI server
openhands serve
# Or with GPU support (requires nvidia-docker)
openhands serve --gpu
# Or with current directory mounted
openhands serve --mount-cwd
```
Alternatively, if you have [uv](https://docs.astral.sh/uv/) installed, you can run `uvx --python 3.12 --from openhands-ai openhands serve`.
This will automatically handle Docker requirements checking, image pulling, and launching the GUI server. The `--gpu` flag enables GPU support via nvidia-docker, and `--mount-cwd` mounts your current directory into the container.
#### Option 2: Using Docker Directly
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.51-nikolaik

View File

@@ -18,8 +18,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -172,7 +172,7 @@ def process_instance(
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--answerer_model', '-a', default='gpt-3.5-turbo', help='answerer model'
)

View File

@@ -26,8 +26,8 @@ from openhands.controller.state.state import State
from openhands.core.config import (
AgentConfig,
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -525,7 +525,7 @@ def commit0_setup(dataset: pd.DataFrame, repo_split: str) -> pd.DataFrame:
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -31,8 +31,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
load_from_toml,
)
from openhands.core.config.utils import get_agent_config_arg
@@ -294,7 +294,7 @@ Here is the task:
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--level',
type=str,

View File

@@ -20,8 +20,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -134,7 +134,7 @@ def process_instance(
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--hubs',
type=str,

View File

@@ -38,8 +38,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -312,7 +312,7 @@ Ok now its time to start solving the question. Good luck!
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
# data split must be one of 'gpqa_main', 'gpqa_diamond', 'gpqa_experts', 'gpqa_extended'
parser.add_argument(
'--data-split',

View File

@@ -21,7 +21,7 @@ from evaluation.utils.shared import (
from openhands.core.config import (
LLMConfig,
OpenHandsConfig,
get_parser,
get_evaluation_parser,
load_openhands_config,
)
from openhands.core.logger import openhands_logger as logger
@@ -167,7 +167,7 @@ def process_predictions(predictions_path: str):
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'-s',
'--eval-split',

View File

@@ -30,8 +30,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
load_openhands_config,
)
from openhands.core.logger import openhands_logger as logger
@@ -358,7 +358,7 @@ Be thorough in your exploration, testing, and reasoning. It's fine if your think
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'-s',
'--eval-split',

View File

@@ -18,8 +18,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -267,7 +267,7 @@ def process_instance(
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -23,8 +23,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -229,7 +229,7 @@ def process_instance(
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
SUBSETS = [
# Eurus subset: https://arxiv.org/abs/2404.02078

View File

@@ -4,7 +4,11 @@ import pprint
import tqdm
from openhands.core.config import get_llm_config_arg, get_parser, load_openhands_config
from openhands.core.config import (
get_evaluation_parser,
get_llm_config_arg,
load_openhands_config,
)
from openhands.core.logger import openhands_logger as logger
from openhands.llm.llm import LLM
@@ -111,7 +115,7 @@ def classify_error(llm: LLM, failed_case: dict) -> str:
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--json_file_path',
type=str,

View File

@@ -34,8 +34,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
load_openhands_config,
)
from openhands.core.logger import openhands_logger as logger
@@ -273,7 +273,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'-s',
'--eval-split',

View File

@@ -30,7 +30,7 @@ from evaluation.utils.shared import (
from openhands.core.config import (
LLMConfig,
OpenHandsConfig,
get_parser,
get_evaluation_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime
@@ -323,7 +323,7 @@ def process_instance(
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--input-file',
type=str,

View File

@@ -32,8 +32,8 @@ from openhands.controller.state.state import State
from openhands.core.config import (
AgentConfig,
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -772,7 +772,7 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
if __name__ == '__main__':
# pdb.set_trace()
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -21,8 +21,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -239,7 +239,7 @@ If the program uses some packages that are incompatible, please figure out alter
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--use-knowledge',
type=str,

View File

@@ -183,24 +183,7 @@ The final results will be saved to `evaluation/evaluation_outputs/outputs/swe_be
- `report.json`: a JSON file that contains keys like `"resolved_ids"` pointing to instance IDs that are resolved by the agent.
- `logs/`: a directory of test logs
### Run evaluation with `RemoteRuntime`
OpenHands Remote Runtime is currently in beta (read [here](https://runtime.all-hands.dev/) for more details). It allows you to run rollouts in parallel in the cloud, so you don't need a powerful machine to run the evaluation.
Fill out [this form](https://docs.google.com/forms/d/e/1FAIpQLSckVz_JFwg2_mOxNZjCtr7aoBFI2Mwdan3f75J_TrdMS1JV2g/viewform) to apply if you want to try this out!
```bash
./evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh [output.jsonl filepath] [num_workers]
# Example - This evaluates patches generated by CodeActAgent on Llama-3.1-70B-Instruct-Turbo on "princeton-nlp/SWE-bench_Lite"'s test set, with 16 workers running in parallel
ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.eval.all-hands.dev" EVAL_DOCKER_IMAGE_PREFIX="us-central1-docker.pkg.dev/evaluation-092424/swe-bench-images" \
evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh evaluation/evaluation_outputs/outputs/swe-bench-lite/CodeActAgent/Llama-3.1-70B-Instruct-Turbo_maxiter_100_N_v1.9-no-hint/output.jsonl 16 "princeton-nlp/SWE-bench_Lite" "test"
```
To clean up all existing runtimes that you've already started, run:
```bash
ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/utils/scripts/cleanup_remote_runtime.sh
```
## SWT-Bench Evaluation

View File

@@ -26,7 +26,7 @@ from evaluation.utils.shared import (
from openhands.core.config import (
LLMConfig,
OpenHandsConfig,
get_parser,
get_evaluation_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime
@@ -353,7 +353,7 @@ def process_instance(
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--input-file',
type=str,

View File

@@ -43,8 +43,8 @@ from openhands.controller.state.state import State
from openhands.core.config import (
AgentConfig,
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.config.condenser_config import NoOpCondenserConfig
from openhands.core.config.utils import get_condenser_config_arg
@@ -732,7 +732,7 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -28,8 +28,8 @@ from evaluation.utils.shared import (
)
from openhands.controller.state.state import State
from openhands.core.config import (
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.config.condenser_config import NoOpCondenserConfig
from openhands.core.config.utils import get_condenser_config_arg
@@ -201,7 +201,7 @@ def process_instance(
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -31,8 +31,8 @@ from openhands.controller.state.state import State
from openhands.core.config import (
AgentConfig,
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -644,7 +644,7 @@ SWEGYM_EXCLUDE_IDS = [
]
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -1,46 +0,0 @@
#!/usr/bin/env bash
#
# Evaluate SWE-bench predictions with eval_infer.py, then merge the evaluation
# results back into the predictions file.
#
# Usage:
#   eval_infer_remote.sh INPUT_FILE [NUM_WORKERS] [DATASET] [SPLIT]
#
# Environment:
#   EVAL_LIMIT  optional; when set it is forwarded as --eval-n-limit.
set -eo pipefail

INPUT_FILE=$1
NUM_WORKERS=$2
DATASET=$3
SPLIT=$4

if [ -z "$INPUT_FILE" ]; then
  echo "INPUT_FILE not specified (should be a path to a jsonl file)"
  exit 1
fi

if [ -z "$DATASET" ]; then
  echo "DATASET not specified, use default princeton-nlp/SWE-bench_Lite"
  DATASET="princeton-nlp/SWE-bench_Lite"
fi

if [ -z "$SPLIT" ]; then
  echo "SPLIT not specified, use default test"
  SPLIT="test"
fi

if [ -z "$NUM_WORKERS" ]; then
  echo "NUM_WORKERS not specified, use default 1"
  NUM_WORKERS=1
fi

echo "... Evaluating on $INPUT_FILE ..."

# Build the command as an argv array instead of a string passed to `eval`:
# each value stays a single argument even if it contains spaces or shell
# metacharacters, and nothing in the inputs is re-parsed as shell code.
COMMAND=(
  poetry run python evaluation/benchmarks/swe_bench/eval_infer.py
  --eval-num-workers "$NUM_WORKERS"
  --input-file "$INPUT_FILE"
  --dataset "$DATASET"
  --split "$SPLIT"
)

if [ -n "$EVAL_LIMIT" ]; then
  echo "EVAL_LIMIT: $EVAL_LIMIT"
  COMMAND+=(--eval-n-limit "$EVAL_LIMIT")
fi

# Run the command
"${COMMAND[@]}"

# update the output with evaluation results
poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py "$INPUT_FILE"

View File

@@ -5,8 +5,7 @@ pynguin_ids = ['pydata__xarray-6548-16541', 'pydata__xarray-7003-16557', 'pydata
ids = ['pydata__xarray-3114-16452', 'pydata__xarray-3151-16453', 'pydata__xarray-3156-16454', 'pydata__xarray-3239-16456', 'pydata__xarray-3239-16457', 'pydata__xarray-3239-16458', 'pydata__xarray-3302-16459', 'pydata__xarray-3364-16461', 'pydata__xarray-3677-16471', 'pydata__xarray-3905-16478', 'pydata__xarray-4182-16484', 'pydata__xarray-4248-16486', 'pydata__xarray-4339-16487', 'pydata__xarray-4419-16488', 'pydata__xarray-4629-16492', 'pydata__xarray-4750-16496', 'pydata__xarray-4802-16505', 'pydata__xarray-4966-16515', 'pydata__xarray-4994-16516', 'pydata__xarray-5033-16517', 'pydata__xarray-5126-16518', 'pydata__xarray-5126-16519', 'pydata__xarray-5131-16520', 'pydata__xarray-5365-16529', 'pydata__xarray-5455-16530', 'pydata__xarray-5662-16532', 'pydata__xarray-5731-16534', 'pydata__xarray-6135-16535', 'pydata__xarray-6135-16536', 'pydata__xarray-6386-16537', 'pydata__xarray-6394-16538', 'pydata__xarray-6400-16539', 'pydata__xarray-6461-16540', 'pydata__xarray-6548-16541', 'pydata__xarray-6599-16543', 'pydata__xarray-6601-16544', 'pydata__xarray-6882-16548', 'pydata__xarray-6889-16549', 'pydata__xarray-7003-16557', 'pydata__xarray-7147-16571', 'pydata__xarray-7150-16572', 'pydata__xarray-7203-16577', 'pydata__xarray-7229-16578', 'pydata__xarray-7393-16581', 'pydata__xarray-7400-16582']
Command eval (our approach):
poetry run ./evaluation/benchmarks/testgeneval/scripts/eval_infer_remote.sh evaluation/evaluation_outputs/outputs/kjain14__testgeneval-test/CodeActAgent/gpt-4o_maxiter_25_N_v0.20.0-no-hint-run_1/output.jsonl 10 kjain14/testgeneval test true
Command run (our approach):
./evaluation/benchmarks/testgeneval/scripts/run_infer.sh llm.eval_gpt HEAD CodeActAgent -1 25 10 kjain14/testgeneval test 1 ../TestGenEval/results/testgeneval/preds/gpt-4o-2024-08-06__testgeneval__0.2__test.jsonl

View File

@@ -41,7 +41,7 @@ from evaluation.utils.shared import (
reset_logger_for_multiprocessing,
run_evaluation,
)
from openhands.core.config import OpenHandsConfig, SandboxConfig, get_parser
from openhands.core.config import OpenHandsConfig, SandboxConfig, get_evaluation_parser
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime
from openhands.events.action import CmdRunAction
@@ -484,7 +484,7 @@ def count_and_log_fields(evaluated_predictions, fields, key):
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--input-file', type=str, required=True, help='Path to input predictions file'
)

View File

@@ -37,8 +37,8 @@ from openhands.core.config import (
AgentConfig,
OpenHandsConfig,
SandboxConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -491,7 +491,7 @@ def prepare_dataset_pre(dataset: pd.DataFrame, filter_column: str) -> pd.DataFra
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -18,8 +18,8 @@ from openhands.core.config import (
LLMConfig,
OpenHandsConfig,
get_agent_config_arg,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.config.agent_config import AgentConfig
from openhands.core.logger import openhands_logger as logger
@@ -197,7 +197,7 @@ def run_evaluator(
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--task-image-name',
type=str,

View File

@@ -19,8 +19,8 @@ from evaluation.utils.shared import (
from openhands.controller.state.state import State
from openhands.core.config import (
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -157,7 +157,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -31,8 +31,8 @@ from openhands.controller.state.state import State
from openhands.core.config import (
AgentConfig,
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
@@ -565,7 +565,7 @@ SWEGYM_EXCLUDE_IDS = [
]
if __name__ == '__main__':
parser = get_parser()
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,

View File

@@ -1,5 +1,5 @@
import { describe, expect, it } from "vitest";
import { FileService } from "#/api/file-service/file-service.api";
import OpenHands from "#/api/open-hands";
import {
FILE_VARIANTS_1,
FILE_VARIANTS_2,
@@ -10,20 +10,20 @@ import {
* You can find the mock handlers in `frontend/src/mocks/file-service-handlers.ts`.
*/
describe("FileService", () => {
describe("OpenHands File API", () => {
it("should get a list of files", async () => {
await expect(FileService.getFiles("test-conversation-id")).resolves.toEqual(
await expect(OpenHands.getFiles("test-conversation-id")).resolves.toEqual(
FILE_VARIANTS_1,
);
await expect(
FileService.getFiles("test-conversation-id-2"),
OpenHands.getFiles("test-conversation-id-2"),
).resolves.toEqual(FILE_VARIANTS_2);
});
it("should get content of a file", async () => {
await expect(
FileService.getFile("test-conversation-id", "file1.txt"),
OpenHands.getFile("test-conversation-id", "file1.txt"),
).resolves.toEqual("Content of file1.txt");
});
});

View File

@@ -3,8 +3,6 @@ import { afterEach, describe, expect, it, vi } from "vitest";
import userEvent from "@testing-library/user-event";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import { LaunchMicroagentModal } from "#/components/features/chat/microagent/launch-microagent-modal";
import { MemoryService } from "#/api/memory-service/memory-service.api";
import { FileService } from "#/api/file-service/file-service.api";
import { I18nKey } from "#/i18n/declaration";
vi.mock("react-router", async () => ({

View File

@@ -82,5 +82,11 @@ describe("extractModelAndProvider", () => {
model: "claude-opus-4-20250514",
separator: "/",
});
expect(extractModelAndProvider("claude-opus-4-1-20250805")).toEqual({
provider: "anthropic",
model: "claude-opus-4-1-20250805",
separator: "/",
});
});
});

View File

@@ -1,44 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">
<link rel="manifest" href="/site.webmanifest">
<link rel="mask-icon" href="/safari-pinned-tab.svg" color="#5bbad5">
<meta name="msapplication-TileColor" content="#da532c">
<meta name="theme-color" content="#ffffff">
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="theme-color" content="#000000" />
<meta
name="description"
content="OpenHands: Code Less, Make More"
/>
<!--
Notice the use of %PUBLIC_URL% in the tags above.
It will be replaced with the URL of the `public` folder during the build.
Only files inside the `public` folder can be referenced from the HTML.
Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
work correctly both with client-side routing and a non-root public URL.
Learn how to configure a non-root public URL by running `npm run build`.
-->
<title>OpenHands</title>
</head>
<body>
<noscript>You need to enable JavaScript to run this app.</noscript>
<div id="root"></div>
<!--
This HTML file is a template.
If you open it directly in the browser, you will see an empty page.
You can add webfonts, meta tags, or analytics to this file.
The build step will place the bundled scripts into the <body> tag.
To begin the development, run `npm start` or `yarn start`.
To create a production bundle, use `npm run build` or `yarn build`.
-->
<script type="module" src="/src/index.tsx"></script>
</body>
</html>

View File

@@ -1,66 +0,0 @@
import { openHands } from "../open-hands-axios";
import { GetFilesResponse, GetFileResponse } from "./file-service.types";
import { getConversationUrl } from "../conversation.utils";
import { FileUploadSuccessResponse } from "../open-hands.types";
/**
 * Static helpers for the conversation-scoped file endpoints
 * (`list-files`, `select-file`, `upload-files`).
 */
export class FileService {
  /**
   * Retrieve the list of files available in the workspace
   * @param conversationId ID of the conversation
   * @param path Optional path to list; when omitted, all files in the workspace are listed
   * @returns List of files available in the given path (or in the whole workspace)
   */
  static async getFiles(
    conversationId: string,
    path?: string,
  ): Promise<GetFilesResponse> {
    const response = await openHands.get<GetFilesResponse>(
      `${getConversationUrl(conversationId)}/list-files`,
      { params: { path } },
    );
    return response.data;
  }

  /**
   * Retrieve the content of a file
   * @param conversationId ID of the conversation
   * @param path Full path of the file to retrieve
   * @returns The `code` field of the response, i.e. the file content
   */
  static async getFile(conversationId: string, path: string): Promise<string> {
    const endpoint = `${getConversationUrl(conversationId)}/select-file`;
    const response = await openHands.get<GetFileResponse>(endpoint, {
      params: { file: path },
    });
    return response.data.code;
  }

  /**
   * Upload multiple files to the workspace
   * @param conversationId ID of the conversation
   * @param files List of files; each one is sent as a `files` form field
   * @returns list of uploaded files, list of skipped files
   */
  static async uploadFiles(
    conversationId: string,
    files: File[],
  ): Promise<FileUploadSuccessResponse> {
    const payload = new FormData();
    files.forEach((file) => {
      payload.append("files", file);
    });

    const endpoint = `${getConversationUrl(conversationId)}/upload-files`;
    const requestConfig = {
      headers: {
        "Content-Type": "multipart/form-data",
      },
    };
    const { data } = await openHands.post<FileUploadSuccessResponse>(
      endpoint,
      payload,
      requestConfig,
    );
    return data;
  }
}

View File

@@ -1,5 +0,0 @@
export type GetFilesResponse = string[];
export interface GetFileResponse {
code: string;
}

View File

@@ -1,21 +0,0 @@
import { openHands } from "../open-hands-axios";
interface GetPromptResponse {
status: string;
prompt: string;
}
/** Static helper for the conversation `remember_prompt` endpoint. */
export class MemoryService {
  /**
   * Fetch the prompt associated with an event of a conversation.
   * @param conversationId ID of the conversation
   * @param eventId ID of the event, sent as the `event_id` query parameter
   * @returns The `prompt` field of the response
   */
  static async getPrompt(
    conversationId: string,
    eventId: number,
  ): Promise<string> {
    const endpoint = `/api/conversations/${conversationId}/remember_prompt`;
    const response = await openHands.get<GetPromptResponse>(endpoint, {
      params: { event_id: eventId },
    });
    return response.data.prompt;
  }
}

View File

@@ -15,6 +15,9 @@ import {
GetMicroagentPromptResponse,
CreateMicroagent,
MicroagentContentResponse,
FileUploadSuccessResponse,
GetFilesResponse,
GetFileResponse,
} from "./open-hands.types";
import { openHands } from "./open-hands-axios";
import { ApiSettings, PostApiSettings, Provider } from "#/types/settings";
@@ -618,12 +621,11 @@ class OpenHands {
conversationId: string,
eventId: number,
): Promise<string> {
const { data } = await openHands.get<GetMicroagentPromptResponse>(
`/api/conversations/${conversationId}/remember_prompt`,
{
params: { event_id: eventId },
},
);
const url = `${this.getConversationUrl(conversationId)}/remember-prompt`;
const { data } = await openHands.get<GetMicroagentPromptResponse>(url, {
params: { event_id: eventId },
headers: this.getConversationHeaders(),
});
return data.prompt;
}
@@ -640,6 +642,69 @@ class OpenHands {
return data;
}
/**
 * Retrieve the list of files available in the workspace
 * @param conversationId ID of the conversation
 * @param path Optional path to list; when omitted, all files in the workspace are listed
 * @returns List of files available in the given path (or in the whole workspace)
 */
static async getFiles(
  conversationId: string,
  path?: string,
): Promise<GetFilesResponse> {
  const response = await openHands.get<GetFilesResponse>(
    `${this.getConversationUrl(conversationId)}/list-files`,
    {
      params: { path },
      headers: this.getConversationHeaders(),
    },
  );
  return response.data;
}
/**
 * Retrieve the content of a file
 * @param conversationId ID of the conversation
 * @param path Full path of the file to retrieve
 * @returns The `code` field of the response, i.e. the file content
 */
static async getFile(conversationId: string, path: string): Promise<string> {
  const endpoint = `${this.getConversationUrl(conversationId)}/select-file`;
  const response = await openHands.get<GetFileResponse>(endpoint, {
    params: { file: path },
    headers: this.getConversationHeaders(),
  });
  return response.data.code;
}
/**
 * Upload multiple files to the workspace
 * @param conversationId ID of the conversation
 * @param files List of files; each one is sent as a `files` form field
 * @returns list of uploaded files, list of skipped files
 */
static async uploadFiles(
  conversationId: string,
  files: File[],
): Promise<FileUploadSuccessResponse> {
  const payload = new FormData();
  files.forEach((file) => {
    payload.append("files", file);
  });

  const endpoint = `${this.getConversationUrl(conversationId)}/upload-files`;
  const requestConfig = {
    headers: {
      "Content-Type": "multipart/form-data",
      ...this.getConversationHeaders(),
    },
  };
  const { data } = await openHands.post<FileUploadSuccessResponse>(
    endpoint,
    payload,
    requestConfig,
  );
  return data;
}
/**
* Get the user installation IDs
* @param provider The provider to get installation IDs for (github, bitbucket, etc.)

View File

@@ -158,3 +158,9 @@ export interface MicroagentContentResponse {
git_provider: Provider;
triggers: string[];
}
export type GetFilesResponse = string[];
export interface GetFileResponse {
code: string;
}

View File

@@ -1,11 +1,11 @@
import { useMutation } from "@tanstack/react-query";
import { FileService } from "#/api/file-service/file-service.api";
import OpenHands from "#/api/open-hands";
export const useUploadFiles = () =>
useMutation({
mutationKey: ["upload-files"],
mutationFn: (variables: { conversationId: string; files: File[] }) =>
FileService.uploadFiles(variables.conversationId!, variables.files),
OpenHands.uploadFiles(variables.conversationId!, variables.files),
onSuccess: async () => {},
meta: {
disableToast: true,

View File

@@ -1,13 +1,13 @@
import { useQuery } from "@tanstack/react-query";
import { useConversationId } from "../use-conversation-id";
import { FileService } from "#/api/file-service/file-service.api";
import OpenHands from "#/api/open-hands";
export const useGetMicroagents = (microagentDirectory: string) => {
const { conversationId } = useConversationId();
return useQuery({
queryKey: ["files", "microagents", conversationId, microagentDirectory],
queryFn: () => FileService.getFiles(conversationId!, microagentDirectory),
queryFn: () => OpenHands.getFiles(conversationId!, microagentDirectory),
enabled: !!conversationId,
select: (data) =>
data.map((fileName) => fileName.replace(microagentDirectory, "")),

View File

@@ -1,5 +1,5 @@
import { useQuery } from "@tanstack/react-query";
import { MemoryService } from "#/api/memory-service/memory-service.api";
import OpenHands from "#/api/open-hands";
import { useConversationId } from "../use-conversation-id";
export const useMicroagentPrompt = (eventId: number) => {
@@ -7,7 +7,7 @@ export const useMicroagentPrompt = (eventId: number) => {
return useQuery({
queryKey: ["memory", "prompt", conversationId, eventId],
queryFn: () => MemoryService.getPrompt(conversationId!, eventId),
queryFn: () => OpenHands.getMicroagentPrompt(conversationId!, eventId),
enabled: !!conversationId,
staleTime: 1000 * 60 * 5, // 5 minutes
gcTime: 1000 * 60 * 15, // 15 minutes

View File

@@ -222,7 +222,7 @@ function AppSettingsScreen() {
className="w-full max-w-[680px]" // Match the width of the language field
/>
<div className="border-t border-t-tertiary pt-6 mt-2">
<div className="border-t border-t-tertiary pt-6 mt-2 hidden">
<h3 className="text-lg font-medium mb-4">
{t(I18nKey.SETTINGS$GIT_SETTINGS)}
</h3>

View File

@@ -14,6 +14,7 @@ export const VERIFIED_MODELS = [
"claude-3-7-sonnet-20250219",
"claude-sonnet-4-20250514",
"claude-opus-4-20250514",
"claude-opus-4-1-20250805",
"gemini-2.5-pro",
"o4-mini",
"deepseek-chat",
@@ -22,11 +23,13 @@ export const VERIFIED_MODELS = [
"devstral-medium-2507",
"kimi-k2-0711-preview",
"qwen3-coder-480b",
"gpt-5-2025-08-07",
];
// LiteLLM does not return OpenAI models with the provider, so we list them here to set them ourselves for consistency
// (e.g., they return `gpt-4o` instead of `openai/gpt-4o`)
export const VERIFIED_OPENAI_MODELS = [
"gpt-5-2025-08-07",
"gpt-4o",
"gpt-4o-mini",
"gpt-4.1",
@@ -47,6 +50,7 @@ export const VERIFIED_ANTHROPIC_MODELS = [
"claude-3-7-sonnet-20250219",
"claude-sonnet-4-20250514",
"claude-opus-4-20250514",
"claude-opus-4-1-20250805",
];
// LiteLLM does not return the compatible Mistral models with the provider, so we list them here to set them ourselves
@@ -61,7 +65,9 @@ export const VERIFIED_MISTRAL_MODELS = [
// (e.g., they return `claude-sonnet-4-20250514` instead of `openhands/claude-sonnet-4-20250514`)
export const VERIFIED_OPENHANDS_MODELS = [
"claude-sonnet-4-20250514",
"gpt-5-2025-08-07",
"claude-opus-4-20250514",
"claude-opus-4-1-20250805",
"gemini-2.5-pro",
"o3",
"o4-mini",

View File

@@ -1,3 +1,4 @@
import re
import sys
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
@@ -37,7 +38,16 @@ _SHORT_BASH_DESCRIPTION = """Execute a bash command in the terminal.
def refine_prompt(prompt: str):
if sys.platform == 'win32':
return prompt.replace('bash', 'powershell')
# Replace 'bash' with 'powershell' including tool names like 'execute_bash'
# First replace 'execute_bash' with 'execute_powershell' to handle tool names
result = re.sub(
r'\bexecute_bash\b', 'execute_powershell', prompt, flags=re.IGNORECASE
)
# Then replace standalone 'bash' with 'powershell'
result = re.sub(
r'(?<!execute_)(?<!_)\bbash\b', 'powershell', result, flags=re.IGNORECASE
)
return result
return prompt

View File

@@ -0,0 +1 @@
"""OpenHands CLI module."""

54
openhands/cli/entry.py Normal file
View File

@@ -0,0 +1,54 @@
"""Main entry point for OpenHands CLI with subcommand support."""
import sys
import openhands
import openhands.cli.suppress_warnings # noqa: F401
from openhands.cli.gui_launcher import launch_gui_server
from openhands.cli.main import run_cli_command
from openhands.core.config import get_cli_parser
from openhands.core.config.arg_utils import get_subparser
def main():
    """Parse the command line and dispatch to the `cli` or `serve` subcommand.

    For backward compatibility, an invocation whose first argument is not a
    known subcommand (including a bare invocation) is treated as `cli`.
    """
    parser = get_cli_parser()
    argv = sys.argv  # same list object; in-place edits are seen by parse_args()

    # A lone --help/-h prints the top-level help followed by the `cli` help.
    if len(argv) == 2 and argv[1] in ('--help', '-h'):
        print(parser.format_help())
        print('\n' + '=' * 80)
        print('CLI command help:\n')
        print(get_subparser(parser, 'cli').format_help())
        sys.exit(0)

    # No known subcommand in first position: behave like "openhands cli ...".
    first_is_subcommand = len(argv) > 1 and argv[1] in ('cli', 'serve')
    if argv and not first_is_subcommand:
        argv.insert(1, 'cli')

    args = parser.parse_args()

    if getattr(args, 'version', False):
        print(f'OpenHands CLI version: {openhands.get_version()}')
        sys.exit(0)

    command = args.command
    if command == 'serve':
        launch_gui_server(mount_cwd=args.mount_cwd, gpu=args.gpu)
        return
    if command == 'cli' or command is None:
        run_cli_command(args)
        return

    # Unknown command: show usage and signal failure.
    parser.print_help()
    sys.exit(1)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,219 @@
"""GUI launcher for OpenHands CLI."""
import os
import shutil
import subprocess
import sys
from pathlib import Path
from prompt_toolkit import print_formatted_text
from prompt_toolkit.formatted_text import HTML
from openhands import __version__
def _format_docker_command_for_logging(cmd: list[str]) -> str:
"""Format a Docker command for logging with grey color.
Args:
cmd (list[str]): The Docker command as a list of strings
Returns:
str: The formatted command string in grey HTML color
"""
cmd_str = ' '.join(cmd)
return f'<grey>Running Docker command: {cmd_str}</grey>'
def check_docker_requirements() -> bool:
    """Verify that the `docker` executable exists and `docker info` succeeds.

    A two-line diagnostic (red headline, grey detail) is printed for each
    failure mode.

    Returns:
        bool: True if Docker is available and running, False otherwise.
    """

    def _report(headline_markup: str, detail_markup: str) -> None:
        # Print the headline first, then the detail line.
        for markup in (headline_markup, detail_markup):
            print_formatted_text(HTML(markup))

    # Is the docker executable on PATH?
    docker_path = shutil.which('docker')
    if not docker_path:
        _report(
            '<ansired>❌ Docker is not installed or not in PATH.</ansired>',
            '<grey>Please install Docker first: https://docs.docker.com/get-docker/</grey>',
        )
        return False

    # Is the daemon answering? `docker info` is given 10 seconds.
    try:
        probe = subprocess.run(
            ['docker', 'info'], capture_output=True, text=True, timeout=10
        )
    except (subprocess.TimeoutExpired, subprocess.SubprocessError) as e:
        _report(
            '<ansired>❌ Failed to check Docker status.</ansired>',
            f'<grey>Error: {e}</grey>',
        )
        return False

    if probe.returncode != 0:
        _report(
            '<ansired>❌ Docker daemon is not running.</ansired>',
            '<grey>Please start Docker and try again.</grey>',
        )
        return False

    return True
def ensure_config_dir_exists() -> Path:
    """Create the `.openhands` directory in the user's home if missing; return its path."""
    openhands_home = Path.home().joinpath('.openhands')
    # exist_ok=True makes repeated calls a no-op when the directory is already there.
    openhands_home.mkdir(exist_ok=True)
    return openhands_home
def launch_gui_server(mount_cwd: bool = False, gpu: bool = False) -> None:
    """Launch the OpenHands GUI server using Docker.

    Checks the Docker prerequisites, pulls the runtime image that matches the
    installed OpenHands version, then runs the application image in the
    foreground (port 3000) until it exits or the user presses Ctrl+C.

    The process is terminated via ``sys.exit``: status 1 when Docker is
    unavailable, the pull fails or times out, or ``docker run`` returns a
    non-zero status; status 0 on Ctrl+C.

    Args:
        mount_cwd: If True, mount the current working directory into the container.
        gpu: If True, enable GPU support by mounting all GPUs into the container via nvidia-docker.
    """
    # Dynamic text interpolated into HTML(...) markup below (the working
    # directory, exception messages that embed the full command line) is
    # escaped so that characters such as '&' or '<' are shown literally
    # instead of being interpreted as markup.
    from html import escape as escape_markup

    print_formatted_text(
        HTML('<ansiblue>🚀 Launching OpenHands GUI server...</ansiblue>')
    )
    print_formatted_text('')

    # Check Docker requirements
    if not check_docker_requirements():
        sys.exit(1)

    # Ensure config directory exists
    config_dir = ensure_config_dir_exists()

    # Get the current version for the Docker image
    version = __version__
    runtime_image = f'docker.all-hands.dev/all-hands-ai/runtime:{version}-nikolaik'
    app_image = f'docker.all-hands.dev/all-hands-ai/openhands:{version}'

    print_formatted_text(HTML('<grey>Pulling required Docker images...</grey>'))

    # Pull the runtime image first
    pull_cmd = ['docker', 'pull', runtime_image]
    print_formatted_text(HTML(_format_docker_command_for_logging(pull_cmd)))
    try:
        subprocess.run(
            pull_cmd,
            check=True,
            timeout=300,  # 5 minutes timeout
        )
    except subprocess.CalledProcessError:
        print_formatted_text(
            HTML('<ansired>❌ Failed to pull runtime image.</ansired>')
        )
        sys.exit(1)
    except subprocess.TimeoutExpired:
        print_formatted_text(
            HTML('<ansired>❌ Timeout while pulling runtime image.</ansired>')
        )
        sys.exit(1)

    print_formatted_text('')
    print_formatted_text(
        HTML('<ansigreen>✅ Starting OpenHands GUI server...</ansigreen>')
    )
    print_formatted_text(
        HTML('<grey>The server will be available at: http://localhost:3000</grey>')
    )
    print_formatted_text(HTML('<grey>Press Ctrl+C to stop the server.</grey>'))
    print_formatted_text('')

    # Build the Docker command
    docker_cmd = [
        'docker',
        'run',
        '-it',
        '--rm',
        '--pull=always',
        '-e',
        f'SANDBOX_RUNTIME_CONTAINER_IMAGE={runtime_image}',
        '-e',
        'LOG_ALL_EVENTS=true',
        '-v',
        '/var/run/docker.sock:/var/run/docker.sock',
        '-v',
        f'{config_dir}:/.openhands',
    ]

    # Add GPU support if requested
    if gpu:
        print_formatted_text(
            HTML('<ansigreen>🖥️ Enabling GPU support via nvidia-docker...</ansigreen>')
        )
        # Add the --gpus all flag to enable all GPUs; it is inserted right
        # after 'docker run' so it precedes the other options.
        docker_cmd.insert(2, '--gpus')
        docker_cmd.insert(3, 'all')
        # Add environment variable to pass GPU support to sandbox containers
        docker_cmd.extend(
            [
                '-e',
                'SANDBOX_ENABLE_GPU=true',
            ]
        )

    # Add current working directory mount if requested
    if mount_cwd:
        cwd = Path.cwd()
        # Following the documentation at https://docs.all-hands.dev/usage/runtimes/docker#connecting-to-your-filesystem
        docker_cmd.extend(
            [
                '-e',
                f'SANDBOX_VOLUMES={cwd}:/workspace:rw',
            ]
        )

        # Set user ID for Unix-like systems only
        if os.name != 'nt':  # Not Windows
            try:
                user_id = subprocess.check_output(['id', '-u'], text=True).strip()
                docker_cmd.extend(['-e', f'SANDBOX_USER_ID={user_id}'])
            except (subprocess.CalledProcessError, FileNotFoundError):
                # If 'id' command fails or doesn't exist, skip setting user ID
                pass

        # Print the folder that will be mounted to inform the user
        shown_cwd = escape_markup(str(cwd), quote=False)
        print_formatted_text(
            HTML(
                f'<ansigreen>📂 Mounting current directory:</ansigreen> <ansiyellow>{shown_cwd}</ansiyellow> <ansigreen>to</ansigreen> <ansiyellow>/workspace</ansiyellow>'
            )
        )

    docker_cmd.extend(
        [
            '-p',
            '3000:3000',
            '--add-host',
            'host.docker.internal:host-gateway',
            '--name',
            'openhands-app',
            app_image,
        ]
    )

    try:
        # Log and run the Docker command
        print_formatted_text(HTML(_format_docker_command_for_logging(docker_cmd)))
        subprocess.run(docker_cmd, check=True)
    except subprocess.CalledProcessError as e:
        print_formatted_text('')
        print_formatted_text(
            HTML('<ansired>❌ Failed to start OpenHands GUI server.</ansired>')
        )
        # The exception text embeds the whole command (including paths), so escape it.
        shown_error = escape_markup(str(e), quote=False)
        print_formatted_text(HTML(f'<grey>Error: {shown_error}</grey>'))
        sys.exit(1)
    except KeyboardInterrupt:
        print_formatted_text('')
        print_formatted_text(
            HTML('<ansigreen>✓ OpenHands GUI server stopped successfully.</ansigreen>')
        )
        sys.exit(0)

View File

@@ -45,7 +45,6 @@ from openhands.controller import AgentController
from openhands.controller.agent import Agent
from openhands.core.config import (
OpenHandsConfig,
parse_arguments,
setup_config_from_args,
)
from openhands.core.config.condenser_config import NoOpCondenserConfig
@@ -129,12 +128,13 @@ async def run_session(
conversation_instructions: str | None = None,
session_name: str | None = None,
skip_banner: bool = False,
conversation_id: str | None = None,
) -> bool:
reload_microagents = False
new_session_requested = False
exit_reason = ExitReason.INTENTIONAL
sid = generate_sid(config, session_name)
sid = conversation_id or generate_sid(config, session_name)
is_loaded = asyncio.Event()
is_paused = asyncio.Event() # Event to track agent pause requests
always_confirm_mode = False # Flag to enable always confirm mode
@@ -523,10 +523,8 @@ def run_alias_setup_flow(config: OpenHandsConfig) -> None:
print_formatted_text('')
async def main_with_loop(loop: asyncio.AbstractEventLoop) -> None:
async def main_with_loop(loop: asyncio.AbstractEventLoop, args) -> None:
"""Runs the agent in CLI mode."""
args = parse_arguments()
# Set log level from command line argument if provided
if args.log_level and isinstance(args.log_level, str):
log_level = getattr(logging, str(args.log_level).upper())
@@ -574,13 +572,9 @@ async def main_with_loop(loop: asyncio.AbstractEventLoop) -> None:
# Use settings from settings store if available and override with command line arguments
if settings:
# Handle agent configuration
if args.agent_cls:
config.default_agent = str(args.agent_cls)
else:
# settings.agent is not None because we check for it in setup_config_from_args
assert settings.agent is not None
config.default_agent = settings.agent
# settings.agent is not None because we check for it in setup_config_from_args
assert settings.agent is not None
config.default_agent = settings.agent
# Handle LLM configuration with proper precedence:
# 1. CLI parameters (-l) have highest precedence (already handled in setup_config_from_args)
@@ -705,6 +699,7 @@ After reviewing the file, please ask the user what they would like to do with it
task_str,
session_name=args.name,
skip_banner=banner_shown,
conversation_id=args.conversation,
)
# If a new session was requested, run it
@@ -717,18 +712,19 @@ After reviewing the file, please ask the user what they would like to do with it
get_runtime_cls(config.runtime).teardown(config)
def main():
def run_cli_command(args):
"""Run the CLI command with proper error handling and cleanup."""
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
loop.run_until_complete(main_with_loop(loop))
loop.run_until_complete(main_with_loop(loop, args))
except KeyboardInterrupt:
print_formatted_text('⚠️ Session was interrupted: interrupted\n')
except ConnectionRefusedError as e:
print(f'Connection refused: {e}')
print_formatted_text(f'Connection refused: {e}')
sys.exit(1)
except Exception as e:
print(f'An error occurred: {e}')
print_formatted_text(f'An error occurred: {e}')
sys.exit(1)
finally:
try:
@@ -741,9 +737,5 @@ def main():
loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
loop.close()
except Exception as e:
print(f'Error during cleanup: {e}')
print_formatted_text(f'Error during cleanup: {e}')
sys.exit(1)
if __name__ == '__main__':
main()

View File

@@ -27,7 +27,7 @@ from openhands.core.config.condenser_config import (
CondenserPipelineConfig,
ConversationWindowCondenserConfig,
)
from openhands.core.config.utils import OH_DEFAULT_AGENT
from openhands.core.config.config_utils import OH_DEFAULT_AGENT
from openhands.memory.condenser.impl.llm_summarizing_condenser import (
LLMSummarizingCondenserConfig,
)

View File

@@ -150,6 +150,7 @@ def organize_models_and_providers(
VERIFIED_PROVIDERS = ['openhands', 'anthropic', 'openai', 'mistral']
VERIFIED_OPENAI_MODELS = [
'gpt-5-2025-08-07',
'o4-mini',
'gpt-4o',
'gpt-4o-mini',
@@ -164,6 +165,7 @@ VERIFIED_OPENAI_MODELS = [
VERIFIED_ANTHROPIC_MODELS = [
'claude-sonnet-4-20250514',
'claude-opus-4-20250514',
'claude-opus-4-1-20250805',
'claude-3-7-sonnet-20250219',
'claude-3-sonnet-20240229',
'claude-3-opus-20240229',
@@ -183,7 +185,9 @@ VERIFIED_MISTRAL_MODELS = [
VERIFIED_OPENHANDS_MODELS = [
'claude-sonnet-4-20250514',
'gpt-5-2025-08-07',
'claude-opus-4-20250514',
'claude-opus-4-1-20250805',
'devstral-small-2507',
'devstral-medium-2507',
'o3',

View File

@@ -1,4 +1,9 @@
from openhands.core.config.agent_config import AgentConfig
from openhands.core.config.arg_utils import (
get_cli_parser,
get_evaluation_parser,
get_headless_parser,
)
from openhands.core.config.cli_config import CLIConfig
from openhands.core.config.config_utils import (
OH_DEFAULT_AGENT,
@@ -15,7 +20,6 @@ from openhands.core.config.utils import (
finalize_config,
get_agent_config_arg,
get_llm_config_arg,
get_parser,
load_from_env,
load_from_toml,
load_openhands_config,
@@ -41,7 +45,9 @@ __all__ = [
'get_agent_config_arg',
'get_llm_config_arg',
'get_field_info',
'get_parser',
'get_cli_parser',
'get_headless_parser',
'get_evaluation_parser',
'parse_arguments',
'setup_config_from_args',
]

View File

@@ -0,0 +1,224 @@
"""Centralized command line argument configuration for OpenHands CLI and headless modes."""
import argparse
from argparse import ArgumentParser, _SubParsersAction
def get_subparser(parser: ArgumentParser, name: str) -> ArgumentParser:
    """Return the sub-command parser registered on ``parser`` under ``name``.

    Every sub-parser container attached to ``parser`` is inspected in
    registration order and the first parser stored under ``name`` wins.

    Args:
        parser: The parser whose sub-commands are searched.
        name: The sub-command name to look up (e.g. ``'cli'``).

    Returns:
        The parser that was registered for ``name``.

    Raises:
        ValueError: If none of the sub-parser containers knows ``name``.
    """
    # argparse has no public lookup for sub-parsers, so walk its action list.
    matches = [
        action.choices[name]
        for action in parser._actions
        if isinstance(action, _SubParsersAction) and name in action.choices
    ]
    if not matches:
        raise ValueError(f"Subparser '{name}' not found")
    return matches[0]
def add_common_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the options that the CLI and headless parsers have in common.

    The options are declared as ``(flags, keyword-arguments)`` pairs and added
    in the listed order, which is also the order they appear in ``--help``.

    Args:
        parser: The parser (or sub-parser) that receives the options.
    """
    shared_options = [
        (
            ('--config-file',),
            {
                'type': str,
                'default': 'config.toml',
                'help': 'Path to the config file (default: config.toml in the current directory)',
            },
        ),
        (
            ('-t', '--task'),
            {
                'type': str,
                'default': '',
                'help': 'The task for the agent to perform',
            },
        ),
        (
            ('-f', '--file'),
            {
                'type': str,
                'help': 'Path to a file containing the task. Overrides -t if both are provided.',
            },
        ),
        (
            ('-n', '--name'),
            {
                'help': 'Session name',
                'type': str,
                'default': '',
            },
        ),
        (
            ('--log-level',),
            {
                'help': 'Set the log level',
                'type': str,
                'default': None,
            },
        ),
        (
            ('-l', '--llm-config'),
            {
                'default': None,
                'type': str,
                'help': 'Replace default LLM ([llm] section in config.toml) config with the specified LLM config, e.g. "llama3" for [llm.llama3] section in config.toml',
            },
        ),
        (
            ('--agent-config',),
            {
                'default': None,
                'type': str,
                'help': 'Replace default Agent ([agent] section in config.toml) config with the specified Agent config, e.g. "CodeAct" for [agent.CodeAct] section in config.toml',
            },
        ),
        (
            ('-v', '--version'),
            {
                'action': 'store_true',
                'help': 'Show version information',
            },
        ),
    ]
    for flags, settings in shared_options:
        parser.add_argument(*flags, **settings)
def add_evaluation_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the ``--eval-*`` options used only by evaluation runs.

    Args:
        parser: The parser that receives the evaluation options.
    """
    # (flag, default, value type, help text) in the order shown by --help.
    evaluation_options = (
        (
            '--eval-output-dir',
            'evaluation/evaluation_outputs/outputs',
            str,
            'The directory to save evaluation output',
        ),
        (
            '--eval-n-limit',
            None,
            int,
            'The number of instances to evaluate',
        ),
        (
            '--eval-num-workers',
            4,
            int,
            'The number of workers to use for evaluation',
        ),
        (
            '--eval-note',
            None,
            str,
            'The note to add to the evaluation directory',
        ),
        (
            '--eval-ids',
            None,
            str,
            'The comma-separated list (in quotes) of IDs of the instances to evaluate',
        ),
    )
    for flag, fallback, value_type, description in evaluation_options:
        parser.add_argument(
            flag,
            default=fallback,
            type=value_type,
            help=description,
        )
def add_headless_specific_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the options that exist only on the headless/evaluation parsers.

    Covers the working directory, agent class, iteration and budget limits,
    the auto-continue switch and the repository to clone.

    Args:
        parser: The parser that receives the headless-only options.
    """
    headless_options = [
        (
            ('-d', '--directory'),
            dict(
                type=str,
                help='The working directory for the agent',
            ),
        ),
        (
            ('-c', '--agent-cls'),
            dict(
                default=None,
                type=str,
                help='Name of the default agent to use',
            ),
        ),
        (
            ('-i', '--max-iterations'),
            dict(
                default=None,
                type=int,
                help='The maximum number of iterations to run the agent',
            ),
        ),
        (
            ('-b', '--max-budget-per-task'),
            dict(
                type=float,
                help='The maximum budget allowed per task, beyond which the agent will stop.',
            ),
        ),
        # Additional headless-specific arguments
        (
            ('--no-auto-continue',),
            dict(
                help='Disable auto-continue responses in headless mode (i.e. headless will read from stdin instead of auto-continuing)',
                action='store_true',
                default=False,
            ),
        ),
        (
            ('--selected-repo',),
            dict(
                help='GitHub repository to clone (format: owner/repo)',
                type=str,
                default=None,
            ),
        ),
    ]
    for flags, settings in headless_options:
        parser.add_argument(*flags, **settings)
def _parse_bool_flag(value: str) -> bool:
    """Convert a command-line string into a boolean.

    ``type=bool`` must not be used with argparse: ``bool('False')`` is ``True``
    because every non-empty string is truthy, so an explicit "false" on the
    command line would silently switch the flag on.

    Args:
        value: The raw string supplied on the command line.

    Returns:
        ``False`` for the empty string and the usual false spellings
        (``0``, ``false``, ``f``, ``no``, ``n``, ``off``; case-insensitive,
        surrounding whitespace ignored). Any other value is ``True``, which
        keeps inputs that previously enabled the flag working unchanged.
    """
    return value.strip().lower() not in {'', '0', 'false', 'f', 'no', 'n', 'off'}


def get_cli_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``openhands`` command.

    The parser exposes two sub-commands:

    * ``serve`` - launch the GUI server; accepts ``--mount-cwd`` and ``--gpu``.
    * ``cli`` - run in the terminal; accepts the common options added by
      ``add_common_arguments`` plus ``--override-cli-mode``.

    ``--conversation`` is registered on the top-level parser rather than on a
    sub-command. The selected sub-command is stored in ``args.command``.

    Returns:
        The configured top-level parser.
    """
    # Create a description with welcome message explaining available commands
    description = (
        'Welcome to OpenHands: Code Less, Make More\n\n'
        'OpenHands supports two main commands:\n'
        '  serve - Launch the OpenHands GUI server (web interface)\n'
        '  cli   - Run OpenHands in CLI mode (terminal interface)\n\n'
        'Running "openhands" without a command is the same as "openhands cli"'
    )

    parser = argparse.ArgumentParser(
        description=description,
        prog='openhands',
        formatter_class=argparse.RawDescriptionHelpFormatter,  # Preserve formatting in description
        epilog='For more information about a command, run: openhands COMMAND --help',
    )

    # Create subparsers
    subparsers = parser.add_subparsers(
        dest='command',
        title='commands',
        description='OpenHands supports two main commands:',
        metavar='COMMAND',
    )

    # Add 'serve' subcommand
    serve_parser = subparsers.add_parser(
        'serve', help='Launch the OpenHands GUI server using Docker (web interface)'
    )
    serve_parser.add_argument(
        '--mount-cwd',
        help='Mount the current working directory into the GUI server container',
        action='store_true',
        default=False,
    )
    serve_parser.add_argument(
        '--gpu',
        help='Enable GPU support by mounting all GPUs into the Docker container via nvidia-docker',
        action='store_true',
        default=False,
    )

    # Add 'cli' subcommand - import all the existing CLI arguments
    cli_parser = subparsers.add_parser(
        'cli', help='Run OpenHands in CLI mode (terminal interface)'
    )
    add_common_arguments(cli_parser)

    cli_parser.add_argument(
        '--override-cli-mode',
        help='Override the default settings for CLI mode',
        # An explicit converter is required: type=bool would turn "False" into True.
        type=_parse_bool_flag,
        default=False,
    )
    parser.add_argument(
        '--conversation',
        help='The conversation id to continue',
        type=str,
        default=None,
    )

    return parser
def get_headless_parser() -> argparse.ArgumentParser:
    """Build the headless-mode parser.

    Returns:
        A parser carrying the common options followed by the headless-only
        options.
    """
    headless_parser = argparse.ArgumentParser(description='Run the agent via CLI')
    # Registration order determines the order of options in --help.
    for register in (add_common_arguments, add_headless_specific_arguments):
        register(headless_parser)
    return headless_parser
def get_evaluation_parser() -> argparse.ArgumentParser:
    """Build the evaluation-mode parser.

    Returns:
        A parser carrying the common options, then the headless-only options,
        then the ``--eval-*`` options.
    """
    evaluation_parser = argparse.ArgumentParser(
        description='Run OpenHands in evaluation mode'
    )
    # Registration order determines the order of options in --help.
    registrars = (
        add_common_arguments,
        add_headless_specific_arguments,
        add_evaluation_arguments,
    )
    for register in registrars:
        register(evaluation_parser)
    return evaluation_parser

View File

@@ -1,3 +1,5 @@
from __future__ import annotations
import os
import re
import shlex
@@ -302,6 +304,13 @@ class MCPConfig(BaseModel):
raise ValueError(f'Invalid MCP configuration: {e}')
return mcp_mapping
def merge(self, other: MCPConfig):
    """Combine this configuration with ``other`` into a new ``MCPConfig``.

    For each of the three server lists, this instance's entries come first
    and ``other``'s entries are appended. The result is built from freshly
    concatenated lists; entries are not de-duplicated.
    """
    combined_sse = self.sse_servers + other.sse_servers
    combined_stdio = self.stdio_servers + other.stdio_servers
    combined_shttp = self.shttp_servers + other.shttp_servers
    return MCPConfig(
        sse_servers=combined_sse,
        stdio_servers=combined_stdio,
        shttp_servers=combined_shttp,
    )
class OpenHandsMCPConfig:
@staticmethod

View File

@@ -15,15 +15,12 @@ from pydantic import BaseModel, SecretStr, ValidationError
from openhands import __version__
from openhands.core import logger
from openhands.core.config.agent_config import AgentConfig
from openhands.core.config.arg_utils import get_headless_parser
from openhands.core.config.condenser_config import (
CondenserConfig,
condenser_config_from_toml_section,
create_condenser_config,
)
from openhands.core.config.config_utils import (
OH_DEFAULT_AGENT,
OH_MAX_ITERATIONS,
)
from openhands.core.config.extended_config import ExtendedConfig
from openhands.core.config.kubernetes_config import KubernetesConfig
from openhands.core.config.llm_config import LLMConfig
@@ -80,6 +77,17 @@ def load_from_env(
set_attr_from_env(field_value, prefix=field_name + '_')
elif env_var_name in env_or_toml_dict:
# Special case: avoid overriding workspace_mount_path_in_sandbox from env
# when SANDBOX_VOLUMES is set without an explicit /workspace mount.
if (
isinstance(sub_config, OpenHandsConfig)
and field_name == 'workspace_mount_path_in_sandbox'
):
vols = env_or_toml_dict.get('SANDBOX_VOLUMES')
if vols and '/workspace' not in str(vols):
# Skip overriding; keep the default '/workspace'
continue
# convert the env var to the correct type and set it
value = env_or_toml_dict[env_var_name]
@@ -674,142 +682,9 @@ def get_condenser_config_arg(
return None
# Command line arguments
def get_parser() -> argparse.ArgumentParser:
"""Get the argument parser."""
parser = argparse.ArgumentParser(description='Run the agent via CLI')
# Add version argument
parser.add_argument(
'-v', '--version', action='store_true', help='Show version information'
)
parser.add_argument(
'--config-file',
type=str,
default='config.toml',
help='Path to the config file (default: config.toml in the current directory)',
)
parser.add_argument(
'-d',
'--directory',
type=str,
help='The working directory for the agent',
)
parser.add_argument(
'-t',
'--task',
type=str,
default='',
help='The task for the agent to perform',
)
parser.add_argument(
'-f',
'--file',
type=str,
help='Path to a file containing the task. Overrides -t if both are provided.',
)
parser.add_argument(
'-c',
'--agent-cls',
default=OH_DEFAULT_AGENT,
type=str,
help='Name of the default agent to use',
)
parser.add_argument(
'-i',
'--max-iterations',
default=OH_MAX_ITERATIONS,
type=int,
help='The maximum number of iterations to run the agent',
)
parser.add_argument(
'-b',
'--max-budget-per-task',
type=float,
help='The maximum budget allowed per task, beyond which the agent will stop.',
)
# --eval configs are for evaluations only
parser.add_argument(
'--eval-output-dir',
default='evaluation/evaluation_outputs/outputs',
type=str,
help='The directory to save evaluation output',
)
parser.add_argument(
'--eval-n-limit',
default=None,
type=int,
help='The number of instances to evaluate',
)
parser.add_argument(
'--eval-num-workers',
default=4,
type=int,
help='The number of workers to use for evaluation',
)
parser.add_argument(
'--eval-note',
default=None,
type=str,
help='The note to add to the evaluation directory',
)
parser.add_argument(
'-l',
'--llm-config',
default=None,
type=str,
help='Replace default LLM ([llm] section in config.toml) config with the specified LLM config, e.g. "llama3" for [llm.llama3] section in config.toml',
)
parser.add_argument(
'--agent-config',
default=None,
type=str,
help='Replace default Agent ([agent] section in config.toml) config with the specified Agent config, e.g. "CodeAct" for [agent.CodeAct] section in config.toml',
)
parser.add_argument(
'-n',
'--name',
help='Session name',
type=str,
default='',
)
parser.add_argument(
'--eval-ids',
default=None,
type=str,
help='The comma-separated list (in quotes) of IDs of the instances to evaluate',
)
parser.add_argument(
'--no-auto-continue',
help='Disable auto-continue responses in headless mode (i.e. headless will read from stdin instead of auto-continuing)',
action='store_true',
default=False,
)
parser.add_argument(
'--selected-repo',
help='GitHub repository to clone (format: owner/repo)',
type=str,
default=None,
)
parser.add_argument(
'--override-cli-mode',
help='Override the default settings for CLI mode',
type=bool,
default=False,
)
parser.add_argument(
'--log-level',
help='Set the log level',
type=str,
default=None,
)
return parser
def parse_arguments() -> argparse.Namespace:
"""Parse command line arguments."""
parser = get_parser()
parser = get_headless_parser()
args = parser.parse_args()
if args.version:
@@ -914,17 +789,17 @@ def setup_config_from_args(args: argparse.Namespace) -> OpenHandsConfig:
)
# Override default agent if provided
if args.agent_cls:
if hasattr(args, 'agent_cls') and args.agent_cls:
config.default_agent = args.agent_cls
# Set max iterations and max budget per task if provided, otherwise fall back to config values
if args.max_iterations is not None:
if hasattr(args, 'max_iterations') and args.max_iterations is not None:
config.max_iterations = args.max_iterations
if args.max_budget_per_task is not None:
if hasattr(args, 'max_budget_per_task') and args.max_budget_per_task is not None:
config.max_budget_per_task = args.max_budget_per_task
# Read selected repository in config for use by CLI and main.py
if args.selected_repo is not None:
if hasattr(args, 'selected_repo') and args.selected_repo is not None:
config.sandbox.selected_repo = args.selected_repo
return config

View File

@@ -383,7 +383,7 @@ Do NOT assume the environment is the same as in the example above.
"""
example = example.lstrip()
return example
return refine_prompt(example)
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX = get_example_for_tools

View File

@@ -63,6 +63,7 @@ CACHE_PROMPT_SUPPORTED_MODELS = [
'claude-sonnet-4-20250514',
'claude-sonnet-4',
'claude-opus-4-20250514',
'claude-opus-4-1-20250805',
]
# function calling supporting models
@@ -77,6 +78,7 @@ FUNCTION_CALLING_SUPPORTED_MODELS = [
'claude-sonnet-4-20250514',
'claude-sonnet-4',
'claude-opus-4-20250514',
'claude-opus-4-1-20250805',
'gpt-4o-mini',
'gpt-4o',
'o1-2024-12-17',
@@ -92,6 +94,7 @@ FUNCTION_CALLING_SUPPORTED_MODELS = [
'kimi-k2-instruct',
'Qwen3-Coder-480B-A35B-Instruct',
'qwen3-coder', # this will match both qwen3-coder-480b (openhands provider) and qwen3-coder (for openrouter)
'gpt-5-2025-08-07',
]
REASONING_EFFORT_SUPPORTED_MODELS = [
@@ -105,6 +108,7 @@ REASONING_EFFORT_SUPPORTED_MODELS = [
'o4-mini-2025-04-16',
'gemini-2.5-flash',
'gemini-2.5-pro',
'gpt-5-2025-08-07',
]
MODELS_WITHOUT_STOP_WORDS = [

View File

@@ -10,17 +10,18 @@ from jinja2 import Environment, FileSystemLoader
from pydantic import BaseModel, ConfigDict, Field
from openhands.core.config.llm_config import LLMConfig
from openhands.core.config.mcp_config import MCPConfig
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import (
ChangeAgentStateAction,
NullAction,
)
from openhands.events.event_filter import EventFilter
from openhands.events.event_store import EventStore
from openhands.events.observation import (
AgentStateChangedObservation,
NullObservation,
)
from openhands.events.stream import EventStream
from openhands.integrations.provider import (
PROVIDER_TOKEN_TYPE,
ProviderHandler,
@@ -44,11 +45,11 @@ from openhands.server.services.conversation_service import (
create_new_conversation,
setup_init_convo_settings,
)
from openhands.server.session.conversation import ServerConversation
from openhands.server.shared import (
ConversationStoreImpl,
config,
conversation_manager,
file_store,
)
from openhands.server.types import LLMAuthenticationError, MissingSettingsError
from openhands.server.user_auth import (
@@ -60,7 +61,7 @@ from openhands.server.user_auth import (
get_user_settings_store,
)
from openhands.server.user_auth.user_auth import AuthType
from openhands.server.utils import get_conversation as get_conversation_object
from openhands.server.utils import get_conversation as get_conversation_metadata
from openhands.server.utils import get_conversation_store
from openhands.storage.conversation.conversation_store import ConversationStore
from openhands.storage.data_models.conversation_metadata import (
@@ -87,6 +88,7 @@ class InitSessionRequest(BaseModel):
suggested_task: SuggestedTask | None = None
create_microagent: CreateMicroagent | None = None
conversation_instructions: str | None = None
mcp_config: MCPConfig | None = None
# Only nested runtimes require the ability to specify a conversation id, and it could be a security risk
if os.getenv('ALLOW_SET_CONVERSATION_ID', '0') == '1':
conversation_id: str = Field(default_factory=lambda: uuid.uuid4().hex)
@@ -178,6 +180,7 @@ async def new_conversation(
conversation_instructions=conversation_instructions,
git_provider=git_provider,
conversation_id=conversation_id,
mcp_config=data.mcp_config,
)
return ConversationResponse(
@@ -331,23 +334,20 @@ async def delete_conversation(
return True
@app.get('/conversations/{conversation_id}/remember_prompt')
@app.get('/conversations/{conversation_id}/remember-prompt')
async def get_prompt(
conversation_id: str,
event_id: int,
user_settings: SettingsStore = Depends(get_user_settings_store),
conversation: ServerConversation | None = Depends(get_conversation_object),
metadata: ConversationMetadata = Depends(get_conversation_metadata),
):
if conversation is None:
return JSONResponse(
status_code=404,
content={'error': 'Conversation not found.'},
)
# get event stream for the conversation
event_stream = conversation.event_stream
# get event store for the conversation
event_store = EventStore(
sid=conversation_id, file_store=file_store, user_id=metadata.user_id
)
# retrieve the relevant events
stringified_events = _get_contextual_events(event_stream, event_id)
stringified_events = _get_contextual_events(event_store, event_id)
# generate a prompt
settings = await user_settings.load()
@@ -551,7 +551,7 @@ async def stop_conversation(
)
def _get_contextual_events(event_stream: EventStream, event_id: int) -> str:
def _get_contextual_events(event_store: EventStore, event_id: int) -> str:
# find the specified events to learn from
# Get X events around the target event
context_size = 4
@@ -567,7 +567,7 @@ def _get_contextual_events(event_stream: EventStream, event_id: int) -> str:
) # the types of events that can be in an agent's history
# from event_id - context_size to event_id..
context_before = event_stream.search_events(
context_before = event_store.search_events(
start_id=event_id,
filter=agent_event_filter,
reverse=True,
@@ -575,7 +575,7 @@ def _get_contextual_events(event_stream: EventStream, event_id: int) -> str:
)
# from event_id to event_id + context_size + 1
context_after = event_stream.search_events(
context_after = event_store.search_events(
start_id=event_id + 1,
filter=agent_event_filter,
limit=context_size + 1,

View File

@@ -2,6 +2,7 @@ import uuid
from types import MappingProxyType
from typing import Any
from openhands.core.config.mcp_config import MCPConfig
from openhands.core.logger import openhands_logger as logger
from openhands.events.action.message import MessageAction
from openhands.experiments.experiment_manager import ExperimentManagerImpl
@@ -44,6 +45,7 @@ async def create_new_conversation(
attach_convo_id: bool = False,
git_provider: ProviderType | None = None,
conversation_id: str | None = None,
mcp_config: MCPConfig | None = None,
) -> AgentLoopInfo:
logger.info(
'Creating conversation',
@@ -82,6 +84,9 @@ async def create_new_conversation(
session_init_args['selected_branch'] = selected_branch
session_init_args['git_provider'] = git_provider
session_init_args['conversation_instructions'] = conversation_instructions
if mcp_config:
session_init_args['mcp_config'] = mcp_config
conversation_init_data = ConversationInitData(**session_init_args)
logger.info('Loading conversation store')

View File

@@ -124,10 +124,12 @@ class Session:
)
# Set Git user configuration if provided in settings
if hasattr(settings, 'git_user_name') and settings.git_user_name:
self.config.git_user_name = settings.git_user_name
if hasattr(settings, 'git_user_email') and settings.git_user_email:
self.config.git_user_email = settings.git_user_email
git_user_name = getattr(settings, 'git_user_name', None)
if git_user_name is not None:
self.config.git_user_name = git_user_name
git_user_email = getattr(settings, 'git_user_email', None)
if git_user_email is not None:
self.config.git_user_email = git_user_email
max_iterations = settings.max_iterations or self.config.max_iterations
# Prioritize settings over config for max_budget_per_task
@@ -152,6 +154,14 @@ class Session:
self.logger.debug(
f'MCP configuration before setup - self.config.mcp_config: {self.config.mcp}'
)
# Check if settings has custom mcp_config
mcp_config = getattr(settings, 'mcp_config', None)
if mcp_config is not None:
# Use the provided MCP SHTTP servers instead of default setup
self.config.mcp = self.config.mcp.merge(mcp_config)
self.logger.debug(f'Merged custom MCP Config: {mcp_config}')
# Add OpenHands' MCP server by default
openhands_mcp_server, openhands_mcp_stdio_servers = (
OpenHandsMCPConfigImpl.create_default_mcp_server_config(
@@ -163,7 +173,7 @@ class Session:
self.config.mcp.shttp_servers.append(openhands_mcp_server)
self.logger.debug('Added default MCP HTTP server to config')
self.config.mcp.stdio_servers.extend(openhands_mcp_stdio_servers)
self.config.mcp.stdio_servers.extend(openhands_mcp_stdio_servers)
self.logger.debug(
f'MCP configuration after setup - self.config.mcp: {self.config.mcp}'

View File

@@ -56,6 +56,7 @@ def get_supported_llm_models(config: OpenHandsConfig) -> list[str]:
# Add OpenHands provider models
openhands_models = [
'openhands/claude-sonnet-4-20250514',
'openhands/gpt-5-2025-08-07',
'openhands/claude-opus-4-20250514',
'openhands/gemini-2.5-pro',
'openhands/o3',

View File

@@ -4,6 +4,7 @@ from itertools import islice
from jinja2 import Template
from openhands.agenthub.codeact_agent.tools.bash import refine_prompt
from openhands.controller.state.state import State
from openhands.core.message import Message, TextContent
from openhands.events.observation.agent import MicroagentKnowledge
@@ -91,7 +92,8 @@ class PromptManager:
return Template(file.read())
def get_system_message(self) -> str:
return self.system_template.render().strip()
system_message = self.system_template.render().strip()
return refine_prompt(system_message)
def get_example_user_message(self) -> str:
"""This is an initial user message that can be provided to the agent

View File

@@ -10,6 +10,7 @@ class TermColor(Enum):
SUCCESS = 'green'
ERROR = 'red'
INFO = 'blue'
GREY = 'dark_grey'
def colorize(text: str, color: TermColor = TermColor.WARNING) -> str:

6
package-lock.json generated
View File

@@ -1,6 +0,0 @@
{
"name": "OpenHands",
"lockfileVersion": 3,
"requires": true,
"packages": {}
}

21
poetry.lock generated
View File

@@ -5152,8 +5152,11 @@ files = [
{file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"},
{file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"},
{file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"},
{file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"},
{file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"},
{file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"},
{file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"},
@@ -5227,6 +5230,22 @@ files = [
[package.dependencies]
cobble = ">=0.1.3,<0.2"
[[package]]
name = "markdown"
version = "3.8.2"
description = "Python implementation of John Gruber's Markdown."
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "markdown-3.8.2-py3-none-any.whl", hash = "sha256:5c83764dbd4e00bdd94d85a19b8d55ccca20fe35b2e678a1422b380324dd5f24"},
{file = "markdown-3.8.2.tar.gz", hash = "sha256:247b9a70dd12e27f67431ce62523e675b866d254f900c4fe75ce3dda62237c45"},
]
[package.extras]
docs = ["mdx_gh_links (>=0.2)", "mkdocs (>=1.6)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"]
testing = ["coverage", "pyyaml"]
[[package]]
name = "markdown-it-py"
version = "3.0.0"
@@ -11766,4 +11785,4 @@ third-party-runtimes = ["daytona", "e2b", "modal", "runloop-api-client"]
[metadata]
lock-version = "2.1"
python-versions = "^3.12,<3.14"
content-hash = "8568c6ec2e11d4fcb23e206a24896b4d2d50e694c04011b668148f484e95b406"
content-hash = "d83111cc28bf935f1c759d3ce07a21c69a85f6df035db26042326bd8fba4969f"

View File

@@ -58,6 +58,7 @@ whatthepatch = "^1.0.6"
protobuf = "^5.0.0,<6.0.0" # Updated to support newer opentelemetry
opentelemetry-api = "^1.33.1"
opentelemetry-exporter-otlp-proto-grpc = "^1.33.1"
markdown = "^3.6" # Required for CLI TUI rendering
libtmux = ">=0.37,<0.40"
pygithub = "^2.5.0"
@@ -166,7 +167,7 @@ joblib = "*"
swebench = { git = "https://github.com/ryanhoangt/SWE-bench.git", rev = "fix-modal-patch-eval" }
[tool.poetry.scripts]
openhands = "openhands.cli.main:main"
openhands = "openhands.cli.entry:main"
[tool.poetry.group.testgeneval.dependencies]
fuzzywuzzy = "^0.18.0"

View File

@@ -1,17 +1,36 @@
import pytest
from openhands.core.config import OH_DEFAULT_AGENT, OH_MAX_ITERATIONS, get_parser
from openhands.core.config import (
get_evaluation_parser,
get_headless_parser,
)
def test_parser_default_values():
parser = get_parser()
def test_headless_parser_default_values():
parser = get_headless_parser()
args = parser.parse_args([])
assert args.directory is None
assert args.task == ''
assert args.file is None
assert args.agent_cls == OH_DEFAULT_AGENT
assert args.max_iterations == OH_MAX_ITERATIONS
assert args.agent_cls is None
assert args.max_iterations is None
assert args.max_budget_per_task is None
assert args.llm_config is None
assert args.name == ''
assert not args.no_auto_continue
assert args.selected_repo is None
def test_evaluation_parser_default_values():
parser = get_evaluation_parser()
args = parser.parse_args([])
assert args.directory is None
assert args.task == ''
assert args.file is None
assert args.agent_cls is None
assert args.max_iterations is None
assert args.max_budget_per_task is None
assert args.eval_output_dir == 'evaluation/evaluation_outputs/outputs'
assert args.eval_n_limit is None
@@ -23,8 +42,8 @@ def test_parser_default_values():
assert args.selected_repo is None
def test_parser_custom_values():
parser = get_parser()
def test_evaluation_parser_custom_values():
parser = get_evaluation_parser()
args = parser.parse_args(
[
'-v',
@@ -76,7 +95,7 @@ def test_parser_custom_values():
def test_parser_file_overrides_task():
parser = get_parser()
parser = get_headless_parser()
args = parser.parse_args(['-t', 'task from command', '-f', 'task_file.txt'])
assert args.task == 'task from command'
@@ -84,31 +103,31 @@ def test_parser_file_overrides_task():
def test_parser_invalid_max_iterations():
parser = get_parser()
parser = get_headless_parser()
with pytest.raises(SystemExit):
parser.parse_args(['-i', 'not_a_number'])
def test_parser_invalid_max_budget():
parser = get_parser()
parser = get_headless_parser()
with pytest.raises(SystemExit):
parser.parse_args(['-b', 'not_a_number'])
def test_parser_invalid_eval_n_limit():
parser = get_parser()
def test_evaluation_parser_invalid_eval_n_limit():
parser = get_evaluation_parser()
with pytest.raises(SystemExit):
parser.parse_args(['--eval-n-limit', 'not_a_number'])
def test_parser_invalid_eval_num_workers():
parser = get_parser()
def test_evaluation_parser_invalid_eval_num_workers():
parser = get_evaluation_parser()
with pytest.raises(SystemExit):
parser.parse_args(['--eval-num-workers', 'not_a_number'])
def test_help_message(capsys):
parser = get_parser()
def test_headless_parser_help_message(capsys):
parser = get_headless_parser()
with pytest.raises(SystemExit):
parser.parse_args(['--help'])
captured = capsys.readouterr()
@@ -126,6 +145,41 @@ def test_help_message(capsys):
'-c AGENT_CLS, --agent-cls AGENT_CLS',
'-i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS',
'-b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK',
'-l LLM_CONFIG, --llm-config LLM_CONFIG',
'--agent-config AGENT_CONFIG',
'-n NAME, --name NAME',
'--config-file CONFIG_FILE',
'--no-auto-continue',
'--selected-repo SELECTED_REPO',
'--log-level LOG_LEVEL',
]
for element in expected_elements:
assert element in help_output, f"Expected '{element}' to be in the help message"
option_count = help_output.count(' -')
assert option_count == 15, f'Expected 15 options, found {option_count}'
def test_evaluation_parser_help_message(capsys):
parser = get_evaluation_parser()
with pytest.raises(SystemExit):
parser.parse_args(['--help'])
captured = capsys.readouterr()
help_output = captured.out
print(help_output)
expected_elements = [
'usage:',
'Run OpenHands in evaluation mode',
'options:',
'-v, --version',
'-h, --help',
'-d DIRECTORY, --directory DIRECTORY',
'-t TASK, --task TASK',
'-f FILE, --file FILE',
'-c AGENT_CLS, --agent-cls AGENT_CLS',
'-i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS',
'-b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK',
'--eval-output-dir EVAL_OUTPUT_DIR',
'--eval-n-limit EVAL_N_LIMIT',
'--eval-num-workers EVAL_NUM_WORKERS',
@@ -137,7 +191,6 @@ def test_help_message(capsys):
'--config-file CONFIG_FILE',
'--no-auto-continue',
'--selected-repo SELECTED_REPO',
'--override-cli-mode OVERRIDE_CLI_MODE',
'--log-level LOG_LEVEL',
]
@@ -145,11 +198,11 @@ def test_help_message(capsys):
assert element in help_output, f"Expected '{element}' to be in the help message"
option_count = help_output.count(' -')
assert option_count == 21, f'Expected 21 options, found {option_count}'
assert option_count == 20, f'Expected 20 options, found {option_count}'
def test_selected_repo_format():
    """Test that the selected-repo argument accepts owner/repo format.

    The value is parsed with the headless parser and must come back unchanged
    on ``args.selected_repo``.
    """
    # Build only the parser that is actually exercised; a parser assigned and
    # then immediately overwritten would never be used.
    parser = get_headless_parser()
    args = parser.parse_args(['--selected-repo', 'owner/repo'])
    assert args.selected_repo == 'owner/repo'

View File

@@ -325,7 +325,6 @@ async def test_run_session_with_initial_action(
@pytest.mark.asyncio
@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -345,7 +344,6 @@ async def test_main_without_task(
mock_check_security,
mock_get_settings_store,
mock_setup_config,
mock_parse_args,
):
"""Test main function without a task."""
loop = asyncio.get_running_loop()
@@ -359,7 +357,10 @@ async def test_main_without_task(
mock_args.llm_config = None
mock_args.name = None
mock_args.file = None
mock_parse_args.return_value = mock_args
mock_args.conversation = None
mock_args.log_level = None
mock_args.config_file = 'config.toml'
mock_args.override_cli_mode = None
# Mock config
mock_config = MagicMock()
@@ -393,10 +394,9 @@ async def test_main_without_task(
mock_run_session.return_value = False
# Run the function
await cli.main_with_loop(loop)
await cli.main_with_loop(loop, mock_args)
# Assertions
mock_parse_args.assert_called_once()
mock_setup_config.assert_called_once_with(mock_args)
mock_get_settings_store.assert_called_once()
mock_settings_store.load.assert_called_once()
@@ -412,11 +412,11 @@ async def test_main_without_task(
None,
session_name=None,
skip_banner=False,
conversation_id=None,
)
@pytest.mark.asyncio
@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -436,7 +436,6 @@ async def test_main_with_task(
mock_check_security,
mock_get_settings_store,
mock_setup_config,
mock_parse_args,
):
"""Test main function with a task."""
loop = asyncio.get_running_loop()
@@ -449,7 +448,11 @@ async def test_main_with_task(
mock_args.agent_cls = 'custom-agent'
mock_args.llm_config = 'custom-config'
mock_args.file = None
mock_parse_args.return_value = mock_args
mock_args.name = None
mock_args.conversation = None
mock_args.log_level = None
mock_args.config_file = 'config.toml'
mock_args.override_cli_mode = None
# Mock config
mock_config = MagicMock()
@@ -484,10 +487,9 @@ async def test_main_with_task(
mock_run_session.side_effect = [True, False]
# Run the function
await cli.main_with_loop(loop)
await cli.main_with_loop(loop, mock_args)
# Assertions
mock_parse_args.assert_called_once()
mock_setup_config.assert_called_once_with(mock_args)
mock_get_settings_store.assert_called_once()
mock_settings_store.load.assert_called_once()
@@ -518,7 +520,6 @@ async def test_main_with_task(
@pytest.mark.asyncio
@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -538,7 +539,6 @@ async def test_main_with_session_name_passes_name_to_run_session(
mock_check_security,
mock_get_settings_store,
mock_setup_config,
mock_parse_args,
):
"""Test main function with a session name passes it to run_session."""
loop = asyncio.get_running_loop()
@@ -553,7 +553,10 @@ async def test_main_with_session_name_passes_name_to_run_session(
mock_args.llm_config = None
mock_args.name = test_session_name # Set the session name
mock_args.file = None
mock_parse_args.return_value = mock_args
mock_args.conversation = None
mock_args.log_level = None
mock_args.config_file = 'config.toml'
mock_args.override_cli_mode = None
# Mock config
mock_config = MagicMock()
@@ -587,10 +590,9 @@ async def test_main_with_session_name_passes_name_to_run_session(
mock_run_session.return_value = False
# Run the function
await cli.main_with_loop(loop)
await cli.main_with_loop(loop, mock_args)
# Assertions
mock_parse_args.assert_called_once()
mock_setup_config.assert_called_once_with(mock_args)
mock_get_settings_store.assert_called_once()
mock_settings_store.load.assert_called_once()
@@ -606,6 +608,7 @@ async def test_main_with_session_name_passes_name_to_run_session(
None,
session_name=test_session_name,
skip_banner=False,
conversation_id=None,
)
@@ -709,7 +712,6 @@ async def test_run_session_with_name_attempts_state_restore(
@pytest.mark.asyncio
@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -729,7 +731,6 @@ async def test_main_security_check_fails(
mock_check_security,
mock_get_settings_store,
mock_setup_config,
mock_parse_args,
):
"""Test main function when security check fails."""
loop = asyncio.get_running_loop()
@@ -739,7 +740,14 @@ async def test_main_security_check_fails(
# Mock arguments
mock_args = MagicMock()
mock_parse_args.return_value = mock_args
mock_args.agent_cls = None
mock_args.llm_config = None
mock_args.name = None
mock_args.file = None
mock_args.conversation = None
mock_args.log_level = None
mock_args.config_file = 'config.toml'
mock_args.override_cli_mode = None
# Mock config
mock_config = MagicMock()
@@ -761,10 +769,9 @@ async def test_main_security_check_fails(
mock_check_security.return_value = False
# Run the function
await cli.main_with_loop(loop)
await cli.main_with_loop(loop, mock_args)
# Assertions
mock_parse_args.assert_called_once()
mock_setup_config.assert_called_once_with(mock_args)
mock_get_settings_store.assert_called_once()
mock_settings_store.load.assert_called_once()
@@ -775,7 +782,6 @@ async def test_main_security_check_fails(
@pytest.mark.asyncio
@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -795,7 +801,6 @@ async def test_config_loading_order(
mock_check_security,
mock_get_settings_store,
mock_setup_config,
mock_parse_args,
):
"""Test the order of configuration loading in the main function.
@@ -816,7 +821,10 @@ async def test_config_loading_order(
# Add a file property to avoid file I/O errors
mock_args.file = None
mock_args.log_level = 'INFO'
mock_parse_args.return_value = mock_args
mock_args.name = None
mock_args.conversation = None
mock_args.config_file = 'config.toml'
mock_args.override_cli_mode = None
# Mock read_task to return a dummy task
mock_read_task.return_value = 'Test task'
@@ -859,10 +867,9 @@ async def test_config_loading_order(
mock_run_session.return_value = False # No new session requested
# Run the function
await cli.main_with_loop(loop)
await cli.main_with_loop(loop, mock_args)
# Assertions for argument parsing and config setup
mock_parse_args.assert_called_once()
mock_setup_config.assert_called_once_with(mock_args)
mock_get_settings_store.assert_called_once()
mock_settings_store.load.assert_called_once()
@@ -892,7 +899,6 @@ async def test_config_loading_order(
@pytest.mark.asyncio
@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -914,7 +920,6 @@ async def test_main_with_file_option(
mock_check_security,
mock_get_settings_store,
mock_setup_config,
mock_parse_args,
):
"""Test main function with a file option."""
loop = asyncio.get_running_loop()
@@ -929,7 +934,10 @@ async def test_main_with_file_option(
mock_args.name = None
mock_args.file = '/path/to/test/file.txt'
mock_args.task = None
mock_parse_args.return_value = mock_args
mock_args.conversation = None
mock_args.log_level = None
mock_args.config_file = 'config.toml'
mock_args.override_cli_mode = None
# Mock config
mock_config = MagicMock()
@@ -965,10 +973,9 @@ async def test_main_with_file_option(
mock_run_session.return_value = False
# Run the function
await cli.main_with_loop(loop)
await cli.main_with_loop(loop, mock_args)
# Assertions
mock_parse_args.assert_called_once()
mock_setup_config.assert_called_once_with(mock_args)
mock_get_settings_store.assert_called_once()
mock_settings_store.load.assert_called_once()

View File

@@ -3,6 +3,8 @@ from unittest.mock import MagicMock, patch
import pytest
from openhands.core.config import (
OH_DEFAULT_AGENT,
OH_MAX_ITERATIONS,
OpenHandsConfig,
get_llm_config_arg,
setup_config_from_args,
@@ -308,3 +310,74 @@ def test_cli_settings_json_not_override_config_toml(
# Verify that settings.json did not override config.toml
assert test_llm_config.model == 'config-toml-model'
assert test_llm_config.api_key == 'config-toml-api-key'
def test_default_values_applied_when_none():
    """Defaults apply when neither the CLI args nor the loaded config set a value.

    ``agent_cls`` and ``max_iterations`` are ``None`` on the args object and the
    patched loader returns a fresh ``OpenHandsConfig``; the resulting config is
    expected to carry ``OH_DEFAULT_AGENT`` and ``OH_MAX_ITERATIONS``.
    """
    # Stand-in for the parsed CLI namespace with every relevant option unset.
    cli_args = MagicMock(
        config_file=None,
        llm_config=None,
        agent_cls=None,
        max_iterations=None,
    )

    # Replace the config loader so no file on disk is read.
    pristine_config = OpenHandsConfig()
    with patch(
        'openhands.core.config.utils.load_openhands_config',
        return_value=pristine_config,
    ):
        resolved = setup_config_from_args(cli_args)

    # Both settings must match the project-wide defaults.
    assert resolved.default_agent == OH_DEFAULT_AGENT
    assert resolved.max_iterations == OH_MAX_ITERATIONS
def test_cli_args_override_defaults():
    """Explicit CLI values for agent class and iteration cap win over defaults.

    The args object carries ``agent_cls='CustomAgent'`` and
    ``max_iterations=50``; the patched loader returns a fresh
    ``OpenHandsConfig``, and the resulting config must expose the CLI values.
    """
    # Stand-in for the parsed CLI namespace with custom overrides.
    cli_args = MagicMock(
        config_file=None,
        llm_config=None,
        agent_cls='CustomAgent',
        max_iterations=50,
    )

    # Replace the config loader so no file on disk is read.
    pristine_config = OpenHandsConfig()
    with patch(
        'openhands.core.config.utils.load_openhands_config',
        return_value=pristine_config,
    ):
        resolved = setup_config_from_args(cli_args)

    # The CLI-provided values must be the ones that end up on the config.
    assert resolved.default_agent == 'CustomAgent'
    assert resolved.max_iterations == 50
def test_cli_args_none_uses_config_toml_values():
    """Values from the loaded config survive when the matching CLI args are None.

    The patched loader returns a config whose ``default_agent`` and
    ``max_iterations`` were set explicitly; with ``agent_cls`` and
    ``max_iterations`` unset on the args, those values must be preserved.
    """
    # Config object playing the role of what config.toml would have produced.
    toml_backed_config = OpenHandsConfig()
    toml_backed_config.default_agent = 'ConfigTomlAgent'
    toml_backed_config.max_iterations = 100

    # Stand-in for the parsed CLI namespace with the two options left unset.
    cli_args = MagicMock(
        config_file=None,
        llm_config=None,
        agent_cls=None,
        max_iterations=None,
    )

    # Replace the config loader so the prepared config is what gets loaded.
    with patch(
        'openhands.core.config.utils.load_openhands_config',
        return_value=toml_backed_config,
    ):
        resolved = setup_config_from_args(cli_args)

    # The None-valued CLI args must not clobber the loaded settings.
    assert resolved.default_agent == 'ConfigTomlAgent'
    assert resolved.max_iterations == 100

View File

@@ -13,6 +13,7 @@ from openhands.integrations.service_types import (
Repository,
)
from openhands.microagent.types import MicroagentContentResponse
from openhands.server.dependencies import check_session_api_key
from openhands.server.routes.git import app as git_app
from openhands.server.user_auth import (
get_access_token,
@@ -49,10 +50,15 @@ def test_client():
def mock_get_user_id():
return 'test_user'
def mock_check_session_api_key():
# Mock session API key check to always pass for tests
return None
# Override the dependencies in the app
app.dependency_overrides[get_provider_tokens] = mock_get_provider_tokens
app.dependency_overrides[get_access_token] = mock_get_access_token
app.dependency_overrides[get_user_id] = mock_get_user_id
app.dependency_overrides[check_session_api_key] = mock_check_session_api_key
yield TestClient(app)

View File

@@ -46,24 +46,32 @@ def test_localhost_cors_middleware_init_without_env_var():
def test_localhost_cors_middleware_is_allowed_origin_localhost(app):
    """Test that localhost origins are allowed regardless of port when no specific origins are configured.

    Each origin below is sent in the ``Origin`` header of a GET to ``/test``;
    the response must be 200 and echo that origin back in
    ``access-control-allow-origin``.
    """
    # Run with an empty environment so PERMITTED_CORS_ORIGINS is not set and the
    # middleware is built exactly once, under that environment.
    with patch.dict(os.environ, {}, clear=True):
        app.add_middleware(LocalhostCORSMiddleware)
        client = TestClient(app)

        local_origins = (
            'http://localhost:8000',  # localhost
            'http://localhost:3000',  # localhost on a different port
            'http://127.0.0.1:8000',  # loopback IP
        )
        for origin in local_origins:
            response = client.get('/test', headers={'Origin': origin})
            assert response.status_code == 200
            assert response.headers['access-control-allow-origin'] == origin
def test_localhost_cors_middleware_is_allowed_origin_non_localhost(app):
@@ -87,14 +95,15 @@ def test_localhost_cors_middleware_is_allowed_origin_non_localhost(app):
def test_localhost_cors_middleware_missing_origin(app):
    """Test behavior when Origin header is missing.

    A GET to ``/test`` without an ``Origin`` header must succeed with 200 and
    the response must not carry ``access-control-allow-origin``.
    """
    # Run with an empty environment and register the middleware exactly once,
    # so the check does not depend on variables set outside the test.
    with patch.dict(os.environ, {}, clear=True):
        app.add_middleware(LocalhostCORSMiddleware)
        client = TestClient(app)

        # Test without Origin header
        response = client.get('/test')
        assert response.status_code == 200

        # There should be no access-control-allow-origin header
        assert 'access-control-allow-origin' not in response.headers
def test_localhost_cors_middleware_inheritance():

View File

@@ -0,0 +1,179 @@
import sys
from unittest.mock import patch
import pytest
from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
from openhands.core.config import AgentConfig
from openhands.llm.llm import LLM
# Module-level pytest marker: every test in this module is skipped unless
# sys.platform is exactly 'win32'; the reason string is shown in the skip report.
pytestmark = pytest.mark.skipif(
sys.platform != 'win32', reason='Windows prompt refinement tests require Windows'
)
@pytest.fixture
def mock_llm():
    """Provide an ``LLM`` built from a minimal config (model 'gpt-4', dummy API key)."""
    return LLM(config={'model': 'gpt-4', 'api_key': 'test'})
@pytest.fixture
def agent_config():
    """Provide an ``AgentConfig`` constructed with no arguments."""
    basic_config = AgentConfig()
    return basic_config
def test_codeact_agent_system_prompt_no_bash_on_windows(mock_llm, agent_config):
    """The CodeActAgent system prompt mentions 'powershell' and never 'bash'.

    Both checks are case-insensitive and run against the message returned by
    the agent's prompt manager.
    """
    # Build the agent and pull the system message from its prompt manager.
    agent = CodeActAgent(llm=mock_llm, config=agent_config)
    system_prompt = agent.prompt_manager.get_system_message()
    lowered_prompt = system_prompt.lower()

    # 'bash' must be absent ...
    assert 'bash' not in lowered_prompt, (
        f"System prompt contains 'bash' on Windows platform. "
        f"It should be replaced with 'powershell'. "
        f'System prompt: {system_prompt}'
    )

    # ... and 'powershell' must be present in its place.
    assert 'powershell' in lowered_prompt, (
        f"System prompt should contain 'powershell' on Windows platform. "
        f'System prompt: {system_prompt}'
    )
def test_codeact_agent_tool_descriptions_no_bash_on_windows(mock_llm, agent_config):
    """No function tool of CodeActAgent mentions 'bash' in its descriptions.

    For every tool whose ``type`` is ``'function'``, the function description
    and each parameter description are checked case-insensitively.
    """
    agent = CodeActAgent(llm=mock_llm, config=agent_config)

    for tool in agent.tools:
        # Only function-type tools carry the description fields inspected here.
        if tool['type'] != 'function':
            continue
        function_info = tool['function']

        # Top-level description of the function itself.
        description = function_info.get('description', '')
        assert 'bash' not in description.lower(), (
            f"Tool '{function_info['name']}' description contains 'bash' on Windows. "
            f'Description: {description}'
        )

        # Per-parameter descriptions; missing sections default to empty dicts.
        properties = function_info.get('parameters', {}).get('properties', {})
        for param_name, param_info in properties.items():
            param_description = param_info.get('description', '')
            assert 'bash' not in param_description.lower(), (
                f"Tool '{function_info['name']}' parameter '{param_name}' "
                f"description contains 'bash' on Windows. "
                f'Parameter description: {param_description}'
            )
def test_in_context_learning_example_no_bash_on_windows():
    """The in-context learning example never mentions 'bash'.

    The example is generated for a command-run tool, a string-replace editor
    tool and the finish tool. If the example is non-empty it must also mention
    'powershell'. Both checks are case-insensitive.
    """
    from openhands.agenthub.codeact_agent.tools.bash import create_cmd_run_tool
    from openhands.agenthub.codeact_agent.tools.finish import FinishTool
    from openhands.agenthub.codeact_agent.tools.str_replace_editor import (
        create_str_replace_editor_tool,
    )
    from openhands.llm.fn_call_converter import get_example_for_tools

    # Representative tool set fed to the example generator.
    cmd_tool = create_cmd_run_tool()
    editor_tool = create_str_replace_editor_tool()
    example = get_example_for_tools([cmd_tool, editor_tool, FinishTool])

    assert 'bash' not in example.lower(), (
        f"In-context learning example contains 'bash' on Windows platform. "
        f"It should be replaced with 'powershell'. "
        f'Example: {example}'
    )

    # An empty example has nothing further to verify.
    if not example:
        return
    assert 'powershell' in example.lower(), (
        f"In-context learning example should contain 'powershell' on Windows platform. "
        f'Example: {example}'
    )
def test_refine_prompt_function_works():
    """``refine_prompt`` rewrites the word 'bash' to 'powershell'.

    Covers a single occurrence, repeated occurrences, mixed-case spellings,
    the ``execute_bash`` tool name, and a word that merely starts with 'bash'
    ('bashful'), which must be left alone.
    """
    from openhands.agenthub.codeact_agent.tools.bash import refine_prompt

    # Single occurrence.
    single = refine_prompt('Execute a bash command to list files')
    assert 'bash' not in single.lower()
    assert 'powershell' in single.lower()
    assert single == 'Execute a powershell command to list files'

    # Several occurrences in one prompt.
    repeated = refine_prompt('Use bash to run bash commands in the bash shell')
    assert 'bash' not in repeated.lower()
    assert (
        repeated == 'Use powershell to run powershell commands in the powershell shell'
    )

    # Upper-, title- and lower-case spellings all become lower-case 'powershell'.
    mixed_case = refine_prompt('BASH and Bash and bash should all be replaced')
    assert 'bash' not in mixed_case.lower()
    assert (
        mixed_case == 'powershell and powershell and powershell should all be replaced'
    )

    # The execute_bash tool name is renamed as well.
    tool_name = refine_prompt('Use the execute_bash tool to run commands')
    assert 'execute_bash' not in tool_name.lower()
    assert 'execute_powershell' in tool_name.lower()
    assert tool_name == 'Use the execute_powershell tool to run commands'

    # 'bashful' is kept as-is, while 'bash-like' turns into 'powershell-like'.
    partial_word = refine_prompt('The bashful person likes bash-like syntax')
    assert 'bashful' in partial_word
    assert 'powershell-like' in partial_word
    assert partial_word == 'The bashful person likes powershell-like syntax'
def test_refine_prompt_function_on_non_windows():
    """``refine_prompt`` returns its input unchanged when the platform is 'linux'.

    ``sys.platform`` as seen through the tools.bash module is patched to
    'linux' for the duration of the call.
    """
    from openhands.agenthub.codeact_agent.tools.bash import refine_prompt

    original_prompt = 'Execute a bash command to list files'

    # Simulate a non-Windows platform while refine_prompt runs.
    with patch('openhands.agenthub.codeact_agent.tools.bash.sys.platform', 'linux'):
        refined_prompt = refine_prompt(original_prompt)

        # The prompt must pass through untouched, 'bash' included.
        assert refined_prompt == original_prompt
        assert 'bash' in refined_prompt.lower()