Compare commits

..

3 Commits

Author SHA1 Message Date
Engel Nyst
aea92f3869 runtime(bash): use dedicated tmux socket to avoid inherited permission issues; initialize _closed safely
Create a fresh, uniquely named tmux socket for each BashSession via libtmux.Server(socket_name=...). This prevents connecting to a root-owned TMUX socket (e.g., /tmp/tmux-0/default) on CI, which caused interactive tests to misbehave.

Also set _closed early and reset on initialize to avoid __del__ AttributeError if initialization fails early.

Co-authored-by: OpenHands-GPT-5 openhands@all-hands.dev
2025-08-18 10:52:29 +00:00
enyst
cee88aff48 codeact: expose execute_bash.name for clearer comparisons
- Export a lightweight execute_bash object with a .name attribute
- Switch comparisons and handler map in CodeAct function_calling to use execute_bash.name

Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-17 18:42:56 +00:00
enyst
0c2283abcc feat(codeact tools): encapsulate Bash tool as class; add Tool base; dispatch via handler (#10441)
- Add Tool base class with to_param(), parse_arguments(), to_action()
- Implement CmdRunTool with schema and validation
- Use CmdRunTool in agent tool list and response parser
- Keep create_cmd_run_tool for backward compat

Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-17 17:52:05 +00:00
300 changed files with 17157 additions and 24098 deletions

View File

@@ -22,7 +22,7 @@ jobs:
uses: actions/checkout@v4
- name: Install poetry via pipx
uses: abatilo/actions-poetry@v4
uses: abatilo/actions-poetry@v3
with:
poetry-version: 2.1.3
@@ -183,11 +183,7 @@ jobs:
# Run the tests with detailed output
cd tests/e2e
poetry run python -m pytest \
test_settings.py::test_github_token_configuration \
test_conversation.py::test_conversation_start \
test_browsing_catchphrase.py::test_browsing_catchphrase \
-v --no-header --capture=no --timeout=900
poetry run python -m pytest test_settings.py::test_github_token_configuration test_conversation.py::test_conversation_start -v --no-header --capture=no --timeout=600
- name: Upload test results
if: always()

View File

@@ -29,12 +29,6 @@ jobs:
run: |
cd frontend
npm install --frozen-lockfile
- name: Generate i18n and route types
run: |
cd frontend
npm run make-i18n
npx react-router typegen || true
- name: Fix frontend lint issues
run: |
cd frontend
@@ -51,7 +45,7 @@ jobs:
git config --local user.email "openhands@all-hands.dev"
git config --local user.name "OpenHands Bot"
git add -A
git commit -m "🤖 Auto-fix frontend linting issues" --no-verify
git commit -m "🤖 Auto-fix frontend linting issues"
git push
# Python lint fixes
@@ -93,5 +87,5 @@ jobs:
git config --local user.email "openhands@all-hands.dev"
git config --local user.name "OpenHands Bot"
git add -A
git commit -m "🤖 Auto-fix Python linting issues" --no-verify
git commit -m "🤖 Auto-fix Python linting issues"
git push

View File

@@ -73,7 +73,7 @@ jobs:
- name: Install Python dependencies using Poetry
run: poetry install --with dev,test,runtime
- name: Run Windows unit tests
run: poetry run pytest -svv tests/unit/runtime/utils/test_windows_bash.py
run: poetry run pytest -svv tests/unit/test_windows_bash.py
env:
PYTHONPATH: ".;$env:PYTHONPATH"
DEBUG: "1"

View File

@@ -1,50 +0,0 @@
name: Welcome Good First Issue
on:
issues:
types: [labeled]
permissions:
issues: write
jobs:
comment-on-good-first-issue:
if: github.event.label.name == 'good first issue'
runs-on: ubuntu-latest
steps:
- name: Check if welcome comment already exists
id: check_comment
uses: actions/github-script@v7
with:
result-encoding: string
script: |
const issueNumber = context.issue.number;
const comments = await github.rest.issues.listComments({
...context.repo,
issue_number: issueNumber
});
const alreadyCommented = comments.data.some(
(comment) =>
comment.body.includes('<!-- auto-comment:good-first-issue -->')
);
return alreadyCommented ? 'true' : 'false';
- name: Leave welcome comment
if: steps.check_comment.outputs.result == 'false'
uses: actions/github-script@v7
with:
script: |
const repoUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}`;
await github.rest.issues.createComment({
...context.repo,
issue_number: context.issue.number,
body: "🙌 **Hey there, future contributor!** 🙌\n\n" +
"This issue has been labeled as **good first issue**, which means it's a great place to get started with the OpenHands project.\n\n" +
"If you're interested in working on it, feel free to! No need to ask for permission.\n\n" +
"Be sure to check out our [development setup guide](" + repoUrl + "/blob/main/Development.md) to get your environment set up, and follow our [contribution guidelines](" + repoUrl + "/blob/main/CONTRIBUTING.md) when you're ready to submit a fix.\n\n" +
"🙌 Happy hacking! 🙌\n\n" +
"<!-- auto-comment:good-first-issue -->"
});

View File

@@ -87,8 +87,6 @@ VSCode Extension:
If you are starting a pull request (PR), please follow the template in `.github/pull_request_template.md`.
If you need to add labels when opening a PR, check the existing labels defined on that repository and select from existing ones. Do not invent your own labels.
## Implementation Details
These details may or may not be useful for your current task.
@@ -144,35 +142,6 @@ Your specialized knowledge and instructions here...
- Add the setting to the `Settings` model in `openhands/storage/data_models/settings.py`
- Update any relevant backend code to apply the setting (e.g., in session creation)
#### Settings UI Patterns:
There are two main patterns for saving settings in the OpenHands frontend:
**Pattern 1: Entity-based Resources (Immediate Save)**
- Used for: API Keys, Secrets, MCP Servers
- Behavior: Changes are saved immediately when user performs actions (add/edit/delete)
- Implementation:
- No "Save Changes" button
- No local state management or `isDirty` tracking
- Uses dedicated mutation hooks for each operation (e.g., `use-add-mcp-server.ts`, `use-delete-mcp-server.ts`)
- Each mutation triggers immediate API call with query invalidation for UI updates
- Example: MCP settings, API Keys & Secrets tabs
- Benefits: Simpler UX, no risk of losing changes, consistent with modern web app patterns
**Pattern 2: Form-based Settings (Manual Save)**
- Used for: Application settings, LLM configuration
- Behavior: Changes are accumulated locally and saved when user clicks "Save Changes"
- Implementation:
- Has "Save Changes" button that becomes enabled when changes are detected
- Uses local state management with `isDirty` tracking
- Uses `useSaveSettings` hook to save all changes at once
- Example: LLM tab, Application tab
- Benefits: Allows bulk changes, explicit save action, can validate all fields before saving
**When to use each pattern:**
- Use Pattern 1 (Immediate Save) for entity management where each item is independent
- Use Pattern 2 (Manual Save) for configuration forms where settings are interdependent or need validation
### Adding New LLM Models
To add a new LLM model to OpenHands, you need to update multiple files across both frontend and backend:

View File

@@ -159,7 +159,7 @@ poetry run pytest ./tests/unit/test_*.py
To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker
container image by setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.54-nikolaik`
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.53-nikolaik`
## Develop inside Docker container

View File

@@ -79,17 +79,17 @@ You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)
You can also run OpenHands directly with Docker:
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands:/.openhands \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.54
docker.all-hands.dev/all-hands-ai/openhands:0.53
```
</details>

View File

@@ -51,17 +51,17 @@ OpenHands也可以使用Docker在本地系统上运行。
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands:/.openhands \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.54
docker.all-hands.dev/all-hands-ai/openhands:0.53
```
> **注意**: 如果您在0.44版本之前使用过OpenHands您可能需要运行 `mv ~/.openhands-state ~/.openhands` 来将对话历史迁移到新位置。

View File

@@ -42,17 +42,17 @@ OpenHandsはDockerを利用してローカル環境でも実行できます。
> 公共ネットワークで実行していますか?[Hardened Docker Installation Guide](https://docs.all-hands.dev/usage/runtimes/docker#hardened-docker-installation)を参照して、ネットワークバインディングの制限や追加のセキュリティ対策を実施してください。
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands:/.openhands \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.54
docker.all-hands.dev/all-hands-ai/openhands:0.53
```
**注**: バージョン0.44以前のOpenHandsを使用していた場合は、会話履歴を移行するために `mv ~/.openhands-state ~/.openhands` を実行してください。

View File

@@ -21,7 +21,7 @@ ENV POETRY_NO_INTERACTION=1 \
POETRY_CACHE_DIR=/tmp/poetry_cache
RUN apt-get update -y \
&& apt-get install -y curl make git build-essential jq gettext \
&& apt-get install -y curl make git build-essential \
&& python3 -m pip install poetry --break-system-packages
COPY pyproject.toml poetry.lock ./

View File

@@ -12,7 +12,7 @@ services:
- SANDBOX_API_HOSTNAME=host.docker.internal
- DOCKER_HOST_ADDR=host.docker.internal
#
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.54-nikolaik}
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.53-nikolaik}
- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
ports:

View File

@@ -7,7 +7,7 @@ services:
image: openhands:latest
container_name: openhands-app-${DATE:-}
environment:
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik}
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik}
#- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234} # enable this only if you want a specific non-root sandbox user but you will have to manually adjust permissions of ~/.openhands for this user
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
ports:

File diff suppressed because it is too large Load Diff

View File

Before

Width:  |  Height:  |  Size: 113 KiB

After

Width:  |  Height:  |  Size: 113 KiB

View File

@@ -2,102 +2,55 @@
title: Backend Architecture
---
<div style={{ textAlign: 'center' }}>
<img src="https://github.com/All-Hands-AI/OpenHands/assets/16201837/97d747e3-29d8-4ccb-8d34-6ad1adb17f38" alt="OpenHands System Architecture Diagram Jul 4 2024" />
<p><em>OpenHands System Architecture Diagram (July 4, 2024)</em></p>
</div>
This is a high-level overview of the system architecture. The system is divided into two main components: the frontend and the backend. The frontend is responsible for handling user interactions and displaying the results. The backend is responsible for handling the business logic and executing the agents.
# System overview
# Frontend architecture
```mermaid
flowchart LR
U["User"] --> FE["Frontend (SPA)"]
FE -- "HTTP/WS" --> BE["OpenHands Backend"]
BE --> ES["EventStream"]
BE --> ST["Storage"]
BE --> RT["Runtime Interface"]
BE --> LLM["LLM Providers"]
subgraph Runtime
direction TB
RT --> DRT["Docker Runtime"]
RT --> LRT["Local Runtime"]
RT --> RRT["Remote Runtime"]
DRT --> AES["Action Execution Server"]
LRT --> AES
RRT --> AES
AES --> Bash["Bash Session"]
AES --> Jupyter["Jupyter Plugin"]
AES --> Browser["BrowserEnv"]
end
```
![system_architecture.svg](/static/img/system_architecture.svg)
This Overview is simplified to show the main components and their interactions. For a more detailed view of the backend architecture, see the Backend Architecture section below.
# Backend Architecture
_**Disclaimer**: The backend architecture is a work in progress and is subject to change. The following diagram shows the current architecture of the backend based on the commit that is shown in the footer of the diagram._
```mermaid
classDiagram
class Agent {
<<abstract>>
+sandbox_plugins: list[PluginRequirement]
}
class CodeActAgent {
+tools
}
Agent <|-- CodeActAgent
class EventStream
class Observation
class Action
Action --> Observation
Agent --> EventStream
class Runtime {
+connect()
+send_action_for_execution()
}
class ActionExecutionClient {
+_send_action_server_request()
}
class DockerRuntime
class LocalRuntime
class RemoteRuntime
Runtime <|-- ActionExecutionClient
ActionExecutionClient <|-- DockerRuntime
ActionExecutionClient <|-- LocalRuntime
ActionExecutionClient <|-- RemoteRuntime
class ActionExecutionServer {
+/execute_action
+/alive
}
class BashSession
class JupyterPlugin
class BrowserEnv
ActionExecutionServer --> BashSession
ActionExecutionServer --> JupyterPlugin
ActionExecutionServer --> BrowserEnv
Agent --> Runtime
Runtime ..> ActionExecutionServer : REST
```
![backend_architecture.svg](/static/img/backend_architecture.svg)
<details>
<summary>Updating this Diagram</summary>
<div>
We maintain architecture diagrams inline with Mermaid in this MDX.
The generation of the backend architecture diagram is partially automated.
The diagram is generated from the type hints in the code using the py2puml
tool. The diagram is then manually reviewed, adjusted and exported to PNG
and SVG.
Guidance:
- Edit the Mermaid blocks directly (flowchart/classDiagram).
- Quote labels and edge text for GitHub preview compatibility.
- Keep relationships concise and reflect stable abstractions (agents, runtime client/server, plugins).
- Verify accuracy against code:
- openhands/runtime/impl/action_execution/action_execution_client.py
- openhands/runtime/impl/docker/docker_runtime.py
- openhands/runtime/impl/local/local_runtime.py
- openhands/runtime/action_execution_server.py
- openhands/runtime/plugins/*
- Build docs locally or view on GitHub to confirm diagrams render.
## Prerequisites
- Running python environment in which openhands is executable
(according to the instructions in the README.md file in the root of the repository)
- [py2puml](https://github.com/lucsorel/py2puml) installed
## Steps
1. Autogenerate the diagram by running the following command from the root of the repository:
`py2puml openhands openhands > docs/architecture/backend_architecture.puml`
2. Open the generated file in a PlantUML editor, e.g. Visual Studio Code with the PlantUML extension or [PlantText](https://www.planttext.com/)
3. Review the generated PUML and make all necessary adjustments to the diagram (add missing parts, fix mistakes, improve positioning).
_py2puml creates the diagram based on the type hints in the code, so missing or incorrect type hints may result in an incomplete or incorrect diagram._
4. Review the diff between the new and the previous diagram and manually check if the changes are correct.
_Make sure not to remove parts that were manually added to the diagram in the past and are still relevant._
5. Add the commit hash of the commit that was used to generate the diagram to the diagram footer.
6. Export the diagram as PNG and SVG files and replace the existing diagrams in the `docs/architecture` directory. This can be done with (e.g. [PlantText](https://www.planttext.com/))
</div>
</details>

View File

@@ -52,7 +52,7 @@ graph TD
2. Image Building: OpenHands builds a new Docker image (the "OH runtime image") based on the user-provided image. This new image includes OpenHands-specific code, primarily the "runtime client"
3. Container Launch: When OpenHands starts, it launches a Docker container using the OH runtime image
4. Action Execution Server Initialization: The action execution server initializes an `ActionExecutor` inside the container, setting up necessary components like a bash shell and loading any specified plugins
5. Communication: The OpenHands backend (client: `openhands/runtime/impl/action_execution/action_execution_client.py`; runtimes: `openhands/runtime/impl/docker/docker_runtime.py`, `openhands/runtime/impl/local/local_runtime.py`) communicates with the action execution server over RESTful API, sending actions and receiving observations
5. Communication: The OpenHands backend (`openhands/runtime/impl/eventstream/eventstream_runtime.py`) communicates with the action execution server over RESTful API, sending actions and receiving observations
6. Action Execution: The runtime client receives actions from the backend, executes them in the sandboxed environment, and sends back observations
7. Observation Return: The action execution server sends execution results back to the OpenHands backend as observations
@@ -72,7 +72,7 @@ Check out the [relevant code](https://github.com/All-Hands-AI/OpenHands/blob/mai
### Image Tagging System
OpenHands uses a three-tag system for its runtime images to balance reproducibility with flexibility.
The tags are:
Tags may be in one of 2 formats:
- **Versioned Tag**: `oh_v{openhands_version}_{base_image}` (e.g.: `oh_v0.9.9_nikolaik_s_python-nodejs_t_python3.12-nodejs22`)
- **Lock Tag**: `oh_v{openhands_version}_{16_digit_lock_hash}` (e.g.: `oh_v0.9.9_1234567890abcdef`)
@@ -119,52 +119,18 @@ This tagging approach allows OpenHands to efficiently manage both development an
2. The system can quickly rebuild images when minor changes occur (by leveraging recent compatible images)
3. The **lock** tag (e.g., `runtime:oh_v0.9.3_1234567890abcdef`) always points to the latest build for a particular base image, dependency, and OpenHands version combination
## Volume mounts: named volumes and overlay
OpenHands supports both bind mounts and Docker named volumes in SandboxConfig.volumes:
- Bind mount: "/abs/host/path:/container/path[:mode]"
- Named volume: "volume:<name>:/container/path[:mode]" or any non-absolute host spec treated as a named volume
Overlay mode (copy-on-write layer) is supported for bind mounts by appending ":overlay" to the mode (e.g., ":ro,overlay").
To enable overlay COW, set SANDBOX_VOLUME_OVERLAYS to a writable host directory; per-container upper/work dirs are created under it. If SANDBOX_VOLUME_OVERLAYS is unset, overlay mounts are skipped.
Implementation references:
- openhands/runtime/impl/docker/docker_runtime.py (named volumes in _build_docker_run_args; overlay mounts in _process_overlay_mounts)
- openhands/core/config/sandbox_config.py (volumes field)
## Runtime Plugin System
The OpenHands Runtime supports a plugin system that allows for extending functionality and customizing the runtime environment. Plugins are initialized when the action execution server starts up inside the runtime.
The OpenHands Runtime supports a plugin system that allows for extending functionality and customizing the runtime environment. Plugins are initialized when the runtime client starts up.
## Ports and URLs
Check [an example of Jupyter plugin here](https://github.com/All-Hands-AI/OpenHands/blob/ecf4aed28b0cf7c18d4d8ff554883ba182fc6bdd/openhands/runtime/plugins/jupyter/__init__.py#L21-L55) if you want to implement your own plugin.
- Host port allocation uses file-locked ranges for stability and concurrency:
- Main runtime port: find_available_port_with_lock on configured range
- VSCode port: SandboxConfig.sandbox.vscode_port if provided, else find_available_port_with_lock in VSCODE_PORT_RANGE
- App ports: two additional ranges for plugin/web apps
- DOCKER_HOST_ADDR (if set) adjusts how URLs are formed for LocalRuntime/Docker environments.
- VSCode URL is exposed with a connection token from the action execution server endpoint /vscode/connection_token and rendered as:
- Docker/Local: http://localhost:{port}/?tkn={token}&folder={workspace_mount_path_in_sandbox}
- RemoteRuntime: scheme://vscode-{host}/?tkn={token}&folder={workspace_mount_path_in_sandbox}
References:
- openhands/runtime/impl/docker/docker_runtime.py (port ranges, locking, DOCKER_HOST_ADDR, vscode_url)
- openhands/runtime/impl/local/local_runtime.py (vscode_url factory)
- openhands/runtime/impl/remote/remote_runtime.py (vscode_url mapping)
- openhands/runtime/action_execution_server.py (/vscode/connection_token)
Examples:
- Jupyter: openhands/runtime/plugins/jupyter/__init__.py (JupyterPlugin, Kernel Gateway)
- VS Code: openhands/runtime/plugins/vscode/* (VSCodePlugin, exposes tokenized URL)
- Agent Skills: openhands/runtime/plugins/agent_skills/*
*More details about the Plugin system are still under construction - contributions are welcomed!*
Key aspects of the plugin system:
1. Plugin Definition: Plugins are defined as Python classes that inherit from a base `Plugin` class
2. Plugin Registration: Available plugins are registered in `openhands/runtime/plugins/__init__.py` via `ALL_PLUGINS`
2. Plugin Registration: Available plugins are registered in an `ALL_PLUGINS` dictionary
3. Plugin Specification: Plugins are associated with `Agent.sandbox_plugins: list[PluginRequirement]`. Users can specify which plugins to load when initializing the runtime
4. Initialization: Plugins are initialized asynchronously when the runtime starts and are accessible to actions
5. Usage: Plugins extend capabilities (e.g., Jupyter for IPython cells); the server exposes any web endpoints (ports) via host port mapping
4. Initialization: Plugins are initialized asynchronously when the runtime client starts
5. Usage: The runtime client can use initialized plugins to extend its capabilities (e.g., the JupyterPlugin for running IPython cells)

View File

@@ -65,7 +65,7 @@ To send follow-up messages for the same conversation, mention `@openhands` in a
Conversation is started by mentioning `@openhands`.
![slack-create-conversation.png](/static/img/slack-create-conversation.png)
![slack-create-convo.png](/static/img/slack-create-convo.png)
### See agent response and send follow up messages

View File

@@ -119,7 +119,7 @@ The conversation history will be saved in `~/.openhands/sessions`.
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -128,8 +128,8 @@ docker run -it \
-v ~/.openhands:/.openhands \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.54 \
python -m openhands.cli.entry --override-cli-mode true
docker.all-hands.dev/all-hands-ai/openhands:0.53 \
python -m openhands.cli.main --override-cli-mode true
```
<Note>

View File

@@ -61,7 +61,7 @@ export GITHUB_TOKEN="your-token" # Required for repository operations
# Run OpenHands
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -73,7 +73,7 @@ docker run -it \
-v ~/.openhands:/.openhands \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.54 \
docker.all-hands.dev/all-hands-ai/openhands:0.53 \
python -m openhands.core.main -t "write a bash script that prints hi"
```

View File

@@ -68,23 +68,23 @@ Download and install the LM Studio desktop app from [lmstudio.ai](https://lmstud
1. Check [the installation guide](/usage/local-setup) and ensure all prerequisites are met before running OpenHands, then run:
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands:/.openhands \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.54
docker.all-hands.dev/all-hands-ai/openhands:0.53
```
2. Wait until the server is running (see log below):
```
Digest: sha256:e72f9baecb458aedb9afc2cd5bc935118d1868719e55d50da73190d3a85c674f
Status: Image is up to date for docker.all-hands.dev/all-hands-ai/openhands:0.54
Status: Image is up to date for docker.all-hands.dev/all-hands-ai/openhands:0.53
Starting OpenHands...
Running OpenHands as root
14:22:13 - openhands:INFO: server_config.py:50 - Using config class None

View File

@@ -109,17 +109,17 @@ Note that you'll still need `uv` installed for the default MCP servers to work p
<Accordion title="Docker Command (Click to expand)">
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands:/.openhands \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.54
docker.all-hands.dev/all-hands-ai/openhands:0.53
```
</Accordion>

View File

@@ -130,28 +130,3 @@ docker run # ... \
<Note>
**Docker Desktop Required**: Network isolation features, including custom networks and `host.docker.internal` routing, require Docker Desktop. Docker Engine alone does not support these features on localhost across custom networks. If you're using Docker Engine without Docker Desktop, network isolation may not work as expected.
</Note>
### Sidecar Containers
If you want to run sidecar containers to the sandbox 'runner' containers without exposing the sandbox containers to the host network, you can use the `SANDBOX_ADDITIONAL_NETWORKS` environment variable to specify additional Docker network names that should be added to the sandbox containers.
```bash
docker network create openhands-sccache
docker run -d \
--hostname openhandsredis \
--network openhands-sccache \
redis
docker run # ...
-e SANDBOX_ADDITIONAL_NETWORKS='["openhands-sccache"]' \
# ...
```
Then all sandbox instances will have to access a shared redis instance at `openhandsredis:6379`.
#### Docker Compose gotcha
Note that Docker Compose adds a prefix (a scope) by default to created networks, which is not taken into account by the additional networks config. Therefore when using docker compose you have to either:
- specify a network name via the `name` field to remove the scoping (https://docs.docker.com/reference/compose-file/networks/#name)
- or provide the scope within the given config (e.g. `SANDBOX_ADDITIONAL_NETWORKS: '["myscope_openhands-sccache"]'` where `myscope` is the docker-compose assigned prefix).

View File

@@ -10,7 +10,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -147,7 +146,7 @@ def process_instance(
logger.info(f'Final message: {final_message} | Ground truth: {instance["text"]}')
test_result = game.reward()
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here

View File

@@ -18,7 +18,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -274,7 +273,7 @@ def process_instance(
# remove when it becomes unnecessary
histories = compatibility_for_eval_history_pairs(state.history)
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# Save the output
output = EvalOutput(

View File

@@ -17,7 +17,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -247,7 +246,7 @@ def process_instance(
# for compatibility with the existing output format, we can remake the pairs here
# remove when it becomes unnecessary
histories = compatibility_for_eval_history_pairs(state.history)
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# Save the output
output = EvalOutput(

View File

@@ -15,7 +15,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -295,7 +294,7 @@ def process_instance(
raise ValueError('State should not be None.')
test_result = complete_runtime(runtime, instance)
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here
# remove when it becomes unnecessary

View File

@@ -18,7 +18,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -423,7 +422,7 @@ def process_instance(
# You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here

View File

@@ -11,7 +11,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -89,7 +88,7 @@ def process_instance(
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here
# remove when it becomes unnecessary

View File

@@ -16,7 +16,6 @@ from evaluation.utils.shared import (
assert_and_raise,
codeact_user_response,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -481,7 +480,7 @@ def process_instance(
# NOTE: this is NO LONGER the event stream, but an agent history that includes delegate agent's events
histories = [event_to_dict(event) for event in state.history]
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# Save the output
output = EvalOutput(

View File

@@ -17,7 +17,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -295,7 +294,7 @@ def process_instance(
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
test_result = complete_runtime(state)
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)

View File

@@ -22,7 +22,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -270,7 +269,7 @@ Here is the task:
'model_answer': model_answer,
'ground_truth': instance['Final answer'],
}
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here

View File

@@ -12,7 +12,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -109,7 +108,7 @@ def process_instance(
# attempt to parse model_answer
ast_eval_fn = instance['ast_eval']
correct, hallucination = ast_eval_fn(instance_id, model_answer_raw)
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
logger.info(
f'Final message: {model_answer_raw} | Correctness: {correct} | Hallucination: {hallucination}'
)

View File

@@ -30,7 +30,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -293,7 +292,7 @@ Ok now its time to start solving the question. Good luck!
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# Save the output
output = EvalOutput(

View File

@@ -23,7 +23,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -249,7 +248,7 @@ def process_instance(
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
test_result = complete_runtime(runtime, instance)
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)

View File

@@ -22,7 +22,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -336,7 +335,7 @@ Be thorough in your exploration, testing, and reasoning. It's fine if your think
)
)
assert state is not None
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else {}
test_result = complete_runtime(runtime, instance)

View File

@@ -10,7 +10,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -248,7 +247,7 @@ def process_instance(
)
test_result['final_message'] = final_message
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here
# remove when it becomes unnecessary

View File

@@ -13,7 +13,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -175,7 +174,7 @@ def process_instance(
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# Instruction is the first message from the USER
instruction = ''

View File

@@ -15,7 +15,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -206,7 +205,7 @@ def process_instance(
task_state = state.extra_data['task_state']
logger.info('Task state: ' + str(task_state.to_dict()))
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here

View File

@@ -26,7 +26,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -251,7 +250,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
)
)
assert state is not None
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else {}
test_result = complete_runtime(runtime)

View File

@@ -12,7 +12,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -219,7 +218,7 @@ If the program uses some packages that are incompatible, please figure out alter
# You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here

View File

@@ -93,9 +93,6 @@ export USE_HINT_TEXT=true # Ignore this if you are not sure.
# Specify a condenser configuration for memory management (default: NoOpCondenser)
export EVAL_CONDENSER=summarizer_for_eval # Name of the condenser config group in config.toml
# Specify the instruction prompt template file name
export INSTRUCTION_TEMPLATE_NAME=swe_custom.j2 # Name of the file in the swe_bench/prompts folder.
```
Let's say you'd like to run 10 instances using `llm.eval_gpt4_1106_preview` and CodeActAgent,

View File

@@ -108,9 +108,7 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata) -> MessageActio
llm_model = metadata.llm_config.model
# Determine the template file based on mode and LLM
if metadata.instruction_template_name:
template_name = metadata.instruction_template_name
elif mode.startswith('swt'):
if mode.startswith('swt'):
template_name = 'swt.j2'
elif mode == 'swe':
if 'gpt-4.1' in llm_model:
@@ -124,7 +122,6 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata) -> MessageActio
logger.error(f'Unexpected evaluation mode: {mode}. Falling back to default.')
template_name = 'swe_default.j2'
logger.debug(f'Using instruction template file: {template_name}')
# Set up Jinja2 environment
# Assuming templates are in 'evaluation/benchmarks/swe_bench/prompts' relative to this script
prompts_dir = os.path.join(os.path.dirname(__file__), 'prompts')

View File

@@ -21,7 +21,6 @@ from evaluation.utils.shared import (
EvalException,
EvalMetadata,
EvalOutput,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -180,7 +179,7 @@ def process_instance(
raise ValueError('State should not be None.')
histories = [event_to_dict(event) for event in state.history]
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# Save the output
instruction = message_action.content

View File

@@ -11,7 +11,6 @@ from evaluation.utils.shared import (
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -135,7 +134,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
correct = eval_answer(str(model_answer_raw), str(answer))
logger.info(f'Final message: {model_answer_raw} | Correctness: {correct}')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
# for compatibility with the existing output format, we can remake the pairs here

View File

@@ -12,7 +12,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -180,7 +179,7 @@ def process_instance(
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# Instruction obtained from the first message from the USER
instruction = ''

View File

@@ -12,7 +12,6 @@ from evaluation.utils.shared import (
EvalOutput,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -164,7 +163,7 @@ def process_instance(
if state is None:
raise ValueError('State should not be None.')
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
# Instruction is the first message from the USER
instruction = ''

View File

@@ -9,7 +9,6 @@ from evaluation.utils.shared import (
EvalMetadata,
EvalOutput,
get_default_sandbox_config_for_eval,
get_metrics,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
@@ -136,7 +135,7 @@ def process_instance(
assert len(histories) > 0, 'History should not be empty'
test_result: TestResult = test_class.verify_result(runtime, histories)
metrics = get_metrics(state)
metrics = state.metrics.get() if state.metrics else None
finally:
runtime.close()

View File

@@ -53,7 +53,6 @@ class EvalMetadata(BaseModel):
data_split: str | None = None
details: dict[str, Any] | None = None
condenser_config: CondenserConfig | None = None
instruction_template_name: str | None = None
class EvalOutput(BaseModel):
@@ -206,7 +205,6 @@ def make_metadata(
condenser_config=condenser_config
if condenser_config
else NoOpCondenserConfig(),
instruction_template_name=os.environ.get('INSTRUCTION_TEMPLATE_NAME'),
)
metadata_json = metadata.model_dump_json()
logger.info(f'Metadata: {metadata_json}')
@@ -668,23 +666,8 @@ def is_fatal_runtime_error(error: str | None) -> bool:
def get_metrics(state: State) -> dict[str, Any]:
"""Extract metrics for evaluations.
Prefer ConversationStats (source of truth) and fall back to state.metrics for
backward compatibility.
"""
metrics: dict[str, Any]
try:
if getattr(state, 'conversation_stats', None):
combined = state.conversation_stats.get_combined_metrics()
metrics = combined.get()
elif getattr(state, 'metrics', None):
metrics = state.metrics.get()
else:
metrics = {}
except Exception:
metrics = state.metrics.get() if getattr(state, 'metrics', None) else {}
"""Extract metrics from the state."""
metrics = state.metrics.get() if state.metrics else {}
metrics['condenser'] = get_condensation_metadata(state)
return metrics

View File

@@ -232,16 +232,13 @@ describe("RepositorySelectionForm", () => {
renderForm();
const dropdown = await screen.findByTestId("repo-dropdown");
const input = dropdown.querySelector(
'input[type="text"]',
) as HTMLInputElement;
const input = dropdown.querySelector('input[type="text"]') as HTMLInputElement;
expect(input).toBeInTheDocument();
await userEvent.type(input, "https://github.com/kubernetes/kubernetes");
expect(searchGitReposSpy).toHaveBeenLastCalledWith(
"kubernetes/kubernetes",
3,
"github",
);
});
@@ -271,16 +268,13 @@ describe("RepositorySelectionForm", () => {
renderForm();
const dropdown = await screen.findByTestId("repo-dropdown");
const input = dropdown.querySelector(
'input[type="text"]',
) as HTMLInputElement;
const input = dropdown.querySelector('input[type="text"]') as HTMLInputElement;
expect(input).toBeInTheDocument();
await userEvent.type(input, "https://github.com/kubernetes/kubernetes");
expect(searchGitReposSpy).toHaveBeenLastCalledWith(
"kubernetes/kubernetes",
3,
"github",
);
});
});

View File

@@ -444,38 +444,28 @@ describe("MicroagentManagement", () => {
expect(filePath2).toBeInTheDocument();
});
it("should render add microagent button", async () => {
it("should display add microagent button in repository accordion", async () => {
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(screen.getByTestId("repository-name-tooltip")).toBeInTheDocument();
});
// Check that add microagent buttons are present
const addButtons = screen.getAllByTestId("add-microagent-button");
expect(addButtons.length).toBeGreaterThan(0);
});
it("should open modal when add button is clicked", async () => {
it("should open add microagent modal when add button is clicked", async () => {
const user = userEvent.setup();
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(screen.getByTestId("repository-name-tooltip")).toBeInTheDocument();
});
// Find and click the first add microagent button
const addButtons = screen.getAllByTestId("add-microagent-button");
await user.click(addButtons[0]);
@@ -1302,18 +1292,11 @@ describe("MicroagentManagement", () => {
it("should render add microagent button", async () => {
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(
screen.getByTestId("repository-name-tooltip"),
).toBeInTheDocument();
});
// Check that add microagent buttons are present
const addButtons = screen.getAllByTestId("add-microagent-button");
expect(addButtons.length).toBeGreaterThan(0);
@@ -1323,18 +1306,11 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(
screen.getByTestId("repository-name-tooltip"),
).toBeInTheDocument();
});
// Find and click the first add microagent button
const addButtons = screen.getAllByTestId("add-microagent-button");
await user.click(addButtons[0]);
@@ -1385,18 +1361,11 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(
screen.getByTestId("repository-name-tooltip"),
).toBeInTheDocument();
});
// Find and click the first add microagent button
const addButtons = screen.getAllByTestId("add-microagent-button");
await user.click(addButtons[0]);
@@ -1416,18 +1385,11 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(
screen.getByTestId("repository-name-tooltip"),
).toBeInTheDocument();
});
// Find and click the first add microagent button
const addButtons = screen.getAllByTestId("add-microagent-button");
await user.click(addButtons[0]);
@@ -1446,18 +1408,11 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(
screen.getByTestId("repository-name-tooltip"),
).toBeInTheDocument();
});
// Find and click the first add microagent button
const addButtons = screen.getAllByTestId("add-microagent-button");
await user.click(addButtons[0]);
@@ -1486,18 +1441,11 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(
screen.getByTestId("repository-name-tooltip"),
).toBeInTheDocument();
});
// Find and click the first add microagent button
const addButtons = screen.getAllByTestId("add-microagent-button");
await user.click(addButtons[0]);
@@ -1520,18 +1468,11 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(
screen.getByTestId("repository-name-tooltip"),
).toBeInTheDocument();
});
// Find and click the first add microagent button
const addButtons = screen.getAllByTestId("add-microagent-button");
await user.click(addButtons[0]);
@@ -1553,18 +1494,11 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
renderMicroagentManagement();
// Wait for repositories to be loaded and processed
// Wait for repositories to be loaded
await waitFor(() => {
expect(mockUseUserRepositories).toHaveBeenCalled();
});
// Wait for repositories to be displayed in the accordion
await waitFor(() => {
expect(
screen.getByTestId("repository-name-tooltip"),
).toBeInTheDocument();
});
// Find and click the first add microagent button
const addButtons = screen.getAllByTestId("add-microagent-button");
await user.click(addButtons[0]);

View File

@@ -136,7 +136,7 @@ describe("Settings Screen", () => {
"secrets",
"api keys",
];
const sectionsToExclude = ["llm"];
const sectionsToExclude = ["llm", "mcp"];
renderSettingsScreen();

View File

@@ -1,12 +1,12 @@
{
"name": "openhands-frontend",
"version": "0.54.0",
"version": "0.53.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "openhands-frontend",
"version": "0.54.0",
"version": "0.53.0",
"dependencies": {
"@heroui/react": "^2.8.2",
"@heroui/use-infinite-scroll": "^2.2.10",

View File

@@ -1,6 +1,6 @@
{
"name": "openhands-frontend",
"version": "0.54.0",
"version": "0.53.0",
"private": true,
"type": "module",
"engines": {

View File

@@ -1,9 +1,7 @@
import { useCallback, useMemo, useState } from "react";
import { useCallback, useMemo, useRef } from "react";
import { useTranslation } from "react-i18next";
import { Provider } from "../../types/settings";
import { useGitRepositories } from "../../hooks/query/use-git-repositories";
import { useSearchRepositories } from "../../hooks/query/use-search-repositories";
import { useDebounce } from "../../hooks/use-debounce";
import OpenHands from "../../api/open-hands";
import { GitRepository } from "../../types/git";
import {
@@ -21,6 +19,10 @@ export interface GitRepositoryDropdownProps {
onChange?: (repository?: GitRepository) => void;
}
interface SearchCache {
[key: string]: GitRepository[];
}
export function GitRepositoryDropdown({
provider,
value,
@@ -31,20 +33,6 @@ export function GitRepositoryDropdown({
onChange,
}: GitRepositoryDropdownProps) {
const { t } = useTranslation();
const [searchInput, setSearchInput] = useState("");
const debouncedSearchInput = useDebounce(searchInput, 300);
// Process search input to handle URLs
const processedSearchInput = useMemo(() => {
if (debouncedSearchInput.startsWith("https://")) {
const match = debouncedSearchInput.match(
/https:\/\/[^/]+\/([^/]+\/[^/]+)/,
);
return match ? match[1] : debouncedSearchInput;
}
return debouncedSearchInput;
}, [debouncedSearchInput]);
const {
data,
fetchNextPage,
@@ -57,10 +45,6 @@ export function GitRepositoryDropdown({
enabled: !disabled,
});
// Search query for processed input (handles URLs)
const { data: searchData, isLoading: isSearchLoading } =
useSearchRepositories(processedSearchInput, provider);
const allOptions: AsyncSelectOption[] = useMemo(
() =>
data?.pages
@@ -74,83 +58,75 @@ export function GitRepositoryDropdown({
[data],
);
const searchOptions: AsyncSelectOption[] = useMemo(
() =>
searchData
? searchData.map((repo) => ({
value: repo.id,
label: repo.full_name,
}))
: [],
[searchData],
);
// Keep track of search results
const searchCache = useRef<SearchCache>({});
const selectedOption = useMemo(() => {
// First check in loaded pages
const option = allOptions.find((opt) => opt.value === value);
if (option) return option;
// If not found, check in search results
const searchOption = searchOptions.find((opt) => opt.value === value);
if (searchOption) return searchOption;
// If not found, check in search cache
const repo = Object.values(searchCache.current)
.flat()
.find((r) => r.id === value);
if (repo) {
return {
value: repo.id,
label: repo.full_name,
};
}
return null;
}, [allOptions, searchOptions, value]);
}, [allOptions, value]);
const loadOptions = useCallback(
async (inputValue: string): Promise<AsyncSelectOption[]> => {
// Update search input to trigger debounced search
setSearchInput(inputValue);
// If empty input, show all loaded options
if (!inputValue.trim()) {
return allOptions;
}
// For very short inputs, do local filtering
if (inputValue.length < 2) {
return allOptions.filter((option) =>
option.label.toLowerCase().includes(inputValue.toLowerCase()),
);
}
// Handle URL inputs by performing direct search
// If it looks like a URL, extract the repo name and search
if (inputValue.startsWith("https://")) {
const match = inputValue.match(/https:\/\/[^/]+\/([^/]+\/[^/]+)/);
if (match) {
const repoName = match[1];
try {
// Perform direct search for URL-based inputs
const repositories = await OpenHands.searchGitRepositories(
repoName,
3,
provider,
);
return repositories.map((repo) => ({
value: repo.full_name,
label: repo.full_name,
data: repo,
}));
} catch (error) {
// Fall back to local filtering if search fails
return allOptions.filter((option) =>
option.label.toLowerCase().includes(repoName.toLowerCase()),
);
}
const searchResults = await OpenHands.searchGitRepositories(
repoName,
3,
);
// Cache the search results
searchCache.current[repoName] = searchResults;
return searchResults.map((repo) => ({
value: repo.id,
label: repo.full_name,
}));
}
}
// For regular text inputs, use hook-based search results if available
if (searchOptions.length > 0 && processedSearchInput === inputValue) {
return searchOptions;
// For any other input, search via API
if (inputValue.length >= 2) {
// Only search if at least 2 characters
const searchResults = await OpenHands.searchGitRepositories(
inputValue,
10,
);
// Cache the search results
searchCache.current[inputValue] = searchResults;
return searchResults.map((repo) => ({
value: repo.id,
label: repo.full_name,
}));
}
// Fallback to local filtering while search is loading
// For very short inputs, do local filtering
return allOptions.filter((option) =>
option.label.toLowerCase().includes(inputValue.toLowerCase()),
);
},
[allOptions, searchOptions, processedSearchInput, provider],
[allOptions],
);
const handleChange = (option: AsyncSelectOption | null) => {
@@ -166,7 +142,9 @@ export function GitRepositoryDropdown({
// If not found, check in search results
if (!repo) {
repo = searchData?.find((r) => r.id === option.value);
repo = Object.values(searchCache.current)
.flat()
.find((r) => r.id === option.value);
}
onChange?.(repo);
@@ -189,7 +167,7 @@ export function GitRepositoryDropdown({
errorMessage={errorMessage}
disabled={disabled}
isClearable={false}
isLoading={isLoading || isFetchingNextPage || isSearchLoading}
isLoading={isLoading || isLoading || isFetchingNextPage}
cacheOptions
defaultOptions={allOptions}
onChange={handleChange}

View File

@@ -17,7 +17,7 @@ export function MicroagentManagementAccordionTitle({
<TooltipButton
tooltip={repository.full_name}
ariaLabel={repository.full_name}
className="text-white text-base font-normal bg-transparent p-0 min-w-0 h-auto cursor-pointer truncate max-w-[200px] translate-y-[-1px]"
className="text-white text-base font-normal bg-transparent p-0 min-w-0 h-auto cursor-pointer truncate max-w-[232px]"
testId="repository-name-tooltip"
placement="bottom"
>

View File

@@ -7,6 +7,8 @@ import {
} from "#/state/microagent-management-slice";
import { RootState } from "#/store";
import { GitRepository } from "#/types/git";
import PlusIcon from "#/icons/plus.svg?react";
import { TooltipButton } from "#/components/shared/buttons/tooltip-button";
interface MicroagentManagementAddMicroagentButtonProps {
repository: GitRepository;
@@ -23,22 +25,23 @@ export function MicroagentManagementAddMicroagentButton({
const dispatch = useDispatch();
const handleClick = (e: React.MouseEvent<HTMLButtonElement>) => {
const handleClick = (e: React.MouseEvent<HTMLDivElement>) => {
e.stopPropagation();
dispatch(setAddMicroagentModalVisible(!addMicroagentModalVisible));
dispatch(setSelectedRepository(repository));
};
return (
<button
type="button"
onClick={handleClick}
className="translate-y-[-1px]"
data-testid="add-microagent-button"
>
<span className="text-sm font-normal leading-5 text-[#8480FF] cursor-pointer hover:text-[#6C63FF] transition-colors duration-200">
{t(I18nKey.COMMON$ADD_MICROAGENT)}
</span>
</button>
<div onClick={handleClick}>
<TooltipButton
tooltip={t(I18nKey.COMMON$ADD_MICROAGENT)}
ariaLabel={t(I18nKey.COMMON$ADD_MICROAGENT)}
className="p-0 min-w-0 h-6 w-6 flex items-center justify-center bg-transparent cursor-pointer"
testId="add-microagent-button"
placement="bottom"
>
<PlusIcon width={22} height={22} />
</TooltipButton>
</div>
);
}

View File

@@ -1,5 +1,4 @@
import React, { useEffect, useState } from "react";
import { useTranslation } from "react-i18next";
import { useDispatch, useSelector } from "react-redux";
import { MicroagentManagementSidebar } from "./microagent-management-sidebar";
import { MicroagentManagementMain } from "./microagent-management-main";
@@ -26,12 +25,6 @@ import { GitRepository } from "#/types/git";
import { queryClient } from "#/query-client-config";
import { Provider } from "#/types/settings";
import { MicroagentManagementLearnThisRepoModal } from "./microagent-management-learn-this-repo-modal";
import {
displaySuccessToast,
displayErrorToast,
} from "#/utils/custom-toast-handlers";
import { getFirstPRUrl } from "#/utils/parse-pr-url";
import { I18nKey } from "#/i18n/declaration";
// Handle error events
const isErrorEvent = (evt: unknown): evt is { error: true; message: string } =>
@@ -119,8 +112,6 @@ export function MicroagentManagementContent() {
learnThisRepoModalVisible,
} = useSelector((state: RootState) => state.microagentManagement);
const { t } = useTranslation();
const dispatch = useDispatch();
const { createConversationAndSubscribe, isPending } =
@@ -168,37 +159,6 @@ export function MicroagentManagementContent() {
? (selectedRepository as GitRepository).full_name
: "";
// Check if agent is running and ready to work
if (
isOpenHandsEvent(socketEvent) &&
isAgentStateChangeObservation(socketEvent) &&
socketEvent.extras.agent_state === AgentState.RUNNING
) {
displaySuccessToast(
t(I18nKey.MICROAGENT_MANAGEMENT$OPENING_PR_TO_CREATE_MICROAGENT),
);
}
// Check if agent has finished and we have a PR
if (isOpenHandsEvent(socketEvent) && isFinishAction(socketEvent)) {
const prUrl = getFirstPRUrl(socketEvent.args.final_thought || "");
if (prUrl) {
displaySuccessToast(
t(I18nKey.MICROAGENT_MANAGEMENT$PR_READY_FOR_REVIEW),
);
} else {
// Agent finished but no PR found
displaySuccessToast(t(I18nKey.MICROAGENT_MANAGEMENT$PR_NOT_CREATED));
}
}
// Handle error events
if (isErrorEvent(socketEvent) || isAgentStatusError(socketEvent)) {
displayErrorToast(
t(I18nKey.MICROAGENT_MANAGEMENT$ERROR_CREATING_MICROAGENT),
);
}
if (shouldInvalidateConversationsList(socketEvent)) {
invalidateConversationsList(repositoryName);
}

View File

@@ -65,18 +65,6 @@ export function MicroagentManagementRepoMicroagents({
}
}, [conversations]);
useEffect(
() => () => {
dispatch(
setSelectedMicroagentItem({
microagent: null,
conversation: null,
}),
);
},
[],
);
// Show loading only when both queries are loading
const isLoading = isLoadingMicroagents || isLoadingConversations;
@@ -94,7 +82,7 @@ export function MicroagentManagementRepoMicroagents({
// If there's an error with microagents, show the learn this repo component
if (isError) {
return (
<div>
<div className="pb-4">
<MicroagentManagementLearnThisRepo repository={repository} />
</div>
);
@@ -105,7 +93,7 @@ export function MicroagentManagementRepoMicroagents({
const totalItems = numberOfMicroagents + numberOfConversations;
return (
<div>
<div className="pb-4">
{totalItems === 0 && (
<MicroagentManagementLearnThisRepo repository={repository} />
)}

View File

@@ -97,10 +97,8 @@ export function MicroagentManagementRepositories({
variant="splitted"
className="w-full px-0 gap-3"
itemClasses={{
base: "shadow-none bg-transparent cursor-pointer px-0",
trigger: "cursor-pointer gap-2 py-3",
indicator:
"flex items-center justify-center p-0.5 pr-[3px] text-white hover:bg-[#454545] rounded transition-colors duration-200 rotate-180",
base: "shadow-none bg-transparent border border-[#ffffff40] rounded-[6px] cursor-pointer",
trigger: "cursor-pointer gap-1",
}}
selectionMode="multiple"
>

View File

@@ -1,110 +0,0 @@
import { render, screen, fireEvent } from "@testing-library/react";
import { describe, it, expect, vi } from "vitest";
import { MCPServerForm } from "../mcp-server-form";
// i18n mock
vi.mock("react-i18next", () => ({
useTranslation: () => ({
t: (key: string) => key,
}),
}));
describe("MCPServerForm validation", () => {
const noop = () => {};
it("rejects invalid env var lines and allows blank lines", () => {
const onSubmit = vi.fn();
render(
<MCPServerForm
mode="add"
server={{ id: "tmp", type: "stdio" }}
existingServers={[]}
onSubmit={onSubmit}
onCancel={noop}
/>,
);
// Fill required fields
fireEvent.change(screen.getByTestId("name-input"), {
target: { value: "my-server" },
});
fireEvent.change(screen.getByTestId("command-input"), {
target: { value: "npx" },
});
// Invalid env entries mixed with blank lines
fireEvent.change(screen.getByTestId("env-input"), {
target: { value: "invalid\n\nKEY=value\n=novalue\nKEY_ONLY=" },
});
fireEvent.click(screen.getByTestId("submit-button"));
// Should show invalid env format error
expect(
screen.getByText("SETTINGS$MCP_ERROR_ENV_INVALID_FORMAT"),
).toBeInTheDocument();
// Fix env with valid lines and blank lines
fireEvent.change(screen.getByTestId("env-input"), {
target: { value: "KEY=value\n\nANOTHER=123" },
});
fireEvent.click(screen.getByTestId("submit-button"));
// No error; submit should be called
expect(onSubmit).toHaveBeenCalledTimes(1);
});
it("rejects duplicate URLs across sse/shttp types", () => {
const onSubmit = vi.fn();
const existingServers = [
{ id: "sse-1", type: "sse" as const, url: "https://api.example.com" },
{ id: "shttp-1", type: "shttp" as const, url: "https://x.example.com" },
];
const r1 = render(
<MCPServerForm
mode="add"
server={{ id: "tmp", type: "sse" }}
existingServers={existingServers}
onSubmit={onSubmit}
onCancel={noop}
/>,
);
fireEvent.change(screen.getAllByTestId("url-input")[0], {
target: { value: "https://api.example.com" },
});
fireEvent.click(screen.getAllByTestId("submit-button")[0]);
expect(
screen.getByText("SETTINGS$MCP_ERROR_URL_DUPLICATE"),
).toBeInTheDocument();
// Unmount first form, then check shttp duplicate
r1.unmount();
const r2 = render(
<MCPServerForm
mode="add"
server={{ id: "tmp2", type: "shttp" }}
existingServers={existingServers}
onSubmit={onSubmit}
onCancel={noop}
/>,
);
fireEvent.change(screen.getAllByTestId("url-input")[0], {
target: { value: "https://api.example.com" },
});
fireEvent.click(screen.getAllByTestId("submit-button")[0]);
expect(
screen.getByText("SETTINGS$MCP_ERROR_URL_DUPLICATE"),
).toBeInTheDocument();
r2.unmount();
});
});

View File

@@ -1,158 +0,0 @@
import { render, screen } from "@testing-library/react";
import { describe, it, expect, vi } from "vitest";
import { MCPServerList } from "../mcp-server-list";
// Mock react-i18next
vi.mock("react-i18next", () => ({
useTranslation: () => ({
t: (key: string) => key,
}),
}));
const mockServers = [
{
id: "sse-0",
type: "sse" as const,
url: "https://very-long-url-that-could-cause-layout-overflow.example.com/api/v1/mcp/server/endpoint/with/many/path/segments",
},
{
id: "stdio-0",
type: "stdio" as const,
name: "test-stdio-server",
command: "python",
args: ["-m", "test_server"],
},
];
describe("MCPServerList", () => {
it("should render servers with proper layout structure", () => {
const mockOnEdit = vi.fn();
const mockOnDelete = vi.fn();
render(
<MCPServerList
servers={mockServers}
onEdit={mockOnEdit}
onDelete={mockOnDelete}
/>,
);
// Check that the table structure is rendered
const table = screen.getByRole("table");
expect(table).toBeInTheDocument();
expect(table).toHaveClass("w-full");
// Check that server items are rendered
const serverItems = screen.getAllByTestId("mcp-server-item");
expect(serverItems).toHaveLength(2);
// Check that action buttons are present for each server
const editButtons = screen.getAllByTestId("edit-mcp-server-button");
const deleteButtons = screen.getAllByTestId("delete-mcp-server-button");
expect(editButtons).toHaveLength(2);
expect(deleteButtons).toHaveLength(2);
});
it("should render empty state when no servers", () => {
const mockOnEdit = vi.fn();
const mockOnDelete = vi.fn();
render(
<MCPServerList
servers={[]}
onEdit={mockOnEdit}
onDelete={mockOnDelete}
/>,
);
expect(screen.getByText("SETTINGS$MCP_NO_SERVERS")).toBeInTheDocument();
});
it("should handle long URLs without breaking layout", () => {
const longUrlServer = {
id: "sse-0",
type: "sse" as const,
url: "https://extremely-long-url-that-would-previously-cause-layout-overflow-and-push-action-buttons-out-of-view.example.com/api/v1/mcp/server/endpoint/with/many/path/segments/and/query/parameters?param1=value1&param2=value2&param3=value3",
};
const mockOnEdit = vi.fn();
const mockOnDelete = vi.fn();
render(
<MCPServerList
servers={[longUrlServer]}
onEdit={mockOnEdit}
onDelete={mockOnDelete}
/>,
);
// Check that action buttons are still present and accessible
const editButton = screen.getByTestId("edit-mcp-server-button");
const deleteButton = screen.getByTestId("delete-mcp-server-button");
expect(editButton).toBeInTheDocument();
expect(deleteButton).toBeInTheDocument();
// Check that the URL is properly displayed with title attribute for accessibility
const detailsCells = screen.getAllByTitle(longUrlServer.url);
expect(detailsCells).toHaveLength(2); // Name and Details columns both have the URL
// Check that both name and details cells use truncation and have title for tooltip
const [nameCell, detailsCell] = detailsCells;
expect(nameCell).toHaveClass("truncate");
expect(detailsCell).toHaveClass("truncate");
});
it("should display command and arguments for STDIO servers", () => {
const stdioServer = {
id: "stdio-1",
type: "stdio" as const,
name: "test-server",
command: "python",
args: ["-m", "test_module", "--verbose"],
};
const mockOnEdit = vi.fn();
const mockOnDelete = vi.fn();
render(
<MCPServerList
servers={[stdioServer]}
onEdit={mockOnEdit}
onDelete={mockOnDelete}
/>,
);
// Check that the server details show command + arguments
const expectedDetails = "python -m test_module --verbose";
expect(screen.getByTitle(expectedDetails)).toBeInTheDocument();
expect(screen.getByText(expectedDetails)).toBeInTheDocument();
});
it("should fallback to server name for STDIO servers without command", () => {
const stdioServer = {
id: "stdio-2",
type: "stdio" as const,
name: "fallback-server",
};
const mockOnEdit = vi.fn();
const mockOnDelete = vi.fn();
render(
<MCPServerList
servers={[stdioServer]}
onEdit={mockOnEdit}
onDelete={mockOnDelete}
/>,
);
// Check that the server details show the server name as fallback
// Both name and details columns will have the same value, so we expect 2 elements
const fallbackElements = screen.getAllByTitle("fallback-server");
expect(fallbackElements).toHaveLength(2);
const fallbackTextElements = screen.getAllByText("fallback-server");
expect(fallbackTextElements).toHaveLength(2);
});
});

View File

@@ -0,0 +1,78 @@
import React, { useState } from "react";
import { useTranslation } from "react-i18next";
import { MCPConfig } from "#/types/settings";
import { I18nKey } from "#/i18n/declaration";
import { MCPSSEServers } from "./mcp-sse-servers";
import { MCPStdioServers } from "./mcp-stdio-servers";
import { MCPJsonEditor } from "./mcp-json-editor";
import { BrandButton } from "../brand-button";
interface MCPConfigEditorProps {
mcpConfig?: MCPConfig;
onChange: (config: MCPConfig) => void;
}
export function MCPConfigEditor({ mcpConfig, onChange }: MCPConfigEditorProps) {
const { t } = useTranslation();
const [isEditing, setIsEditing] = useState(false);
const handleConfigChange = (newConfig: MCPConfig) => {
onChange(newConfig);
setIsEditing(false);
};
const config = mcpConfig || { sse_servers: [], stdio_servers: [] };
return (
<div>
<div className="flex flex-col gap-2 mb-6">
<div className="text-sm font-medium">
{t(I18nKey.SETTINGS$MCP_TITLE)}
</div>
<p className="text-xs text-[#A3A3A3]">
{t(I18nKey.SETTINGS$MCP_DESCRIPTION)}
</p>
</div>
{!isEditing && (
<div className="flex justify-between items-center mb-4">
<div className="flex items-center">
<BrandButton
type="button"
variant="primary"
onClick={() => setIsEditing(true)}
>
{t(I18nKey.SETTINGS$MCP_EDIT_CONFIGURATION)}
</BrandButton>
</div>
</div>
)}
<div>
{isEditing ? (
<MCPJsonEditor
mcpConfig={mcpConfig}
onChange={handleConfigChange}
onCancel={() => setIsEditing(false)}
/>
) : (
<>
<div className="flex flex-col gap-6">
<div>
<MCPSSEServers servers={config.sse_servers} />
</div>
<div>
<MCPStdioServers servers={config.stdio_servers} />
</div>
</div>
{config.sse_servers.length === 0 &&
config.stdio_servers.length === 0 && (
<div className="mt-4 p-2 bg-yellow-50 border border-yellow-200 rounded-md text-sm text-yellow-700">
{t(I18nKey.SETTINGS$MCP_NO_SERVERS_CONFIGURED)}
</div>
)}
</>
)}
</div>
</div>
);
}

View File

@@ -0,0 +1,139 @@
import React, { useState, useRef, useEffect } from "react";
import { useTranslation, Trans } from "react-i18next";
import { MCPConfig } from "#/types/settings";
import { I18nKey } from "#/i18n/declaration";
import { BrandButton } from "../brand-button";
import { cn } from "#/utils/utils";
interface MCPJsonEditorProps {
mcpConfig?: MCPConfig;
onChange: (config: MCPConfig) => void;
onCancel: () => void;
}
const MCP_DEFAULT_CONFIG: MCPConfig = {
sse_servers: [],
stdio_servers: [],
};
export function MCPJsonEditor({
mcpConfig,
onChange,
onCancel,
}: MCPJsonEditorProps) {
const { t } = useTranslation();
const [configText, setConfigText] = useState(() =>
mcpConfig
? JSON.stringify(mcpConfig, null, 2)
: JSON.stringify(MCP_DEFAULT_CONFIG, null, 2),
);
const [error, setError] = useState<string | null>(null);
const textareaRef = useRef<HTMLTextAreaElement>(null);
useEffect(() => {
textareaRef.current?.focus();
}, []);
const handleTextChange = (e: React.ChangeEvent<HTMLTextAreaElement>) => {
setConfigText(e.target.value);
};
const handleSave = () => {
try {
const newConfig = JSON.parse(configText);
// Validate the structure
if (!newConfig.sse_servers || !Array.isArray(newConfig.sse_servers)) {
throw new Error(t(I18nKey.SETTINGS$MCP_ERROR_SSE_ARRAY));
}
if (!newConfig.stdio_servers || !Array.isArray(newConfig.stdio_servers)) {
throw new Error(t(I18nKey.SETTINGS$MCP_ERROR_STDIO_ARRAY));
}
// Validate SSE servers
for (const server of newConfig.sse_servers) {
if (
typeof server !== "string" &&
(!server.url || typeof server.url !== "string")
) {
throw new Error(t(I18nKey.SETTINGS$MCP_ERROR_SSE_URL));
}
}
// Validate stdio servers
for (const server of newConfig.stdio_servers) {
if (!server.name || !server.command) {
throw new Error(t(I18nKey.SETTINGS$MCP_ERROR_STDIO_PROPS));
}
}
onChange(newConfig);
setError(null);
} catch (e) {
setError(
e instanceof Error
? e.message
: t(I18nKey.SETTINGS$MCP_ERROR_INVALID_JSON),
);
}
};
return (
<div>
<p className="mb-2 text-sm text-gray-400">
<Trans
i18nKey={I18nKey.SETTINGS$MCP_CONFIG_DESCRIPTION}
components={{
a: (
<a
href="https://docs.all-hands.dev/usage/mcp"
target="_blank"
rel="noopener noreferrer"
className="text-blue-400 hover:underline"
>
documentation
</a>
),
}}
/>
</p>
<textarea
ref={textareaRef}
className={cn(
"w-full h-64 resize-y p-2 rounded-sm text-sm font-mono",
"bg-tertiary border border-[#717888]",
"placeholder:italic placeholder:text-tertiary-alt",
"focus:outline-none focus:ring-1 focus:ring-primary",
"disabled:bg-[#2D2F36] disabled:border-[#2D2F36] disabled:cursor-not-allowed",
)}
value={configText}
onChange={handleTextChange}
spellCheck="false"
/>
{error && (
<div className="mt-2 p-2 bg-red-100 border border-red-300 rounded-md text-sm text-red-700">
<strong>{t(I18nKey.SETTINGS$MCP_CONFIG_ERROR)}</strong> {error}
</div>
)}
<div className="mt-2 text-sm text-gray-400">
<strong>{t(I18nKey.SETTINGS$MCP_CONFIG_EXAMPLE)}</strong>{" "}
<code>
{
'{ "sse_servers": ["https://example-mcp-server.com/sse"], "stdio_servers": [{ "name": "fetch", "command": "uvx", "args": ["mcp-server-fetch"] }] }'
}
</code>
</div>
<div className="mt-4 flex justify-end gap-3">
<BrandButton type="button" variant="secondary" onClick={onCancel}>
{t(I18nKey.BUTTON$CANCEL)}
</BrandButton>
<BrandButton type="button" variant="primary" onClick={handleSave}>
{t(I18nKey.SETTINGS$MCP_PREVIEW_CHANGES)}
</BrandButton>
</div>
</div>
);
}

View File

@@ -1,376 +0,0 @@
import React from "react";
import { useTranslation } from "react-i18next";
import { I18nKey } from "#/i18n/declaration";
import { SettingsInput } from "../settings-input";
import { SettingsDropdownInput } from "../settings-dropdown-input";
import { BrandButton } from "../brand-button";
import { OptionalTag } from "../optional-tag";
import { cn } from "#/utils/utils";
type MCPServerType = "sse" | "stdio" | "shttp";
interface MCPServerConfig {
id: string;
type: MCPServerType;
name?: string;
url?: string;
api_key?: string;
command?: string;
args?: string[];
env?: Record<string, string>;
}
interface MCPServerFormProps {
mode: "add" | "edit";
server?: MCPServerConfig;
existingServers?: MCPServerConfig[];
onSubmit: (server: MCPServerConfig) => void;
onCancel: () => void;
}
export function MCPServerForm({
mode,
server,
existingServers,
onSubmit,
onCancel,
}: MCPServerFormProps) {
const { t } = useTranslation();
const [serverType, setServerType] = React.useState<MCPServerType>(
server?.type || "sse",
);
const [error, setError] = React.useState<string | null>(null);
const serverTypeOptions = [
{ key: "sse", label: t(I18nKey.SETTINGS$MCP_SERVER_TYPE_SSE) },
{ key: "stdio", label: t(I18nKey.SETTINGS$MCP_SERVER_TYPE_STDIO) },
{ key: "shttp", label: t(I18nKey.SETTINGS$MCP_SERVER_TYPE_SHTTP) },
];
const validateUrl = (url: string): string | null => {
if (!url) return t(I18nKey.SETTINGS$MCP_ERROR_URL_REQUIRED);
try {
const urlObj = new URL(url);
if (!["http:", "https:"].includes(urlObj.protocol)) {
return t(I18nKey.SETTINGS$MCP_ERROR_URL_INVALID_PROTOCOL);
}
} catch {
return t(I18nKey.SETTINGS$MCP_ERROR_URL_INVALID);
}
return null;
};
const validateName = (name: string): string | null => {
if (!name) return t(I18nKey.SETTINGS$MCP_ERROR_NAME_REQUIRED);
if (!/^[a-zA-Z0-9_-]+$/.test(name)) {
return t(I18nKey.SETTINGS$MCP_ERROR_NAME_INVALID);
}
return null;
};
const validateNameUniqueness = (name: string): string | null => {
if (!existingServers) return null;
const shouldCheckUniqueness =
mode === "add" || (mode === "edit" && server?.name !== name);
if (!shouldCheckUniqueness) return null;
const existingStdioNames = existingServers
.filter((s) => s.type === "stdio")
.map((s) => s.name)
.filter(Boolean);
if (existingStdioNames.includes(name)) {
return t(I18nKey.SETTINGS$MCP_ERROR_NAME_DUPLICATE);
}
return null;
};
const validateCommand = (command: string): string | null => {
if (!command) return t(I18nKey.SETTINGS$MCP_ERROR_COMMAND_REQUIRED);
if (command.includes(" ")) {
return t(I18nKey.SETTINGS$MCP_ERROR_COMMAND_NO_SPACES);
}
return null;
};
const validateUrlUniqueness = (url: string): string | null => {
if (!existingServers) return null;
const originalUrl = server?.url;
const changed = mode === "add" || (mode === "edit" && originalUrl !== url);
if (!changed) return null;
// For URL-based servers (sse/shttp), ensure URL is unique across both types
const exists = existingServers.some(
(s) => (s.type === "sse" || s.type === "shttp") && s.url === url,
);
if (exists) return t(I18nKey.SETTINGS$MCP_ERROR_URL_DUPLICATE);
return null;
};
const validateEnvFormat = (envString: string): string | null => {
if (!envString.trim()) return null;
const lines = envString.split("\n");
for (let i = 0; i < lines.length; i += 1) {
const trimmed = lines[i].trim();
if (trimmed) {
const eq = trimmed.indexOf("=");
if (eq === -1) return t(I18nKey.SETTINGS$MCP_ERROR_ENV_INVALID_FORMAT);
const key = trimmed.substring(0, eq).trim();
if (!key) return t(I18nKey.SETTINGS$MCP_ERROR_ENV_INVALID_FORMAT);
}
}
return null;
};
const validateStdioServer = (formData: FormData): string | null => {
const name = formData.get("name")?.toString().trim() || "";
const command = formData.get("command")?.toString().trim() || "";
const envString = formData.get("env")?.toString() || "";
const nameError = validateName(name);
if (nameError) return nameError;
const uniquenessError = validateNameUniqueness(name);
if (uniquenessError) return uniquenessError;
const commandError = validateCommand(command);
if (commandError) return commandError;
// Validate environment variable format
const envError = validateEnvFormat(envString);
if (envError) return envError;
return null;
};
const validateForm = (formData: FormData): string | null => {
if (serverType === "sse" || serverType === "shttp") {
const url = formData.get("url")?.toString().trim() || "";
const urlError = validateUrl(url);
if (urlError) return urlError;
const urlDupError = validateUrlUniqueness(url);
if (urlDupError) return urlDupError;
return null;
}
if (serverType === "stdio") {
return validateStdioServer(formData);
}
return null;
};
const parseEnvironmentVariables = (
envString: string,
): Record<string, string> => {
const env: Record<string, string> = {};
const input = envString.trim();
if (!input) return env;
for (const line of input.split("\n")) {
const trimmed = line.trim();
const eq = trimmed.indexOf("=");
const key = eq >= 0 ? trimmed.substring(0, eq).trim() : "";
if (trimmed && eq !== -1 && key) {
env[key] = trimmed.substring(eq + 1).trim();
}
}
return env;
};
const formatEnvironmentVariables = (env?: Record<string, string>): string => {
if (!env) return "";
return Object.entries(env)
.map(([key, value]) => `${key}=${value}`)
.join("\n");
};
const handleSubmit = (event: React.FormEvent<HTMLFormElement>) => {
event.preventDefault();
setError(null);
const formData = new FormData(event.currentTarget);
const validationError = validateForm(formData);
if (validationError) {
setError(validationError);
return;
}
const baseConfig = {
id: server?.id || `${serverType}-${Date.now()}`,
type: serverType,
};
if (serverType === "sse" || serverType === "shttp") {
const url = formData.get("url")?.toString().trim();
const apiKey = formData.get("api_key")?.toString().trim();
onSubmit({
...baseConfig,
url: url!,
...(apiKey && { api_key: apiKey }),
});
} else if (serverType === "stdio") {
const name = formData.get("name")?.toString().trim();
const command = formData.get("command")?.toString().trim();
const argsString = formData.get("args")?.toString().trim();
const envString = formData.get("env")?.toString().trim();
const args = argsString
? argsString
.split("\n")
.map((arg) => arg.trim())
.filter(Boolean)
: [];
const env = parseEnvironmentVariables(envString || "");
onSubmit({
...baseConfig,
name: name!,
command: command!,
...(args.length > 0 && { args }),
...(Object.keys(env).length > 0 && { env }),
});
}
};
const formTestId =
mode === "add" ? "add-mcp-server-form" : "edit-mcp-server-form";
return (
<form
data-testid={formTestId}
onSubmit={handleSubmit}
className="flex flex-col items-start gap-6"
>
{mode === "add" && (
<SettingsDropdownInput
testId="server-type-dropdown"
name="server-type"
label={t(I18nKey.SETTINGS$MCP_SERVER_TYPE)}
items={serverTypeOptions}
selectedKey={serverType}
onSelectionChange={(key) => setServerType(key as MCPServerType)}
onInputChange={() => {}} // Prevent input changes
isClearable={false}
allowsCustomValue={false}
required
wrapperClassName={cn("w-full", "max-w-[680px]")}
/>
)}
{error && <p className="text-red-500 text-sm">{error}</p>}
{(serverType === "sse" || serverType === "shttp") && (
<>
<SettingsInput
testId="url-input"
name="url"
type="url"
label={t(I18nKey.SETTINGS$MCP_URL)}
className="w-full max-w-[680px]"
required
defaultValue={server?.url || ""}
placeholder="https://api.example.com"
/>
<SettingsInput
testId="api-key-input"
name="api_key"
type="password"
label={t(I18nKey.SETTINGS$MCP_API_KEY)}
className="w-full max-w-[680px]"
showOptionalTag
defaultValue={server?.api_key || ""}
placeholder={t(I18nKey.SETTINGS$MCP_API_KEY_PLACEHOLDER)}
/>
</>
)}
{serverType === "stdio" && (
<>
<SettingsInput
testId="name-input"
name="name"
type="text"
label={t(I18nKey.SETTINGS$MCP_NAME)}
className="w-full max-w-[680px]"
required
defaultValue={server?.name || ""}
placeholder="my-mcp-server"
pattern="^[a-zA-Z0-9_-]+$"
/>
<SettingsInput
testId="command-input"
name="command"
type="text"
label={t(I18nKey.SETTINGS$MCP_COMMAND)}
className="w-full max-w-[680px]"
required
defaultValue={server?.command || ""}
placeholder="npx"
/>
<label className="flex flex-col gap-2.5 w-full max-w-[680px]">
<div className="flex items-center gap-2">
<span className="text-sm">
{t(I18nKey.SETTINGS$MCP_COMMAND_ARGUMENTS)}
</span>
<OptionalTag />
</div>
<textarea
data-testid="args-input"
name="args"
rows={3}
defaultValue={server?.args?.join("\n") || ""}
placeholder="arg1&#10;arg2&#10;arg3"
className={cn(
"bg-tertiary border border-[#717888] w-full rounded-sm p-2 placeholder:italic placeholder:text-tertiary-alt resize-none",
"disabled:bg-[#2D2F36] disabled:border-[#2D2F36] disabled:cursor-not-allowed",
)}
/>
<p className="text-xs text-tertiary-alt">
{t(I18nKey.SETTINGS$MCP_COMMAND_ARGUMENTS_HELP)}
</p>
</label>
<label className="flex flex-col gap-2.5 w-full max-w-[680px]">
<div className="flex items-center gap-2">
<span className="text-sm">
{t(I18nKey.SETTINGS$MCP_ENVIRONMENT_VARIABLES)}
</span>
<OptionalTag />
</div>
<textarea
data-testid="env-input"
name="env"
rows={4}
defaultValue={formatEnvironmentVariables(server?.env)}
placeholder="KEY1=value1&#10;KEY2=value2"
className={cn(
"resize-none",
"bg-tertiary border border-[#717888] rounded-sm p-2 placeholder:italic placeholder:text-tertiary-alt",
"disabled:bg-[#2D2F36] disabled:border-[#2D2F36] disabled:cursor-not-allowed",
)}
/>
</label>
</>
)}
<div className="flex items-center gap-4">
<BrandButton
testId="cancel-button"
type="button"
variant="secondary"
onClick={onCancel}
>
{t(I18nKey.BUTTON$CANCEL)}
</BrandButton>
<BrandButton testId="submit-button" type="submit" variant="primary">
{mode === "add" && t(I18nKey.SETTINGS$MCP_ADD_SERVER)}
{mode === "edit" && t(I18nKey.SETTINGS$MCP_SAVE_SERVER)}
</BrandButton>
</div>
</form>
);
}

View File

@@ -1,110 +0,0 @@
import { FaPencil, FaTrash } from "react-icons/fa6";
import { useTranslation } from "react-i18next";
import { I18nKey } from "#/i18n/declaration";
interface MCPServerConfig {
id: string;
type: "sse" | "stdio" | "shttp";
name?: string;
url?: string;
api_key?: string;
command?: string;
args?: string[];
env?: Record<string, string>;
}
export function MCPServerListItem({
server,
onEdit,
onDelete,
}: {
server: MCPServerConfig;
onEdit: () => void;
onDelete: () => void;
}) {
const { t } = useTranslation();
const getServerTypeLabel = (type: string) => {
switch (type) {
case "sse":
return t(I18nKey.SETTINGS$MCP_SERVER_TYPE_SSE);
case "stdio":
return t(I18nKey.SETTINGS$MCP_SERVER_TYPE_STDIO);
case "shttp":
return t(I18nKey.SETTINGS$MCP_SERVER_TYPE_SHTTP);
default:
return type.toUpperCase();
}
};
const getServerDescription = (serverConfig: MCPServerConfig) => {
if (serverConfig.type === "stdio") {
if (serverConfig.command) {
const args =
serverConfig.args && serverConfig.args.length > 0
? ` ${serverConfig.args.join(" ")}`
: "";
return `${serverConfig.command}${args}`;
}
return serverConfig.name || "";
}
if (
(serverConfig.type === "sse" || serverConfig.type === "shttp") &&
serverConfig.url
) {
return serverConfig.url;
}
return "";
};
const serverName = server.type === "stdio" ? server.name : server.url;
const serverDescription = getServerDescription(server);
return (
<tr
data-testid="mcp-server-item"
className="grid grid-cols-[minmax(0,0.25fr)_120px_minmax(0,1fr)_120px] gap-4 items-start border-t border-tertiary"
>
<td
className="p-3 text-sm text-content-2 truncate min-w-0"
title={serverName}
>
{serverName}
</td>
<td className="p-3 text-sm text-content-2 whitespace-nowrap">
{getServerTypeLabel(server.type)}
</td>
<td
className="p-3 text-sm text-content-2 opacity-80 italic min-w-0 truncate"
title={serverDescription}
>
<span className="inline-block max-w-full align-bottom">
{serverDescription}
</span>
</td>
<td className="p-3 flex items-start justify-end gap-4 whitespace-nowrap">
<button
data-testid="edit-mcp-server-button"
type="button"
onClick={onEdit}
aria-label={`Edit ${serverName}`}
className="cursor-pointer hover:text-content-1 transition-colors"
>
<FaPencil size={16} />
</button>
<button
data-testid="delete-mcp-server-button"
type="button"
onClick={onDelete}
aria-label={`Delete ${serverName}`}
className="cursor-pointer hover:text-content-1 transition-colors"
>
<FaTrash size={16} />
</button>
</td>
</tr>
);
}

View File

@@ -1,71 +0,0 @@
import { useTranslation } from "react-i18next";
import { MCPServerListItem } from "./mcp-server-list-item";
import { I18nKey } from "#/i18n/declaration";
interface MCPServerConfig {
id: string;
type: "sse" | "stdio" | "shttp";
name?: string;
url?: string;
api_key?: string;
command?: string;
args?: string[];
env?: Record<string, string>;
}
interface MCPServerListProps {
servers: MCPServerConfig[];
onEdit: (server: MCPServerConfig) => void;
onDelete: (serverId: string) => void;
}
export function MCPServerList({
servers,
onEdit,
onDelete,
}: MCPServerListProps) {
const { t } = useTranslation();
if (servers.length === 0) {
return (
<div className="border border-tertiary rounded-md p-8 text-center">
<p className="text-content-2 text-sm">
{t(I18nKey.SETTINGS$MCP_NO_SERVERS)}
</p>
</div>
);
}
return (
<div className="border border-tertiary rounded-md overflow-hidden">
<table className="w-full">
<thead className="bg-base-tertiary">
<tr className="grid grid-cols-[minmax(0,0.25fr)_120px_minmax(0,1fr)_120px] gap-4 items-start">
<th className="text-left p-3 text-sm font-medium">
{t(I18nKey.SETTINGS$NAME)}
</th>
<th className="text-left p-3 text-sm font-medium">
{t(I18nKey.SETTINGS$MCP_SERVER_TYPE)}
</th>
<th className="text-left p-3 text-sm font-medium">
{t(I18nKey.SETTINGS$MCP_SERVER_DETAILS)}
</th>
<th className="text-right p-3 text-sm font-medium">
{t(I18nKey.SETTINGS$ACTIONS)}
</th>
</tr>
</thead>
<tbody>
{servers.map((server) => (
<MCPServerListItem
key={server.id}
server={server}
onEdit={() => onEdit(server)}
onDelete={() => onDelete(server.id)}
/>
))}
</tbody>
</table>
</div>
);
}

View File

@@ -23,7 +23,7 @@ export function ModalBackdrop({ children, onClose }: ModalBackdropProps) {
<div className="fixed inset-0 flex items-center justify-center z-20">
<div
onClick={handleClick}
className="fixed inset-0 bg-black opacity-60"
className="fixed inset-0 bg-black bg-opacity-80"
/>
<div className="relative">{children}</div>
</div>

View File

@@ -1,67 +0,0 @@
import { useMutation, useQueryClient } from "@tanstack/react-query";
import { useSettings } from "#/hooks/query/use-settings";
import OpenHands from "#/api/open-hands";
import { MCPSSEServer, MCPStdioServer, MCPSHTTPServer } from "#/types/settings";
type MCPServerType = "sse" | "stdio" | "shttp";
interface MCPServerConfig {
type: MCPServerType;
name?: string;
url?: string;
api_key?: string;
command?: string;
args?: string[];
env?: Record<string, string>;
}
export function useAddMcpServer() {
const queryClient = useQueryClient();
const { data: settings } = useSettings();
return useMutation({
mutationFn: async (server: MCPServerConfig): Promise<void> => {
if (!settings) return;
const currentConfig = settings.MCP_CONFIG || {
sse_servers: [],
stdio_servers: [],
shttp_servers: [],
};
const newConfig = { ...currentConfig };
if (server.type === "sse") {
const sseServer: MCPSSEServer = {
url: server.url!,
...(server.api_key && { api_key: server.api_key }),
};
newConfig.sse_servers.push(sseServer);
} else if (server.type === "stdio") {
const stdioServer: MCPStdioServer = {
name: server.name!,
command: server.command!,
...(server.args && { args: server.args }),
...(server.env && { env: server.env }),
};
newConfig.stdio_servers.push(stdioServer);
} else if (server.type === "shttp") {
const shttpServer: MCPSHTTPServer = {
url: server.url!,
...(server.api_key && { api_key: server.api_key }),
};
newConfig.shttp_servers.push(shttpServer);
}
const apiSettings = {
mcp_config: newConfig,
};
await OpenHands.saveSettings(apiSettings);
},
onSuccess: () => {
// Invalidate the settings query to trigger a refetch
queryClient.invalidateQueries({ queryKey: ["settings"] });
},
});
}

View File

@@ -1,37 +0,0 @@
import { useMutation, useQueryClient } from "@tanstack/react-query";
import { useSettings } from "#/hooks/query/use-settings";
import OpenHands from "#/api/open-hands";
import { MCPConfig } from "#/types/settings";
export function useDeleteMcpServer() {
const queryClient = useQueryClient();
const { data: settings } = useSettings();
return useMutation({
mutationFn: async (serverId: string): Promise<void> => {
if (!settings?.MCP_CONFIG) return;
const newConfig: MCPConfig = { ...settings.MCP_CONFIG };
const [serverType, indexStr] = serverId.split("-");
const index = parseInt(indexStr, 10);
if (serverType === "sse") {
newConfig.sse_servers.splice(index, 1);
} else if (serverType === "stdio") {
newConfig.stdio_servers.splice(index, 1);
} else if (serverType === "shttp") {
newConfig.shttp_servers.splice(index, 1);
}
const apiSettings = {
mcp_config: newConfig,
};
await OpenHands.saveSettings(apiSettings);
},
onSuccess: () => {
// Invalidate the settings query to trigger a refetch
queryClient.invalidateQueries({ queryKey: ["settings"] });
},
});
}

View File

@@ -1,69 +0,0 @@
import { useMutation, useQueryClient } from "@tanstack/react-query";
import { useSettings } from "#/hooks/query/use-settings";
import OpenHands from "#/api/open-hands";
import { MCPSSEServer, MCPStdioServer, MCPSHTTPServer } from "#/types/settings";
type MCPServerType = "sse" | "stdio" | "shttp";
interface MCPServerConfig {
type: MCPServerType;
name?: string;
url?: string;
api_key?: string;
command?: string;
args?: string[];
env?: Record<string, string>;
}
export function useUpdateMcpServer() {
const queryClient = useQueryClient();
const { data: settings } = useSettings();
return useMutation({
mutationFn: async ({
serverId,
server,
}: {
serverId: string;
server: MCPServerConfig;
}): Promise<void> => {
if (!settings?.MCP_CONFIG) return;
const newConfig = { ...settings.MCP_CONFIG };
const [serverType, indexStr] = serverId.split("-");
const index = parseInt(indexStr, 10);
if (serverType === "sse") {
const sseServer: MCPSSEServer = {
url: server.url!,
...(server.api_key && { api_key: server.api_key }),
};
newConfig.sse_servers[index] = sseServer;
} else if (serverType === "stdio") {
const stdioServer: MCPStdioServer = {
name: server.name!,
command: server.command!,
...(server.args && { args: server.args }),
...(server.env && { env: server.env }),
};
newConfig.stdio_servers[index] = stdioServer;
} else if (serverType === "shttp") {
const shttpServer: MCPSHTTPServer = {
url: server.url!,
...(server.api_key && { api_key: server.api_key }),
};
newConfig.shttp_servers[index] = shttpServer;
}
const apiSettings = {
mcp_config: newConfig,
};
await OpenHands.saveSettings(apiSettings);
},
onSuccess: () => {
// Invalidate the settings query to trigger a refetch
queryClient.invalidateQueries({ queryKey: ["settings"] });
},
});
}

View File

@@ -781,37 +781,4 @@ export enum I18nKey {
PROJECT_MANAGEMENT$SVC_ACC_EMAIL_VALIDATION_ERROR = "PROJECT_MANAGEMENT$SVC_ACC_EMAIL_VALIDATION_ERROR",
PROJECT_MANAGEMENT$SVC_ACC_API_KEY_VALIDATION_ERROR = "PROJECT_MANAGEMENT$SVC_ACC_API_KEY_VALIDATION_ERROR",
MICROAGENT_MANAGEMENT$ERROR_LOADING_MICROAGENT_CONTENT = "MICROAGENT_MANAGEMENT$ERROR_LOADING_MICROAGENT_CONTENT",
SETTINGS$MCP_ERROR_ENV_INVALID_FORMAT = "SETTINGS$MCP_ERROR_ENV_INVALID_FORMAT",
SETTINGS$MCP_ERROR_URL_DUPLICATE = "SETTINGS$MCP_ERROR_URL_DUPLICATE",
SETTINGS$MCP_SERVER_TYPE_SSE = "SETTINGS$MCP_SERVER_TYPE_SSE",
SETTINGS$MCP_SERVER_TYPE_STDIO = "SETTINGS$MCP_SERVER_TYPE_STDIO",
SETTINGS$MCP_SERVER_TYPE_SHTTP = "SETTINGS$MCP_SERVER_TYPE_SHTTP",
SETTINGS$MCP_ERROR_URL_REQUIRED = "SETTINGS$MCP_ERROR_URL_REQUIRED",
SETTINGS$MCP_ERROR_URL_INVALID_PROTOCOL = "SETTINGS$MCP_ERROR_URL_INVALID_PROTOCOL",
SETTINGS$MCP_ERROR_URL_INVALID = "SETTINGS$MCP_ERROR_URL_INVALID",
SETTINGS$MCP_ERROR_NAME_REQUIRED = "SETTINGS$MCP_ERROR_NAME_REQUIRED",
SETTINGS$MCP_ERROR_NAME_INVALID = "SETTINGS$MCP_ERROR_NAME_INVALID",
SETTINGS$MCP_ERROR_NAME_DUPLICATE = "SETTINGS$MCP_ERROR_NAME_DUPLICATE",
SETTINGS$MCP_ERROR_COMMAND_REQUIRED = "SETTINGS$MCP_ERROR_COMMAND_REQUIRED",
SETTINGS$MCP_ERROR_COMMAND_NO_SPACES = "SETTINGS$MCP_ERROR_COMMAND_NO_SPACES",
SETTINGS$MCP_SERVER_TYPE = "SETTINGS$MCP_SERVER_TYPE",
SETTINGS$MCP_API_KEY_PLACEHOLDER = "SETTINGS$MCP_API_KEY_PLACEHOLDER",
SETTINGS$MCP_COMMAND_ARGUMENTS = "SETTINGS$MCP_COMMAND_ARGUMENTS",
SETTINGS$MCP_COMMAND_ARGUMENTS_HELP = "SETTINGS$MCP_COMMAND_ARGUMENTS_HELP",
SETTINGS$MCP_ENVIRONMENT_VARIABLES = "SETTINGS$MCP_ENVIRONMENT_VARIABLES",
SETTINGS$MCP_ADD_SERVER = "SETTINGS$MCP_ADD_SERVER",
SETTINGS$MCP_SAVE_SERVER = "SETTINGS$MCP_SAVE_SERVER",
SETTINGS$MCP_NO_SERVERS = "SETTINGS$MCP_NO_SERVERS",
SETTINGS$MCP_SERVER_DETAILS = "SETTINGS$MCP_SERVER_DETAILS",
SETTINGS$MCP_CONFIRM_DELETE = "SETTINGS$MCP_CONFIRM_DELETE",
SETTINGS$MCP_CONFIRM_CHANGES = "SETTINGS$MCP_CONFIRM_CHANGES",
SETTINGS$MCP_DEFAULT_CONFIG = "SETTINGS$MCP_DEFAULT_CONFIG",
PROJECT_MANAGEMENT$WORKSPACE_NAME_PLACEHOLDER = "PROJECT_MANAGEMENT$WORKSPACE_NAME_PLACEHOLDER",
PROJECT_MANAGEMENT$CONFIGURE_MODAL_DESCRIPTION = "PROJECT_MANAGEMENT$CONFIGURE_MODAL_DESCRIPTION",
PROJECT_MANAGEMENT$IMPORTANT_WORKSPACE_INTEGRATION = "PROJECT_MANAGEMENT$IMPORTANT_WORKSPACE_INTEGRATION",
SETTINGS = "SETTINGS",
MICROAGENT_MANAGEMENT$OPENING_PR_TO_CREATE_MICROAGENT = "MICROAGENT_MANAGEMENT$OPENING_PR_TO_CREATE_MICROAGENT",
MICROAGENT_MANAGEMENT$PR_READY_FOR_REVIEW = "MICROAGENT_MANAGEMENT$PR_READY_FOR_REVIEW",
MICROAGENT_MANAGEMENT$PR_NOT_CREATED = "MICROAGENT_MANAGEMENT$PR_NOT_CREATED",
MICROAGENT_MANAGEMENT$ERROR_CREATING_MICROAGENT = "MICROAGENT_MANAGEMENT$ERROR_CREATING_MICROAGENT",
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,191 +1,86 @@
import React, { useState } from "react";
import React, { useState, useEffect } from "react";
import { useTranslation } from "react-i18next";
import posthog from "posthog-js";
import { useSettings } from "#/hooks/query/use-settings";
import { useDeleteMcpServer } from "#/hooks/mutation/use-delete-mcp-server";
import { useAddMcpServer } from "#/hooks/mutation/use-add-mcp-server";
import { useUpdateMcpServer } from "#/hooks/mutation/use-update-mcp-server";
import { I18nKey } from "#/i18n/declaration";
import { MCPServerList } from "#/components/features/settings/mcp-settings/mcp-server-list";
import { MCPServerForm } from "#/components/features/settings/mcp-settings/mcp-server-form";
import { ConfirmationModal } from "#/components/shared/modals/confirmation-modal";
import { BrandButton } from "#/components/features/settings/brand-button";
import { useSaveSettings } from "#/hooks/mutation/use-save-settings";
import { MCPConfig } from "#/types/settings";
type MCPServerType = "sse" | "stdio" | "shttp";
interface MCPServerConfig {
id: string;
type: MCPServerType;
name?: string;
url?: string;
api_key?: string;
command?: string;
args?: string[];
env?: Record<string, string>;
}
import { MCPConfigEditor } from "#/components/features/settings/mcp-settings/mcp-config-editor";
import { BrandButton } from "#/components/features/settings/brand-button";
import { I18nKey } from "#/i18n/declaration";
import {
displayErrorToast,
displaySuccessToast,
} from "#/utils/custom-toast-handlers";
import { retrieveAxiosErrorMessage } from "#/utils/retrieve-axios-error-message";
function MCPSettingsScreen() {
const { t } = useTranslation();
const { data: settings, isLoading } = useSettings();
const { mutate: deleteMcpServer } = useDeleteMcpServer();
const { mutate: addMcpServer } = useAddMcpServer();
const { mutate: updateMcpServer } = useUpdateMcpServer();
const { mutate: saveSettings, isPending } = useSaveSettings();
const [view, setView] = useState<"list" | "add" | "edit">("list");
const [editingServer, setEditingServer] = useState<MCPServerConfig | null>(
null,
);
const [confirmationModalIsVisible, setConfirmationModalIsVisible] =
useState(false);
const [serverToDelete, setServerToDelete] = useState<string | null>(null);
const [mcpConfig, setMcpConfig] = useState<MCPConfig | undefined>(undefined);
const [isDirty, setIsDirty] = useState(false);
const mcpConfig: MCPConfig = settings?.MCP_CONFIG || {
sse_servers: [],
stdio_servers: [],
shttp_servers: [],
useEffect(() => {
if (!mcpConfig && settings?.MCP_CONFIG) {
setMcpConfig(settings.MCP_CONFIG);
}
}, [settings, mcpConfig]);
const handleConfigChange = (config: MCPConfig) => {
setMcpConfig(config);
setIsDirty(true);
};
// Convert servers to a unified format for display
const allServers: MCPServerConfig[] = [
...mcpConfig.sse_servers.map((server, index) => ({
id: `sse-${index}`,
type: "sse" as const,
url: typeof server === "string" ? server : server.url,
api_key: typeof server === "object" ? server.api_key : undefined,
})),
...mcpConfig.stdio_servers.map((server, index) => ({
id: `stdio-${index}`,
type: "stdio" as const,
name: server.name,
command: server.command,
args: server.args,
env: server.env,
})),
...mcpConfig.shttp_servers.map((server, index) => ({
id: `shttp-${index}`,
type: "shttp" as const,
url: typeof server === "string" ? server : server.url,
api_key: typeof server === "object" ? server.api_key : undefined,
})),
];
const formAction = () => {
if (!settings) return;
const handleAddServer = (serverConfig: MCPServerConfig) => {
addMcpServer(serverConfig, {
onSuccess: () => {
setView("list");
},
});
};
const handleEditServer = (serverConfig: MCPServerConfig) => {
updateMcpServer(
{
serverId: serverConfig.id,
server: serverConfig,
},
saveSettings(
{ MCP_CONFIG: mcpConfig },
{
onSuccess: () => {
setView("list");
displaySuccessToast(t(I18nKey.SETTINGS$SAVED));
posthog.capture("settings_saved", {
HAS_MCP_CONFIG: mcpConfig ? "YES" : "NO",
MCP_SSE_SERVERS_COUNT: mcpConfig?.sse_servers?.length || 0,
MCP_STDIO_SERVERS_COUNT: mcpConfig?.stdio_servers?.length || 0,
});
setIsDirty(false);
},
onError: (error) => {
const errorMessage = retrieveAxiosErrorMessage(error);
displayErrorToast(errorMessage || t(I18nKey.ERROR$GENERIC));
},
},
);
};
const handleDeleteServer = (serverId: string) => {
deleteMcpServer(serverId, {
onSuccess: () => {
setConfirmationModalIsVisible(false);
},
});
};
const handleEditClick = (server: MCPServerConfig) => {
setEditingServer(server);
setView("edit");
};
const handleDeleteClick = (serverId: string) => {
setServerToDelete(serverId);
setConfirmationModalIsVisible(true);
};
const handleConfirmDelete = () => {
if (serverToDelete) {
handleDeleteServer(serverToDelete);
setServerToDelete(null);
}
};
const handleCancelDelete = () => {
setConfirmationModalIsVisible(false);
setServerToDelete(null);
};
if (isLoading) {
return (
<div className="px-11 py-9 flex flex-col gap-5">
<div className="animate-pulse">
<div className="h-6 bg-gray-300 rounded w-1/4 mb-4" />
<div className="h-4 bg-gray-300 rounded w-1/2 mb-8" />
<div className="h-10 bg-gray-300 rounded w-32" />
</div>
</div>
);
return <div className="p-9">{t(I18nKey.HOME$LOADING)}</div>;
}
return (
<div className="px-11 py-9 flex flex-col gap-5">
{view === "list" && (
<>
<BrandButton
testId="add-mcp-server-button"
type="button"
variant="primary"
onClick={() => setView("add")}
isDisabled={isLoading}
>
{t(I18nKey.SETTINGS$MCP_ADD_SERVER)}
</BrandButton>
<form
data-testid="mcp-settings-screen"
action={formAction}
className="flex flex-col h-full justify-between"
>
<div className="p-9 flex flex-col gap-12">
<MCPConfigEditor mcpConfig={mcpConfig} onChange={handleConfigChange} />
</div>
<MCPServerList
servers={allServers}
onEdit={handleEditClick}
onDelete={handleDeleteClick}
/>
</>
)}
{view === "add" && (
<MCPServerForm
mode="add"
existingServers={allServers}
onSubmit={handleAddServer}
onCancel={() => setView("list")}
/>
)}
{view === "edit" && editingServer && (
<MCPServerForm
mode="edit"
server={editingServer}
existingServers={allServers}
onSubmit={handleEditServer}
onCancel={() => {
setView("list");
setEditingServer(null);
}}
/>
)}
{confirmationModalIsVisible && (
<ConfirmationModal
text={t(I18nKey.SETTINGS$MCP_CONFIRM_DELETE)}
onConfirm={handleConfirmDelete}
onCancel={handleCancelDelete}
/>
)}
</div>
<div className="flex gap-6 p-6 justify-end border-t border-t-tertiary">
<BrandButton
testId="submit-button"
type="submit"
variant="primary"
isDisabled={!isDirty || isPending}
>
{!isPending && t(I18nKey.SETTINGS$SAVE_CHANGES)}
{isPending && t(I18nKey.SETTINGS$SAVING)}
</BrandButton>
</div>
</form>
);
}

View File

@@ -23,7 +23,6 @@ const SAAS_NAV_ITEMS = [
{ to: "/settings/billing", text: "SETTINGS$NAV_CREDITS" },
{ to: "/settings/secrets", text: "SETTINGS$NAV_SECRETS" },
{ to: "/settings/api-keys", text: "SETTINGS$NAV_API_KEYS" },
{ to: "/settings/mcp", text: "SETTINGS$NAV_MCP" },
];
const OSS_NAV_ITEMS = [

View File

@@ -26,7 +26,6 @@ export const DEFAULT_SETTINGS: Settings = {
MCP_CONFIG: {
sse_servers: [],
stdio_servers: [],
shttp_servers: [],
},
GIT_USER_NAME: "openhands",
GIT_USER_EMAIL: "openhands@all-hands.dev",

View File

@@ -24,15 +24,9 @@ export type MCPStdioServer = {
env?: Record<string, string>;
};
export type MCPSHTTPServer = {
url: string;
api_key?: string;
};
export type MCPConfig = {
sse_servers: (string | MCPSSEServer)[];
stdio_servers: MCPStdioServer[];
shttp_servers: (string | MCPSHTTPServer)[];
};
export type Settings = {
@@ -83,7 +77,6 @@ export type ApiSettings = {
mcp_config?: {
sse_servers: (string | MCPSSEServer)[];
stdio_servers: MCPStdioServer[];
shttp_servers: (string | MCPSHTTPServer)[];
};
email?: string;
email_verified?: boolean;

View File

@@ -26,7 +26,6 @@ Here are some instructions for pushing, but ONLY do this if the user asks you to
* Use the `create_pr` tool to create a pull request, if you haven't already
* Once you've created your own branch or a pull request, continue to update it. Do NOT create a new one unless you are explicitly asked to. Update the PR title and description as necessary, but don't change the branch name.
* Use the main branch as the base branch, unless the user requests otherwise
* If you need to add labels when opening a PR, check the existing labels defined on that repository and select from existing ones. Do not invent your own labels.
* After opening or updating a pull request, send the user a short message with a link to the pull request.
* Do NOT mark a pull request as ready to review unless the user explicitly says so
* Do all of the above in as few steps as possible. E.g. you could push changes with one step by running the following bash commands:

View File

@@ -26,7 +26,6 @@ Here are some instructions for pushing, but ONLY do this if the user asks you to
* Use the `create_mr` tool to create a merge request, if you haven't already
* Once you've created your own branch or a merge request, continue to update it. Do NOT create a new one unless you are explicitly asked to. Update the PR title and description as necessary, but don't change the branch name.
* Use the main branch as the base branch, unless the user requests otherwise
* If you need to add labels when opening a MR, check the existing labels defined on that repository and select from existing ones. Do not invent your own labels.
* After opening or updating a merge request, send the user a short message with a link to the merge request.
* Do all of the above in as few steps as possible. E.g. you could push changes with one step by running the following bash commands:
```bash

View File

@@ -18,7 +18,7 @@ from openhands.events.action import (
from openhands.events.event import EventSource
from openhands.events.observation import BrowserOutputObservation
from openhands.events.observation.observation import Observation
from openhands.llm.llm_registry import LLMRegistry
from openhands.llm.llm import LLM
from openhands.runtime.plugins import (
PluginRequirement,
)
@@ -102,15 +102,15 @@ class BrowsingAgent(Agent):
def __init__(
self,
llm: LLM,
config: AgentConfig,
llm_registry: LLMRegistry,
) -> None:
"""Initializes a new instance of the BrowsingAgent class.
Parameters:
- llm (LLM): The llm to be used by this agent
"""
super().__init__(config, llm_registry)
super().__init__(llm, config)
# define a configurable action space, with chat functionality, web navigation, and webpage grounding using accessibility tree and HTML.
# see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/action/highlevel.py for more details
action_subsets = ['chat', 'bid']

View File

@@ -3,8 +3,6 @@ import sys
from collections import deque
from typing import TYPE_CHECKING
from openhands.llm.llm_registry import LLMRegistry
if TYPE_CHECKING:
from litellm import ChatCompletionToolParam
@@ -12,7 +10,7 @@ if TYPE_CHECKING:
from openhands.llm.llm import ModelResponse
import openhands.agenthub.codeact_agent.function_calling as codeact_function_calling
from openhands.agenthub.codeact_agent.tools.bash import create_cmd_run_tool
from openhands.agenthub.codeact_agent.tools.bash import CmdRunTool
from openhands.agenthub.codeact_agent.tools.browser import BrowserTool
from openhands.agenthub.codeact_agent.tools.condensation_request import (
CondensationRequestTool,
@@ -34,6 +32,7 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.message import Message
from openhands.events.action import AgentFinishAction, MessageAction
from openhands.events.event import Event
from openhands.llm.llm import LLM
from openhands.llm.llm_utils import check_tools
from openhands.memory.condenser import Condenser
from openhands.memory.condenser.condenser import Condensation, View
@@ -75,13 +74,18 @@ class CodeActAgent(Agent):
JupyterRequirement(),
]
def __init__(self, config: AgentConfig, llm_registry: LLMRegistry) -> None:
def __init__(
self,
llm: LLM,
config: AgentConfig,
) -> None:
"""Initializes a new instance of the CodeActAgent class.
Parameters:
- llm (LLM): The llm to be used by this agent
- config (AgentConfig): The configuration for this agent
"""
super().__init__(config, llm_registry)
super().__init__(llm, config)
self.pending_actions: deque['Action'] = deque()
self.reset()
self.tools = self._get_tools()
@@ -89,7 +93,7 @@ class CodeActAgent(Agent):
# Create a ConversationMemory instance
self.conversation_memory = ConversationMemory(self.config, self.prompt_manager)
self.condenser = Condenser.from_config(self.config.condenser, llm_registry)
self.condenser = Condenser.from_config(self.config.condenser)
logger.debug(f'Using condenser: {type(self.condenser)}')
@property
@@ -121,7 +125,9 @@ class CodeActAgent(Agent):
tools = []
if self.config.enable_cmd:
tools.append(create_cmd_run_tool(use_short_description=use_short_tool_desc))
tools.append(
CmdRunTool(use_short_description=use_short_tool_desc).to_param()
)
if self.config.enable_think:
tools.append(ThinkTool)
if self.config.enable_finish:

View File

@@ -16,9 +16,10 @@ from openhands.agenthub.codeact_agent.tools import (
IPythonTool,
LLMBasedFileEditTool,
ThinkTool,
create_cmd_run_tool,
create_str_replace_editor_tool,
execute_bash,
)
from openhands.agenthub.codeact_agent.tools.bash import CmdRunTool
from openhands.core.exceptions import (
FunctionCallNotExistsError,
FunctionCallValidationError,
@@ -30,7 +31,6 @@ from openhands.events.action import (
AgentFinishAction,
AgentThinkAction,
BrowseInteractiveAction,
CmdRunAction,
FileEditAction,
FileReadAction,
IPythonRunCellAction,
@@ -43,6 +43,11 @@ from openhands.events.event import FileEditSource, FileReadSource
from openhands.events.tool import ToolCallMetadata
from openhands.llm.tool_names import TASK_TRACKER_TOOL_NAME
# Tool handlers registry for class-based tools
_TOOL_HANDLERS = {
execute_bash.name: CmdRunTool(),
}
def combine_thought(action: Action, thought: str) -> Action:
if not hasattr(action, 'thought'):
@@ -86,23 +91,8 @@ def response_to_actions(
# CmdRunTool (Bash)
# ================================================
if tool_call.function.name == create_cmd_run_tool()['function']['name']:
if 'command' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "command" in tool call {tool_call.function.name}'
)
# convert is_input to boolean
is_input = arguments.get('is_input', 'false') == 'true'
action = CmdRunAction(command=arguments['command'], is_input=is_input)
# Set hard timeout if provided
if 'timeout' in arguments:
try:
action.set_hard_timeout(float(arguments['timeout']))
except ValueError as e:
raise FunctionCallValidationError(
f"Invalid float passed to 'timeout' argument: {arguments['timeout']}"
) from e
if tool_call.function.name == execute_bash.name:
action = _TOOL_HANDLERS[execute_bash.name].to_action(arguments)
# ================================================
# IPythonTool (Jupyter)

View File

@@ -1,4 +1,9 @@
from .bash import create_cmd_run_tool
from .bash import create_cmd_run_tool, execute_bash
# NOTE: This module currently exposes schema-only tools. As part of #10441 we are
# gradually encapsulating tools as classes that own schema and validation. See
# bash.CmdRunTool for the first example. Existing code remains backward
# compatible by exporting ChatCompletionToolParam for now.
from .browser import BrowserTool
from .condensation_request import CondensationRequestTool
from .finish import FinishTool
@@ -11,6 +16,7 @@ __all__ = [
'BrowserTool',
'CondensationRequestTool',
'create_cmd_run_tool',
'execute_bash',
'FinishTool',
'IPythonTool',
'LLMBasedFileEditTool',

View File

@@ -0,0 +1,43 @@
from __future__ import annotations
import json
from abc import ABC, abstractmethod
from typing import Any
from litellm import ChatCompletionToolParam
from openhands.core.exceptions import FunctionCallValidationError
class Tool(ABC):
"""Base class for CodeAct tools.
Subclasses should encapsulate schema, descriptions and validation.
They must implement to_param() and to_action().
"""
@abstractmethod
def to_param(self) -> ChatCompletionToolParam:
"""Return the ChatCompletionToolParam schema for this tool."""
raise NotImplementedError
def parse_arguments(self, raw_arguments: str) -> dict[str, Any]:
"""Parse the raw JSON string from the model into a dict.
Raises FunctionCallValidationError on failure.
"""
try:
return json.loads(raw_arguments) if raw_arguments else {}
except json.decoder.JSONDecodeError as e:
raise FunctionCallValidationError(
f'Failed to parse tool call arguments: {raw_arguments}'
) from e
@abstractmethod
def to_action(self, arguments: dict[str, Any]): # -> Action
"""Convert validated arguments to an Action.
Implementations should raise FunctionCallValidationError for
missing/invalid parameters.
"""
raise NotImplementedError

View File

@@ -1,8 +1,22 @@
from __future__ import annotations
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
from openhands.agenthub.codeact_agent.tools.base import Tool
from openhands.agenthub.codeact_agent.tools.prompt import refine_prompt
from openhands.core.exceptions import FunctionCallValidationError
from openhands.events.action import CmdRunAction
from openhands.llm.tool_names import EXECUTE_BASH_TOOL_NAME
class _ToolRef:
def __init__(self, name: str) -> None:
self.name = name
# Lightweight reference so callers can compare against execute_bash.name
execute_bash = _ToolRef(EXECUTE_BASH_TOOL_NAME)
_DETAILED_BASH_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session.
@@ -28,6 +42,65 @@ _DETAILED_BASH_DESCRIPTION = """Execute a bash command in the terminal within a
* Output truncation: If the output exceeds a maximum length, it will be truncated before being returned.
"""
class CmdRunTool(Tool):
def __init__(self, use_short_description: bool = False) -> None:
self.use_short_description = use_short_description
def to_param(self) -> ChatCompletionToolParam:
description = (
_SHORT_BASH_DESCRIPTION
if self.use_short_description
else _DETAILED_BASH_DESCRIPTION
)
return ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name=EXECUTE_BASH_TOOL_NAME,
description=refine_prompt(description),
parameters={
'type': 'object',
'properties': {
'command': {
'type': 'string',
'description': refine_prompt(
'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process. Note: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.'
),
},
'is_input': {
'type': 'string',
'description': refine_prompt(
'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. Default is False.'
),
'enum': ['true', 'false'],
},
'timeout': {
'type': 'number',
'description': 'Optional. Sets a hard timeout in seconds for the command execution. If not provided, the command will use the default soft timeout behavior.',
},
},
'required': ['command'],
},
),
)
def to_action(self, arguments: dict[str, str]) -> CmdRunAction:
if 'command' not in arguments:
raise FunctionCallValidationError(
'Missing required argument "command" in tool call execute_bash'
)
is_input = arguments.get('is_input', 'false') == 'true'
action = CmdRunAction(command=arguments['command'], is_input=is_input)
if 'timeout' in arguments:
try:
action.set_hard_timeout(float(arguments['timeout']))
except ValueError as e:
raise FunctionCallValidationError(
f"Invalid float passed to 'timeout' argument: {arguments['timeout']}"
) from e
return action
_SHORT_BASH_DESCRIPTION = """Execute a bash command in the terminal.
* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`. For commands that need to run for a specific duration, you can set the "timeout" argument to specify a hard timeout in seconds.
* Interact with running process: If a bash command returns exit code `-1`, this means the process is not yet finished. By setting `is_input` to `true`, the assistant can interact with the running process and send empty `command` to retrieve any additional logs, or send additional text (set `command` to the text) to STDIN of the running process, or send command like `C-c` (Ctrl+C), `C-d` (Ctrl+D), `C-z` (Ctrl+Z) to interrupt the process.

View File

@@ -22,7 +22,7 @@ from openhands.events.observation import (
Observation,
)
from openhands.events.serialization.event import event_to_dict
from openhands.llm.llm_registry import LLMRegistry
from openhands.llm.llm import LLM
"""
FIXME: There are a few problems this surfaced
@@ -42,12 +42,8 @@ class DummyAgent(Agent):
without making any LLM calls.
"""
def __init__(
self,
config: AgentConfig,
llm_registry: LLMRegistry,
):
super().__init__(config, llm_registry)
def __init__(self, llm: LLM, config: AgentConfig):
super().__init__(llm, config)
self.steps: list[ActionObs] = [
{
'action': MessageAction('Time to get started!'),

View File

@@ -4,7 +4,7 @@ import openhands.agenthub.loc_agent.function_calling as locagent_function_callin
from openhands.agenthub.codeact_agent import CodeActAgent
from openhands.core.config import AgentConfig
from openhands.core.logger import openhands_logger as logger
from openhands.llm.llm_registry import LLMRegistry
from openhands.llm.llm import LLM
if TYPE_CHECKING:
from openhands.events.action import Action
@@ -16,8 +16,8 @@ class LocAgent(CodeActAgent):
def __init__(
self,
llm: LLM,
config: AgentConfig,
llm_registry: LLMRegistry,
) -> None:
"""Initializes a new instance of the LocAgent class.
@@ -25,8 +25,7 @@ class LocAgent(CodeActAgent):
- llm (LLM): The llm to be used by this agent
- config (AgentConfig): The configuration for the agent
"""
super().__init__(config, llm_registry)
super().__init__(llm, config)
self.tools = locagent_function_calling.get_tools()
logger.debug(

View File

@@ -3,8 +3,6 @@
import os
from typing import TYPE_CHECKING
from openhands.llm.llm_registry import LLMRegistry
if TYPE_CHECKING:
from litellm import ChatCompletionToolParam
@@ -17,6 +15,7 @@ from openhands.agenthub.readonly_agent import (
)
from openhands.core.config import AgentConfig
from openhands.core.logger import openhands_logger as logger
from openhands.llm.llm import LLM
from openhands.utils.prompt import PromptManager
@@ -38,16 +37,17 @@ class ReadOnlyAgent(CodeActAgent):
def __init__(
self,
llm: LLM,
config: AgentConfig,
llm_registry: LLMRegistry,
) -> None:
"""Initializes a new instance of the ReadOnlyAgent class.
Parameters:
- llm (LLM): The llm to be used by this agent
- config (AgentConfig): The configuration for this agent
"""
# Initialize the CodeActAgent class; some of it is overridden with class methods
super().__init__(config, llm_registry)
super().__init__(llm, config)
logger.debug(
f'TOOLS loaded for ReadOnlyAgent: {", ".join([tool.get("function").get("name") for tool in self.tools])}'

View File

@@ -16,7 +16,7 @@ from openhands.events.action import (
from openhands.events.event import EventSource
from openhands.events.observation import BrowserOutputObservation
from openhands.events.observation.observation import Observation
from openhands.llm.llm_registry import LLMRegistry
from openhands.llm.llm import LLM
from openhands.runtime.plugins import (
PluginRequirement,
)
@@ -127,13 +127,17 @@ class VisualBrowsingAgent(Agent):
sandbox_plugins: list[PluginRequirement] = []
response_parser = BrowsingResponseParser()
def __init__(self, config: AgentConfig, llm_registry: LLMRegistry) -> None:
def __init__(
self,
llm: LLM,
config: AgentConfig,
) -> None:
"""Initializes a new instance of the VisualBrowsingAgent class.
Parameters:
- llm (LLM): The llm to be used by this agent
"""
super().__init__(config, llm_registry)
super().__init__(llm, config)
# define a configurable action space, with chat functionality, web navigation, and webpage grounding using accessibility tree and HTML.
# see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/action/highlevel.py for more details
action_subsets = [

View File

@@ -83,7 +83,6 @@ from openhands.microagent.microagent import BaseMicroagent
from openhands.runtime import get_runtime_cls
from openhands.runtime.base import Runtime
from openhands.storage.settings.file_settings_store import FileSettingsStore
from openhands.utils.utils import create_registry_and_conversation_stats
async def cleanup_session(
@@ -148,16 +147,9 @@ async def run_session(
None, display_initialization_animation, 'Initializing...', is_loaded
)
llm_registry, conversation_stats, config = create_registry_and_conversation_stats(
config,
sid,
None,
)
agent = create_agent(config, llm_registry)
agent = create_agent(config)
runtime = create_runtime(
config,
llm_registry,
sid=sid,
headless_mode=True,
agent=agent,
@@ -169,9 +161,7 @@ async def run_session(
runtime.subscribe_to_shell_stream(stream_to_console)
controller, initial_state = create_controller(
agent, runtime, config, conversation_stats
)
controller, initial_state = create_controller(agent, runtime, config)
event_stream = runtime.event_stream
@@ -275,7 +265,7 @@ async def run_session(
if event.agent_state == AgentState.RUNNING:
display_agent_running_message()
start_pause_listener(loop, is_paused, event_stream, config)
start_pause_listener(loop, is_paused, event_stream)
def on_event(event: Event) -> None:
loop.create_task(on_event_async(event))

View File

@@ -1,26 +0,0 @@
from prompt_toolkit.styles import Style, merge_styles
from prompt_toolkit.styles.defaults import default_ui_style
# Centralized helper for CLI styles so we can safely merge our custom colors
# with prompt_toolkit's default UI style. This preserves completion menu and
# fuzzy-match visibility across different terminal themes (e.g., Ubuntu).
COLOR_GOLD = '#FFD700'
COLOR_GREY = '#808080'
COLOR_AGENT_BLUE = '#4682B4' # Steel blue - readable on light/dark backgrounds
def get_cli_style() -> Style:
base = default_ui_style()
custom = Style.from_dict(
{
'gold': COLOR_GOLD,
'grey': COLOR_GREY,
'prompt': f'{COLOR_GOLD} bold',
# Ensure good contrast for fuzzy matches on the selected completion row
# across terminals/themes (e.g., Ubuntu GNOME, Alacritty, Kitty).
# See https://github.com/All-Hands-AI/OpenHands/issues/10330
'completion-menu.completion.current fuzzymatch.outside': 'fg:#ffffff bg:#888888',
}
)
return merge_styles([base, custom])

View File

@@ -8,8 +8,8 @@ from prompt_toolkit.shortcuts import print_container
from prompt_toolkit.widgets import Frame, TextArea
from pydantic import SecretStr
from openhands.cli.pt_style import COLOR_GREY, get_cli_style
from openhands.cli.tui import (
COLOR_GREY,
UserCancelledError,
cli_confirm,
kb_cancel,
@@ -242,7 +242,7 @@ async def modify_llm_settings_basic(
provider_list = verified_providers + provider_list
provider_completer = FuzzyWordCompleter(provider_list, WORD=True)
session = PromptSession(key_bindings=kb_cancel(), style=get_cli_style())
session = PromptSession(key_bindings=kb_cancel())
current_provider, current_model, current_api_key = (
_get_current_values_for_modification_basic(config)
@@ -490,7 +490,7 @@ async def modify_llm_settings_basic(
async def modify_llm_settings_advanced(
config: OpenHandsConfig, settings_store: FileSettingsStore
) -> None:
session = PromptSession(key_bindings=kb_cancel(), style=get_cli_style())
session = PromptSession(key_bindings=kb_cancel())
llm_config = config.get_llm_config()
custom_model = None
@@ -621,7 +621,7 @@ async def modify_search_api_settings(
config: OpenHandsConfig, settings_store: FileSettingsStore
) -> None:
"""Modify search API settings."""
session = PromptSession(key_bindings=kb_cancel(), style=get_cli_style())
session = PromptSession(key_bindings=kb_cancel())
search_api_key = None

View File

@@ -31,12 +31,6 @@ from prompt_toolkit.styles import Style
from prompt_toolkit.widgets import Frame, TextArea
from openhands import __version__
from openhands.cli.pt_style import (
COLOR_AGENT_BLUE,
COLOR_GOLD,
COLOR_GREY,
get_cli_style,
)
from openhands.core.config import OpenHandsConfig
from openhands.core.schema import AgentState
from openhands.events import EventSource, EventStream
@@ -72,7 +66,16 @@ recent_thoughts: list[str] = []
MAX_RECENT_THOUGHTS = 5
# Color and styling constants
DEFAULT_STYLE = get_cli_style()
COLOR_GOLD = '#FFD700'
COLOR_GREY = '#808080'
COLOR_AGENT_BLUE = '#4682B4' # Steel blue - less saturated, works well on both light and dark backgrounds
DEFAULT_STYLE = Style.from_dict(
{
'gold': COLOR_GOLD,
'grey': COLOR_GREY,
'prompt': f'{COLOR_GOLD} bold',
}
)
COMMANDS = {
'/exit': 'Exit the application',
@@ -87,9 +90,6 @@ COMMANDS = {
print_lock = threading.Lock()
# Lock to debounce sending Ctrl+C interrupts to the running command
_interrupt_lock: asyncio.Lock = asyncio.Lock()
pause_task: asyncio.Task | None = None # No more than one pause task
@@ -662,15 +662,6 @@ def display_help() -> None:
commands_html += f'<gold><b>{command}</b></gold> - <grey>{description}</grey>\n'
print_formatted_text(HTML(commands_html))
# Keyboard shortcuts section
print_formatted_text(HTML('\nKeyboard shortcuts:'))
shortcuts_html = (
'<gold><b>Ctrl+P</b></gold> - <grey>Pause the agent</grey>\n'
'<gold><b>Ctrl+C</b></gold> - <grey>Pause the agent; press twice quickly to interrupt a running command</grey>\n'
'<gold><b>Ctrl+D</b></gold> - <grey>Pause the agent</grey>\n'
)
print_formatted_text(HTML(shortcuts_html))
# Footer
print_formatted_text(
HTML(
@@ -876,13 +867,12 @@ async def read_confirmation_input(config: OpenHandsConfig) -> str:
def start_pause_listener(
loop: asyncio.AbstractEventLoop,
done_event: asyncio.Event,
event_stream: EventStream,
config: OpenHandsConfig,
event_stream,
) -> None:
global pause_task
if pause_task is None or pause_task.done():
pause_task = loop.create_task(
process_agent_pause(done_event, event_stream, config)
process_agent_pause(done_event, event_stream)
) # Create a task to track agent pause requests from the user
@@ -896,135 +886,16 @@ async def stop_pause_listener() -> None:
pause_task = None
def is_command_running(event_stream: EventStream) -> bool:
"""Check if a shell command is currently running using bounded reverse search.
We look at the latest relevant event (CmdRunAction or CmdOutputObservation):
- If it's a CmdOutputObservation with a finalized exit_code (>= 0), no command is running
- If it's a CmdOutputObservation with exit_code == -1, the command is still running (streaming)
- If it's a CmdRunAction (non-input), we assume a command has started and is running
"""
try:
from openhands.events.event_filter import EventFilter
filt = EventFilter(include_types=(CmdRunAction, CmdOutputObservation))
for ev in event_stream.search_events(reverse=True, filter=filt, limit=50):
if isinstance(ev, CmdOutputObservation):
return ev.metadata.exit_code == -1
if isinstance(ev, CmdRunAction):
if ev.is_input:
continue
return True
return False
except Exception:
# If detection fails for any reason, default to no running command
return False
async def _handle_command_interrupt(
event_stream: EventStream, config: OpenHandsConfig
) -> bool:
"""Handle command interruption with user confirmation.
Returns:
bool: True if the interrupt was handled, False if the user wants to pause the agent
"""
print_formatted_text('')
print_formatted_text(HTML('<gold>Command is currently running.</gold>'))
print_formatted_text('')
# Keep legacy behavior: single Ctrl+C pauses by default. Offer kill as opt-in.
choices = [
'Pause the agent (default)',
'Continue waiting for command to complete',
'Send interrupt to running command (Ctrl+C)',
]
# Use the passed-in config so we honor CLI settings like VI mode. Run the blocking UI off the loop.
selection = await asyncio.to_thread(
cli_confirm, config, 'What would you like to do?', choices, 0
)
if selection == 2: # Send interrupt to the running command
print_formatted_text('')
print_formatted_text(
HTML('<gold>Sending interrupt signal to running command...</gold>')
)
# Debounce rapid interrupts to avoid multiple concurrent dialogs/interrupts
if _interrupt_lock.locked():
print_formatted_text(HTML('<grey>Interrupt already sent; waiting…</grey>'))
return True
async with _interrupt_lock:
event_stream.add_event(
CmdRunAction(command='C-c', is_input=True),
EventSource.USER,
)
return True
elif selection == 1: # Continue waiting
print_formatted_text('')
print_formatted_text(
HTML('<gold>Continuing to wait for command completion...</gold>')
)
return True
else: # Pause the agent (selection == 0)
return False
async def _handle_interrupt_async(
event_stream: EventStream, done: asyncio.Event, config: OpenHandsConfig
) -> None:
"""Handle the interrupt asynchronously to avoid blocking the input handler."""
try:
handled = await _handle_command_interrupt(event_stream, config)
if not handled:
# User chose to pause the agent
print_formatted_text('')
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
event_stream.add_event(
ChangeAgentStateAction(AgentState.PAUSED),
EventSource.USER,
)
done.set()
except Exception as e:
# If something goes wrong, fall back to pausing the agent
print_formatted_text('')
print_formatted_text(HTML(f'<ansired>Error handling interrupt: {e}</ansired>'))
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
event_stream.add_event(
ChangeAgentStateAction(AgentState.PAUSED),
EventSource.USER,
)
done.set()
async def process_agent_pause(
done: asyncio.Event, event_stream: EventStream, config: OpenHandsConfig
) -> None:
async def process_agent_pause(done: asyncio.Event, event_stream: EventStream) -> None:
input = create_input()
# Double-press detection window for Ctrl+C to send interrupt to running command
CTRL_C_WINDOW_SECONDS = 0.4
ctrl_c_timer: asyncio.Task | None = None
async def pause_after_delay(delay: float) -> None:
try:
await asyncio.sleep(delay)
print_formatted_text('')
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
event_stream.add_event(
ChangeAgentStateAction(AgentState.PAUSED),
EventSource.USER,
)
done.set()
except asyncio.CancelledError:
# Timer canceled because a second Ctrl+C was detected; do nothing
pass
def keys_ready() -> None:
nonlocal ctrl_c_timer
for key_press in input.read_keys():
if key_press.key == Keys.ControlP or key_press.key == Keys.ControlD:
# Immediate pause
if (
key_press.key == Keys.ControlP
or key_press.key == Keys.ControlC
or key_press.key == Keys.ControlD
):
print_formatted_text('')
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
event_stream.add_event(
@@ -1032,47 +903,6 @@ async def process_agent_pause(
EventSource.USER,
)
done.set()
elif key_press.key == Keys.ControlC:
if is_command_running(event_stream):
# If a timer is already running, this is a double-press: send interrupt
if ctrl_c_timer and not ctrl_c_timer.done():
ctrl_c_timer.cancel()
ctrl_c_timer = None
if _interrupt_lock.locked():
print_formatted_text(
HTML('<grey>Interrupt already sent; waiting…</grey>')
)
continue
# Send Ctrl+C to the running command
async def send_interrupt() -> None:
async with _interrupt_lock:
print_formatted_text('')
print_formatted_text(
HTML(
'<gold>Sending interrupt signal to running command...</gold>'
)
)
event_stream.add_event(
CmdRunAction(command='C-c', is_input=True),
EventSource.USER,
)
asyncio.create_task(send_interrupt())
else:
# Start a short window; if no second press, pause
ctrl_c_timer = asyncio.create_task(
pause_after_delay(CTRL_C_WINDOW_SECONDS)
)
else:
# No command running: default immediate pause
print_formatted_text('')
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
event_stream.add_event(
ChangeAgentStateAction(AgentState.PAUSED),
EventSource.USER,
)
done.set()
try:
with input.raw_mode():

View File

@@ -3,8 +3,6 @@ from __future__ import annotations
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING
from openhands.llm.llm_registry import LLMRegistry
if TYPE_CHECKING:
from openhands.controller.state.state import State
from openhands.events.action import Action
@@ -19,6 +17,7 @@ from openhands.core.exceptions import (
)
from openhands.core.logger import openhands_logger as logger
from openhands.events.event import EventSource
from openhands.llm.llm import LLM
from openhands.runtime.plugins import PluginRequirement
@@ -39,11 +38,10 @@ class Agent(ABC):
def __init__(
self,
llm: LLM,
config: AgentConfig,
llm_registry: LLMRegistry,
):
self.llm = llm_registry.get_llm_from_agent_config('agent', config)
self.llm_registry = llm_registry
self.llm = llm
self.config = config
self._complete = False
self._prompt_manager: 'PromptManager' | None = None

View File

@@ -73,9 +73,9 @@ from openhands.events.observation import (
Observation,
)
from openhands.events.serialization.event import truncate_content
from openhands.llm.llm import LLM
from openhands.llm.metrics import Metrics
from openhands.runtime.runtime_status import RuntimeStatus
from openhands.server.services.conversation_stats import ConversationStats
from openhands.storage.files import FileStore
# note: RESUME is only available on web GUI
@@ -109,7 +109,6 @@ class AgentController:
self,
agent: Agent,
event_stream: EventStream,
conversation_stats: ConversationStats,
iteration_delta: int,
budget_per_task_delta: float | None = None,
agent_to_llm_config: dict[str, LLMConfig] | None = None,
@@ -149,7 +148,6 @@ class AgentController:
self.agent = agent
self.headless_mode = headless_mode
self.is_delegate = is_delegate
self.conversation_stats = conversation_stats
# the event stream must be set before maybe subscribing to it
self.event_stream = event_stream
@@ -165,7 +163,6 @@ class AgentController:
# state from the previous session, state from a parent agent, or a fresh state
self.set_initial_state(
state=initial_state,
conversation_stats=conversation_stats,
max_iterations=iteration_delta,
max_budget_per_task=budget_per_task_delta,
confirmation_mode=confirmation_mode,
@@ -480,6 +477,11 @@ class AgentController:
log_level, str(observation_to_print), extra={'msg_type': 'OBSERVATION'}
)
# TODO: these metrics come from the draft editor, and they get accumulated into controller's state metrics and the agent's llm metrics
# In the future, we should have a more principled way to sharing metrics across all LLM instances for a given conversation
if observation.llm_metrics is not None:
self.state_tracker.merge_metrics(observation.llm_metrics)
# this happens for runnable actions and microagent actions
if self._pending_action and self._pending_action.id == observation.cause:
if self.state.agent_state == AgentState.AWAITING_USER_CONFIRMATION:
@@ -655,10 +657,14 @@ class AgentController:
"""
agent_cls: type[Agent] = Agent.get_cls(action.agent)
agent_config = self.agent_configs.get(action.agent, self.agent.config)
llm_config = self.agent_to_llm_config.get(action.agent, self.agent.llm.config)
# Make sure metrics are shared between parent and child for global accumulation
delegate_agent = agent_cls(
config=agent_config, llm_registry=self.agent.llm_registry
llm = LLM(
config=llm_config,
retry_listener=self.agent.llm.retry_listener,
metrics=self.state.metrics,
)
delegate_agent = agent_cls(llm=llm, config=agent_config)
# Take a snapshot of the current metrics before starting the delegate
state = State(
@@ -677,7 +683,7 @@ class AgentController:
)
self.log(
'debug',
f'start delegate, creating agent {delegate_agent.name}',
f'start delegate, creating agent {delegate_agent.name} using LLM {llm}',
)
# Create the delegate with is_delegate=True so it does NOT subscribe directly
@@ -687,7 +693,6 @@ class AgentController:
user_id=self.user_id,
agent=delegate_agent,
event_stream=self.event_stream,
conversation_stats=self.conversation_stats,
iteration_delta=self._initial_max_iterations,
budget_per_task_delta=self._initial_max_budget_per_task,
agent_to_llm_config=self.agent_to_llm_config,
@@ -790,8 +795,13 @@ class AgentController:
extra={'msg_type': 'STEP'},
)
# Synchronize spend across all llm services with the budget flag
# Ensure budget control flag is synchronized with the latest metrics.
# In the future, we should centralized the use of one LLM object per conversation.
# This will help us unify the cost for auto generating titles, running the condensor, etc.
# Before many microservices will touh the same llm cost field, we should sync with the budget flag for the controller
# and check that we haven't exceeded budget BEFORE executing an agent step.
self.state_tracker.sync_budget_flag_with_metrics()
if self._is_stuck():
await self._react_to_exception(
AgentStuckInLoopError('Agent got stuck in a loop')
@@ -951,15 +961,14 @@ class AgentController:
def set_initial_state(
self,
state: State | None,
conversation_stats: ConversationStats,
max_iterations: int,
max_budget_per_task: float | None,
confirmation_mode: bool = False,
):
self.state_tracker.set_initial_state(
self.id,
self.agent,
state,
conversation_stats,
max_iterations,
max_budget_per_task,
confirmation_mode,
@@ -1000,20 +1009,37 @@ class AgentController:
action: The action to attach metrics to
"""
# Get metrics from agent LLM
metrics = self.conversation_stats.get_combined_metrics()
agent_metrics = self.state.metrics
# Create a clean copy with only the fields we want to keep
clean_metrics = Metrics()
clean_metrics.accumulated_cost = metrics.accumulated_cost
clean_metrics._accumulated_token_usage = copy.deepcopy(
metrics.accumulated_token_usage
)
# Get metrics from condenser LLM if it exists
condenser_metrics: Metrics | None = None
if hasattr(self.agent, 'condenser') and hasattr(self.agent.condenser, 'llm'):
condenser_metrics = self.agent.condenser.llm.metrics
# Create a new minimal metrics object with just what the frontend needs
metrics = Metrics(model_name=agent_metrics.model_name)
# Set accumulated cost (sum of agent and condenser costs)
metrics.accumulated_cost = agent_metrics.accumulated_cost
if condenser_metrics:
metrics.accumulated_cost += condenser_metrics.accumulated_cost
# Add max_budget_per_task to metrics
if self.state.budget_flag:
clean_metrics.max_budget_per_task = self.state.budget_flag.max_value
metrics.max_budget_per_task = self.state.budget_flag.max_value
action.llm_metrics = clean_metrics
# Set accumulated token usage (sum of agent and condenser token usage)
# Use a deep copy to ensure we don't modify the original object
metrics._accumulated_token_usage = (
agent_metrics.accumulated_token_usage.model_copy(deep=True)
)
if condenser_metrics:
metrics._accumulated_token_usage = (
metrics._accumulated_token_usage
+ condenser_metrics.accumulated_token_usage
)
action.llm_metrics = metrics
# Log the metrics information for debugging
# Get the latest usage directly from the agent's metrics

View File

@@ -21,7 +21,6 @@ from openhands.events.action.agent import AgentFinishAction
from openhands.events.event import Event, EventSource
from openhands.llm.metrics import Metrics
from openhands.memory.view import View
from openhands.server.services.conversation_stats import ConversationStats
from openhands.storage.files import FileStore
from openhands.storage.locations import get_conversation_agent_state_filename
@@ -85,7 +84,6 @@ class State:
limit_increase_amount=100, current_value=0, max_value=100
)
)
conversation_stats: ConversationStats | None = None
budget_flag: BudgetControlFlag | None = None
confirmation_mode: bool = False
history: list[Event] = field(default_factory=list)
@@ -93,7 +91,8 @@ class State:
outputs: dict = field(default_factory=dict)
agent_state: AgentState = AgentState.LOADING
resume_state: AgentState | None = None
# global metrics for the current task
metrics: Metrics = field(default_factory=Metrics)
# root agent has level 0, and every delegate increases the level by one
delegate_level: int = 0
# start_id and end_id track the range of events in history
@@ -117,14 +116,9 @@ class State:
local_metrics: Metrics | None = None
delegates: dict[tuple[int, int], tuple[str, str]] | None = None
metrics: Metrics = field(default_factory=Metrics)
def save_to_session(
self, sid: str, file_store: FileStore, user_id: str | None
) -> None:
conversation_stats = self.conversation_stats
self.conversation_stats = None # Don't save conversation stats, handles itself
pickled = pickle.dumps(self)
logger.debug(f'Saving state to session {sid}:{self.agent_state}')
encoded = base64.b64encode(pickled).decode('utf-8')
@@ -144,8 +138,6 @@ class State:
logger.error(f'Failed to save state to session: {e}')
raise e
self.conversation_stats = conversation_stats # restore reference
@staticmethod
def restore_from_session(
sid: str, file_store: FileStore, user_id: str | None = None

View File

@@ -1,3 +1,4 @@
from openhands.controller.agent import Agent
from openhands.controller.state.control_flags import (
BudgetControlFlag,
IterationControlFlag,
@@ -13,7 +14,7 @@ from openhands.events.observation.delegate import AgentDelegateObservation
from openhands.events.observation.empty import NullObservation
from openhands.events.serialization.event import event_to_trajectory
from openhands.events.stream import EventStream
from openhands.server.services.conversation_stats import ConversationStats
from openhands.llm.metrics import Metrics
from openhands.storage.files import FileStore
@@ -50,8 +51,8 @@ class StateTracker:
def set_initial_state(
self,
id: str,
agent: Agent,
state: State | None,
conversation_stats: ConversationStats,
max_iterations: int,
max_budget_per_task: float | None,
confirmation_mode: bool = False,
@@ -74,7 +75,6 @@ class StateTracker:
session_id=id.removesuffix('-delegate'),
user_id=self.user_id,
inputs={},
conversation_stats=conversation_stats,
iteration_flag=IterationControlFlag(
limit_increase_amount=max_iterations,
current_value=0,
@@ -99,7 +99,13 @@ class StateTracker:
if self.state.start_id <= -1:
self.state.start_id = 0
state.conversation_stats = conversation_stats
logger.info(
f'AgentController {id} initializing history from event {self.state.start_id}',
)
# Share the state metrics with the agent's LLM metrics
# This ensures that all accumulated metrics are always in sync between controller and llm
agent.llm.metrics = self.state.metrics
def _init_history(self, event_stream: EventStream) -> None:
"""Initializes the agent's history from the event stream.
@@ -248,9 +254,6 @@ class StateTracker:
if self.sid and self.file_store:
self.state.save_to_session(self.sid, self.file_store, self.user_id)
if self.state.conversation_stats:
self.state.conversation_stats.save_metrics()
def run_control_flags(self):
"""Performs one step of the control flags"""
self.state.iteration_flag.step()
@@ -261,8 +264,20 @@ class StateTracker:
"""Ensures that budget flag is up to date with accumulated costs from llm completions
Budget flag will monitor for when budget is exceeded
"""
# Sync cost across all llm services from llm registry
if self.state.budget_flag and self.state.conversation_stats:
self.state.budget_flag.current_value = (
self.state.conversation_stats.get_combined_metrics().accumulated_cost
)
if self.state.budget_flag:
self.state.budget_flag.current_value = self.state.metrics.accumulated_cost
def merge_metrics(self, metrics: Metrics):
"""Merges metrics with the state metrics
NOTE: this should be refactored in the future. We should have services (draft llm, title autocomplete, condenser, etc)
use their own LLMs, but the metrics object should be shared. This way we have one source of truth for accumulated costs from
all services
This would prevent having fragmented stores for metrics, and we don't have the burden of deciding where and how to store them
if we decide introduce more specialized services that require llm completions
"""
self.state.metrics.merge(metrics)
if self.state.budget_flag:
self.state.budget_flag.current_value = self.state.metrics.accumulated_cost

View File

@@ -172,6 +172,9 @@ class LLMConfig(BaseModel):
# Set reasoning_effort to 'high' by default for non-Gemini models
# Gemini models use optimized thinking budget when reasoning_effort is None
logger.debug(
f'Setting reasoning_effort for model {self.model} with reasoning_effort {self.reasoning_effort}'
)
if self.reasoning_effort is None and 'gemini-2.5-pro' not in self.model:
self.reasoning_effort = 'high'

View File

@@ -157,16 +157,13 @@ class OpenHandsConfig(BaseModel):
"""Get a map of agent names to llm configs."""
return {name: self.get_llm_config_from_agent(name) for name in self.agents}
def get_llm_config_from_agent_config(self, agent_config: AgentConfig):
def get_llm_config_from_agent(self, name: str = 'agent') -> LLMConfig:
agent_config: AgentConfig = self.get_agent_config(name)
llm_config_name = (
agent_config.llm_config if agent_config.llm_config is not None else 'llm'
)
return self.get_llm_config(llm_config_name)
def get_llm_config_from_agent(self, name: str = 'agent') -> LLMConfig:
agent_config: AgentConfig = self.get_agent_config(name)
return self.get_llm_config_from_agent_config(agent_config)
def get_agent_configs(self) -> dict[str, AgentConfig]:
return self.agents

View File

@@ -18,7 +18,6 @@ class SandboxConfig(BaseModel):
remote_runtime_enable_retries: Whether to enable retries (on recoverable errors like requests.ConnectionError) for the remote runtime API requests.
enable_auto_lint: Whether to enable auto-lint.
use_host_network: Whether to use the host network.
additional_networks: A list of additional Docker networks to connect to
runtime_binding_address: The binding address for the runtime ports. It specifies which network interface on the host machine Docker should bind the runtime ports to.
initialize_plugins: Whether to initialize plugins.
force_rebuild_runtime: Whether to force rebuild the runtime image.
@@ -66,7 +65,6 @@ class SandboxConfig(BaseModel):
default=False
) # once enabled, OpenHands would lint files after editing
use_host_network: bool = Field(default=False)
additional_networks: list[str] = Field(default=[])
runtime_binding_address: str = Field(default='0.0.0.0')
runtime_extra_build_args: list[str] | None = Field(default=None)
initialize_plugins: bool = Field(default=True)

View File

@@ -6,6 +6,7 @@ from typing import Callable, Protocol
import openhands.agenthub # noqa F401 (we import this to get the agents registered)
import openhands.cli.suppress_warnings # noqa: F401
from openhands.controller.agent import Agent
from openhands.controller.replay import ReplayManager
from openhands.controller.state.state import State
from openhands.core.config import (
@@ -36,7 +37,6 @@ from openhands.mcp import add_mcp_tools_to_agent
from openhands.memory.memory import Memory
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
from openhands.utils.utils import create_registry_and_conversation_stats
class FakeUserResponseFunc(Protocol):
@@ -53,6 +53,7 @@ async def run_controller(
initial_user_action: Action,
sid: str | None = None,
runtime: Runtime | None = None,
agent: Agent | None = None,
exit_on_message: bool = False,
fake_user_response_fn: FakeUserResponseFunc | None = None,
headless_mode: bool = True,
@@ -69,6 +70,7 @@ async def run_controller(
sid: (optional) The session id. IMPORTANT: please don't set this unless you know what you're doing.
Set it to incompatible value will cause unexpected behavior on RemoteRuntime.
runtime: (optional) A runtime for the agent to run on.
agent: (optional) A agent to run.
exit_on_message: quit if agent asks for a message from user (optional)
fake_user_response_fn: An optional function that receives the current state
(could be None) and returns a fake user response.
@@ -96,13 +98,8 @@ async def run_controller(
"""
sid = sid or generate_sid(config)
llm_registry, conversation_stats, config = create_registry_and_conversation_stats(
config,
sid,
None,
)
agent = create_agent(config, llm_registry)
if agent is None:
agent = create_agent(config)
# when the runtime is created, it will be connected and clone the selected repository
repo_directory = None
@@ -111,7 +108,6 @@ async def run_controller(
repo_tokens = get_provider_tokens()
runtime = create_runtime(
config,
llm_registry,
sid=sid,
headless_mode=headless_mode,
agent=agent,
@@ -163,7 +159,7 @@ async def run_controller(
)
controller, initial_state = create_controller(
agent, runtime, config, conversation_stats, replay_events=replay_events
agent, runtime, config, replay_events=replay_events
)
assert isinstance(initial_user_action, Action), (

Some files were not shown because too many files have changed in this diff Show More