mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| aea92f3869 | |||
| cee88aff48 | |||
| 0c2283abcc |
@@ -22,7 +22,7 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install poetry via pipx
|
||||
uses: abatilo/actions-poetry@v4
|
||||
uses: abatilo/actions-poetry@v3
|
||||
with:
|
||||
poetry-version: 2.1.3
|
||||
|
||||
@@ -183,11 +183,7 @@ jobs:
|
||||
|
||||
# Run the tests with detailed output
|
||||
cd tests/e2e
|
||||
poetry run python -m pytest \
|
||||
test_settings.py::test_github_token_configuration \
|
||||
test_conversation.py::test_conversation_start \
|
||||
test_browsing_catchphrase.py::test_browsing_catchphrase \
|
||||
-v --no-header --capture=no --timeout=900
|
||||
poetry run python -m pytest test_settings.py::test_github_token_configuration test_conversation.py::test_conversation_start -v --no-header --capture=no --timeout=600
|
||||
|
||||
- name: Upload test results
|
||||
if: always()
|
||||
|
||||
@@ -29,12 +29,6 @@ jobs:
|
||||
run: |
|
||||
cd frontend
|
||||
npm install --frozen-lockfile
|
||||
- name: Generate i18n and route types
|
||||
run: |
|
||||
cd frontend
|
||||
npm run make-i18n
|
||||
npx react-router typegen || true
|
||||
|
||||
- name: Fix frontend lint issues
|
||||
run: |
|
||||
cd frontend
|
||||
@@ -51,7 +45,7 @@ jobs:
|
||||
git config --local user.email "openhands@all-hands.dev"
|
||||
git config --local user.name "OpenHands Bot"
|
||||
git add -A
|
||||
git commit -m "🤖 Auto-fix frontend linting issues" --no-verify
|
||||
git commit -m "🤖 Auto-fix frontend linting issues"
|
||||
git push
|
||||
|
||||
# Python lint fixes
|
||||
@@ -93,5 +87,5 @@ jobs:
|
||||
git config --local user.email "openhands@all-hands.dev"
|
||||
git config --local user.name "OpenHands Bot"
|
||||
git add -A
|
||||
git commit -m "🤖 Auto-fix Python linting issues" --no-verify
|
||||
git commit -m "🤖 Auto-fix Python linting issues"
|
||||
git push
|
||||
|
||||
@@ -73,7 +73,7 @@ jobs:
|
||||
- name: Install Python dependencies using Poetry
|
||||
run: poetry install --with dev,test,runtime
|
||||
- name: Run Windows unit tests
|
||||
run: poetry run pytest -svv tests/unit/runtime/utils/test_windows_bash.py
|
||||
run: poetry run pytest -svv tests/unit/test_windows_bash.py
|
||||
env:
|
||||
PYTHONPATH: ".;$env:PYTHONPATH"
|
||||
DEBUG: "1"
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
name: Welcome Good First Issue
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [labeled]
|
||||
|
||||
permissions:
|
||||
issues: write
|
||||
|
||||
jobs:
|
||||
comment-on-good-first-issue:
|
||||
if: github.event.label.name == 'good first issue'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check if welcome comment already exists
|
||||
id: check_comment
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
result-encoding: string
|
||||
script: |
|
||||
const issueNumber = context.issue.number;
|
||||
const comments = await github.rest.issues.listComments({
|
||||
...context.repo,
|
||||
issue_number: issueNumber
|
||||
});
|
||||
|
||||
const alreadyCommented = comments.data.some(
|
||||
(comment) =>
|
||||
comment.body.includes('<!-- auto-comment:good-first-issue -->')
|
||||
);
|
||||
|
||||
return alreadyCommented ? 'true' : 'false';
|
||||
|
||||
- name: Leave welcome comment
|
||||
if: steps.check_comment.outputs.result == 'false'
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const repoUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}`;
|
||||
|
||||
await github.rest.issues.createComment({
|
||||
...context.repo,
|
||||
issue_number: context.issue.number,
|
||||
body: "🙌 **Hey there, future contributor!** 🙌\n\n" +
|
||||
"This issue has been labeled as **good first issue**, which means it's a great place to get started with the OpenHands project.\n\n" +
|
||||
"If you're interested in working on it, feel free to! No need to ask for permission.\n\n" +
|
||||
"Be sure to check out our [development setup guide](" + repoUrl + "/blob/main/Development.md) to get your environment set up, and follow our [contribution guidelines](" + repoUrl + "/blob/main/CONTRIBUTING.md) when you're ready to submit a fix.\n\n" +
|
||||
"🙌 Happy hacking! 🙌\n\n" +
|
||||
"<!-- auto-comment:good-first-issue -->"
|
||||
});
|
||||
@@ -257,5 +257,3 @@ containers/runtime/code
|
||||
|
||||
# test results
|
||||
test-results
|
||||
.sessions
|
||||
.eval_sessions
|
||||
|
||||
@@ -87,8 +87,6 @@ VSCode Extension:
|
||||
|
||||
If you are starting a pull request (PR), please follow the template in `.github/pull_request_template.md`.
|
||||
|
||||
If you need to add labels when opening a PR, check the existing labels defined on that repository and select from existing ones. Do not invent your own labels.
|
||||
|
||||
## Implementation Details
|
||||
|
||||
These details may or may not be useful for your current task.
|
||||
@@ -144,35 +142,6 @@ Your specialized knowledge and instructions here...
|
||||
- Add the setting to the `Settings` model in `openhands/storage/data_models/settings.py`
|
||||
- Update any relevant backend code to apply the setting (e.g., in session creation)
|
||||
|
||||
#### Settings UI Patterns:
|
||||
|
||||
There are two main patterns for saving settings in the OpenHands frontend:
|
||||
|
||||
**Pattern 1: Entity-based Resources (Immediate Save)**
|
||||
- Used for: API Keys, Secrets, MCP Servers
|
||||
- Behavior: Changes are saved immediately when user performs actions (add/edit/delete)
|
||||
- Implementation:
|
||||
- No "Save Changes" button
|
||||
- No local state management or `isDirty` tracking
|
||||
- Uses dedicated mutation hooks for each operation (e.g., `use-add-mcp-server.ts`, `use-delete-mcp-server.ts`)
|
||||
- Each mutation triggers immediate API call with query invalidation for UI updates
|
||||
- Example: MCP settings, API Keys & Secrets tabs
|
||||
- Benefits: Simpler UX, no risk of losing changes, consistent with modern web app patterns
|
||||
|
||||
**Pattern 2: Form-based Settings (Manual Save)**
|
||||
- Used for: Application settings, LLM configuration
|
||||
- Behavior: Changes are accumulated locally and saved when user clicks "Save Changes"
|
||||
- Implementation:
|
||||
- Has "Save Changes" button that becomes enabled when changes are detected
|
||||
- Uses local state management with `isDirty` tracking
|
||||
- Uses `useSaveSettings` hook to save all changes at once
|
||||
- Example: LLM tab, Application tab
|
||||
- Benefits: Allows bulk changes, explicit save action, can validate all fields before saving
|
||||
|
||||
**When to use each pattern:**
|
||||
- Use Pattern 1 (Immediate Save) for entity management where each item is independent
|
||||
- Use Pattern 2 (Manual Save) for configuration forms where settings are interdependent or need validation
|
||||
|
||||
### Adding New LLM Models
|
||||
|
||||
To add a new LLM model to OpenHands, you need to update multiple files across both frontend and backend:
|
||||
|
||||
+1
-1
@@ -159,7 +159,7 @@ poetry run pytest ./tests/unit/test_*.py
|
||||
To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker
|
||||
container image by setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
|
||||
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.54-nikolaik`
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.53-nikolaik`
|
||||
|
||||
## Develop inside Docker container
|
||||
|
||||
|
||||
@@ -79,17 +79,17 @@ You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)
|
||||
You can also run OpenHands directly with Docker:
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands:/.openhands \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
+3
-3
@@ -51,17 +51,17 @@ OpenHands也可以使用Docker在本地系统上运行。
|
||||
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands:/.openhands \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
```
|
||||
|
||||
> **注意**: 如果您在0.44版本之前使用过OpenHands,您可能需要运行 `mv ~/.openhands-state ~/.openhands` 来将对话历史迁移到新位置。
|
||||
|
||||
+3
-3
@@ -42,17 +42,17 @@ OpenHandsはDockerを利用してローカル環境でも実行できます。
|
||||
> 公共ネットワークで実行していますか?[Hardened Docker Installation Guide](https://docs.all-hands.dev/usage/runtimes/docker#hardened-docker-installation)を参照して、ネットワークバインディングの制限や追加のセキュリティ対策を実施してください。
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands:/.openhands \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
```
|
||||
|
||||
**注**: バージョン0.44以前のOpenHandsを使用していた場合は、会話履歴を移行するために `mv ~/.openhands-state ~/.openhands` を実行してください。
|
||||
|
||||
@@ -363,11 +363,10 @@ classpath = "my_package.my_module.MyCustomAgent"
|
||||
#confirmation_mode = false
|
||||
|
||||
# The security analyzer to use (For Headless / CLI only - In Web this is overridden by Session Init)
|
||||
# Available options: 'llm' (default), 'invariant'
|
||||
#security_analyzer = "llm"
|
||||
#security_analyzer = ""
|
||||
|
||||
# Whether to enable security analyzer
|
||||
#enable_security_analyzer = true
|
||||
#enable_security_analyzer = false
|
||||
|
||||
#################################### Condenser #################################
|
||||
# Condensers control how conversation history is managed and compressed when
|
||||
|
||||
@@ -21,7 +21,7 @@ ENV POETRY_NO_INTERACTION=1 \
|
||||
POETRY_CACHE_DIR=/tmp/poetry_cache
|
||||
|
||||
RUN apt-get update -y \
|
||||
&& apt-get install -y curl make git build-essential jq gettext \
|
||||
&& apt-get install -y curl make git build-essential \
|
||||
&& python3 -m pip install poetry --break-system-packages
|
||||
|
||||
COPY pyproject.toml poetry.lock ./
|
||||
|
||||
@@ -12,7 +12,7 @@ services:
|
||||
- SANDBOX_API_HOSTNAME=host.docker.internal
|
||||
- DOCKER_HOST_ADDR=host.docker.internal
|
||||
#
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.54-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.53-nikolaik}
|
||||
- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
+1
-1
@@ -7,7 +7,7 @@ services:
|
||||
image: openhands:latest
|
||||
container_name: openhands-app-${DATE:-}
|
||||
environment:
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik}
|
||||
#- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234} # enable this only if you want a specific non-root sandbox user but you will have to manually adjust permissions of ~/.openhands for this user
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
+2082
-3928
File diff suppressed because it is too large
Load Diff
|
Before Width: | Height: | Size: 113 KiB After Width: | Height: | Size: 113 KiB |
@@ -2,102 +2,55 @@
|
||||
title: Backend Architecture
|
||||
---
|
||||
|
||||
<div style={{ textAlign: 'center' }}>
|
||||
<img src="https://github.com/All-Hands-AI/OpenHands/assets/16201837/97d747e3-29d8-4ccb-8d34-6ad1adb17f38" alt="OpenHands System Architecture Diagram Jul 4 2024" />
|
||||
<p><em>OpenHands System Architecture Diagram (July 4, 2024)</em></p>
|
||||
</div>
|
||||
|
||||
This is a high-level overview of the system architecture. The system is divided into two main components: the frontend and the backend. The frontend is responsible for handling user interactions and displaying the results. The backend is responsible for handling the business logic and executing the agents.
|
||||
|
||||
# System overview
|
||||
# Frontend architecture
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
U["User"] --> FE["Frontend (SPA)"]
|
||||
FE -- "HTTP/WS" --> BE["OpenHands Backend"]
|
||||
BE --> ES["EventStream"]
|
||||
BE --> ST["Storage"]
|
||||
BE --> RT["Runtime Interface"]
|
||||
BE --> LLM["LLM Providers"]
|
||||
|
||||
subgraph Runtime
|
||||
direction TB
|
||||
RT --> DRT["Docker Runtime"]
|
||||
RT --> LRT["Local Runtime"]
|
||||
RT --> RRT["Remote Runtime"]
|
||||
DRT --> AES["Action Execution Server"]
|
||||
LRT --> AES
|
||||
RRT --> AES
|
||||
AES --> Bash["Bash Session"]
|
||||
AES --> Jupyter["Jupyter Plugin"]
|
||||
AES --> Browser["BrowserEnv"]
|
||||
end
|
||||
```
|
||||

|
||||
|
||||
This overview is simplified to show the main components and their interactions. For a more detailed view of the backend architecture, see the Backend Architecture section below.
|
||||
|
||||
# Backend Architecture
|
||||
|
||||
_**Disclaimer**: The backend architecture is a work in progress and is subject to change. The following diagram shows the current architecture of the backend based on the commit that is shown in the footer of the diagram._
|
||||
|
||||
```mermaid
|
||||
classDiagram
|
||||
class Agent {
|
||||
<<abstract>>
|
||||
+sandbox_plugins: list[PluginRequirement]
|
||||
}
|
||||
class CodeActAgent {
|
||||
+tools
|
||||
}
|
||||
Agent <|-- CodeActAgent
|
||||
|
||||
class EventStream
|
||||
class Observation
|
||||
class Action
|
||||
Action --> Observation
|
||||
Agent --> EventStream
|
||||
|
||||
class Runtime {
|
||||
+connect()
|
||||
+send_action_for_execution()
|
||||
}
|
||||
class ActionExecutionClient {
|
||||
+_send_action_server_request()
|
||||
}
|
||||
class DockerRuntime
|
||||
class LocalRuntime
|
||||
class RemoteRuntime
|
||||
Runtime <|-- ActionExecutionClient
|
||||
ActionExecutionClient <|-- DockerRuntime
|
||||
ActionExecutionClient <|-- LocalRuntime
|
||||
ActionExecutionClient <|-- RemoteRuntime
|
||||
|
||||
class ActionExecutionServer {
|
||||
+/execute_action
|
||||
+/alive
|
||||
}
|
||||
class BashSession
|
||||
class JupyterPlugin
|
||||
class BrowserEnv
|
||||
ActionExecutionServer --> BashSession
|
||||
ActionExecutionServer --> JupyterPlugin
|
||||
ActionExecutionServer --> BrowserEnv
|
||||
|
||||
Agent --> Runtime
|
||||
Runtime ..> ActionExecutionServer : REST
|
||||
```
|
||||

|
||||
|
||||
<details>
|
||||
<summary>Updating this Diagram</summary>
|
||||
<div>
|
||||
We maintain architecture diagrams inline with Mermaid in this MDX.
|
||||
The generation of the backend architecture diagram is partially automated.
|
||||
The diagram is generated from the type hints in the code using the py2puml
|
||||
tool. The diagram is then manually reviewed, adjusted and exported to PNG
|
||||
and SVG.
|
||||
|
||||
Guidance:
|
||||
- Edit the Mermaid blocks directly (flowchart/classDiagram).
|
||||
- Quote labels and edge text for GitHub preview compatibility.
|
||||
- Keep relationships concise and reflect stable abstractions (agents, runtime client/server, plugins).
|
||||
- Verify accuracy against code:
|
||||
- openhands/runtime/impl/action_execution/action_execution_client.py
|
||||
- openhands/runtime/impl/docker/docker_runtime.py
|
||||
- openhands/runtime/impl/local/local_runtime.py
|
||||
- openhands/runtime/action_execution_server.py
|
||||
- openhands/runtime/plugins/*
|
||||
- Build docs locally or view on GitHub to confirm diagrams render.
|
||||
## Prerequisites
|
||||
|
||||
- Running python environment in which openhands is executable
|
||||
(according to the instructions in the README.md file in the root of the repository)
|
||||
- [py2puml](https://github.com/lucsorel/py2puml) installed
|
||||
|
||||
## Steps
|
||||
|
||||
1. Autogenerate the diagram by running the following command from the root of the repository:
|
||||
`py2puml openhands openhands > docs/architecture/backend_architecture.puml`
|
||||
|
||||
2. Open the generated file in a PlantUML editor, e.g. Visual Studio Code with the PlantUML extension or [PlantText](https://www.planttext.com/)
|
||||
|
||||
3. Review the generated PUML and make all necessary adjustments to the diagram (add missing parts, fix mistakes, improve positioning).
|
||||
_py2puml creates the diagram based on the type hints in the code, so missing or incorrect type hints may result in an incomplete or incorrect diagram._
|
||||
|
||||
4. Review the diff between the new and the previous diagram and manually check if the changes are correct.
|
||||
_Make sure not to remove parts that were manually added to the diagram in the past and are still relevant._
|
||||
|
||||
5. Add the commit hash of the commit that was used to generate the diagram to the diagram footer.
|
||||
|
||||
6. Export the diagram as PNG and SVG files and replace the existing diagrams in the `docs/architecture` directory. This can be done with a PlantUML editor such as [PlantText](https://www.planttext.com/).
|
||||
|
||||
</div>
|
||||
</details>
|
||||
|
||||
@@ -52,7 +52,7 @@ graph TD
|
||||
2. Image Building: OpenHands builds a new Docker image (the "OH runtime image") based on the user-provided image. This new image includes OpenHands-specific code, primarily the "runtime client"
|
||||
3. Container Launch: When OpenHands starts, it launches a Docker container using the OH runtime image
|
||||
4. Action Execution Server Initialization: The action execution server initializes an `ActionExecutor` inside the container, setting up necessary components like a bash shell and loading any specified plugins
|
||||
5. Communication: The OpenHands backend (client: `openhands/runtime/impl/action_execution/action_execution_client.py`; runtimes: `openhands/runtime/impl/docker/docker_runtime.py`, `openhands/runtime/impl/local/local_runtime.py`) communicates with the action execution server over RESTful API, sending actions and receiving observations
|
||||
5. Communication: The OpenHands backend (`openhands/runtime/impl/eventstream/eventstream_runtime.py`) communicates with the action execution server over RESTful API, sending actions and receiving observations
|
||||
6. Action Execution: The runtime client receives actions from the backend, executes them in the sandboxed environment, and sends back observations
|
||||
7. Observation Return: The action execution server sends execution results back to the OpenHands backend as observations
|
||||
|
||||
@@ -72,7 +72,7 @@ Check out the [relevant code](https://github.com/All-Hands-AI/OpenHands/blob/mai
|
||||
### Image Tagging System
|
||||
|
||||
OpenHands uses a three-tag system for its runtime images to balance reproducibility with flexibility.
|
||||
The tags are:
|
||||
Tags may be in one of 2 formats:
|
||||
|
||||
- **Versioned Tag**: `oh_v{openhands_version}_{base_image}` (e.g.: `oh_v0.9.9_nikolaik_s_python-nodejs_t_python3.12-nodejs22`)
|
||||
- **Lock Tag**: `oh_v{openhands_version}_{16_digit_lock_hash}` (e.g.: `oh_v0.9.9_1234567890abcdef`)
|
||||
@@ -119,52 +119,18 @@ This tagging approach allows OpenHands to efficiently manage both development an
|
||||
2. The system can quickly rebuild images when minor changes occur (by leveraging recent compatible images)
|
||||
3. The **lock** tag (e.g., `runtime:oh_v0.9.3_1234567890abcdef`) always points to the latest build for a particular base image, dependency, and OpenHands version combination
|
||||
|
||||
## Volume mounts: named volumes and overlay
|
||||
|
||||
OpenHands supports both bind mounts and Docker named volumes in SandboxConfig.volumes:
|
||||
|
||||
- Bind mount: "/abs/host/path:/container/path[:mode]"
|
||||
- Named volume: "volume:<name>:/container/path[:mode]" or any non-absolute host spec treated as a named volume
|
||||
|
||||
Overlay mode (copy-on-write layer) is supported for bind mounts by appending ":overlay" to the mode (e.g., ":ro,overlay").
|
||||
To enable overlay COW, set SANDBOX_VOLUME_OVERLAYS to a writable host directory; per-container upper/work dirs are created under it. If SANDBOX_VOLUME_OVERLAYS is unset, overlay mounts are skipped.
|
||||
|
||||
Implementation references:
|
||||
- openhands/runtime/impl/docker/docker_runtime.py (named volumes in _build_docker_run_args; overlay mounts in _process_overlay_mounts)
|
||||
- openhands/core/config/sandbox_config.py (volumes field)
|
||||
|
||||
|
||||
## Runtime Plugin System
|
||||
|
||||
The OpenHands Runtime supports a plugin system that allows for extending functionality and customizing the runtime environment. Plugins are initialized when the action execution server starts up inside the runtime.
|
||||
The OpenHands Runtime supports a plugin system that allows for extending functionality and customizing the runtime environment. Plugins are initialized when the runtime client starts up.
|
||||
|
||||
## Ports and URLs
|
||||
Check [an example of Jupyter plugin here](https://github.com/All-Hands-AI/OpenHands/blob/ecf4aed28b0cf7c18d4d8ff554883ba182fc6bdd/openhands/runtime/plugins/jupyter/__init__.py#L21-L55) if you want to implement your own plugin.
|
||||
|
||||
- Host port allocation uses file-locked ranges for stability and concurrency:
|
||||
- Main runtime port: find_available_port_with_lock on configured range
|
||||
- VSCode port: SandboxConfig.sandbox.vscode_port if provided, else find_available_port_with_lock in VSCODE_PORT_RANGE
|
||||
- App ports: two additional ranges for plugin/web apps
|
||||
- DOCKER_HOST_ADDR (if set) adjusts how URLs are formed for LocalRuntime/Docker environments.
|
||||
- VSCode URL is exposed with a connection token from the action execution server endpoint /vscode/connection_token and rendered as:
|
||||
- Docker/Local: http://localhost:{port}/?tkn={token}&folder={workspace_mount_path_in_sandbox}
|
||||
- RemoteRuntime: scheme://vscode-{host}/?tkn={token}&folder={workspace_mount_path_in_sandbox}
|
||||
|
||||
References:
|
||||
- openhands/runtime/impl/docker/docker_runtime.py (port ranges, locking, DOCKER_HOST_ADDR, vscode_url)
|
||||
- openhands/runtime/impl/local/local_runtime.py (vscode_url factory)
|
||||
- openhands/runtime/impl/remote/remote_runtime.py (vscode_url mapping)
|
||||
- openhands/runtime/action_execution_server.py (/vscode/connection_token)
|
||||
|
||||
|
||||
Examples:
|
||||
- Jupyter: openhands/runtime/plugins/jupyter/__init__.py (JupyterPlugin, Kernel Gateway)
|
||||
- VS Code: openhands/runtime/plugins/vscode/* (VSCodePlugin, exposes tokenized URL)
|
||||
- Agent Skills: openhands/runtime/plugins/agent_skills/*
|
||||
*More details about the Plugin system are still under construction - contributions are welcomed!*
|
||||
|
||||
Key aspects of the plugin system:
|
||||
|
||||
1. Plugin Definition: Plugins are defined as Python classes that inherit from a base `Plugin` class
|
||||
2. Plugin Registration: Available plugins are registered in `openhands/runtime/plugins/__init__.py` via `ALL_PLUGINS`
|
||||
2. Plugin Registration: Available plugins are registered in an `ALL_PLUGINS` dictionary
|
||||
3. Plugin Specification: Plugins are associated with `Agent.sandbox_plugins: list[PluginRequirement]`. Users can specify which plugins to load when initializing the runtime
|
||||
4. Initialization: Plugins are initialized asynchronously when the runtime starts and are accessible to actions
|
||||
5. Usage: Plugins extend capabilities (e.g., Jupyter for IPython cells); the server exposes any web endpoints (ports) via host port mapping
|
||||
4. Initialization: Plugins are initialized asynchronously when the runtime client starts
|
||||
5. Usage: The runtime client can use initialized plugins to extend its capabilities (e.g., the JupyterPlugin for running IPython cells)
|
||||
|
||||
@@ -65,7 +65,7 @@ To send follow-up messages for the same conversation, mention `@openhands` in a
|
||||
|
||||
Conversation is started by mentioning `@openhands`.
|
||||
|
||||

|
||||

|
||||
|
||||
### See agent response and send follow up messages
|
||||
|
||||
|
||||
@@ -1,52 +0,0 @@
|
||||
# Confirmation Mode and Security Analyzers
|
||||
|
||||
OpenHands provides a security framework to help protect users from potentially risky actions through **Confirmation Mode** and **Security Analyzers**. This system analyzes agent actions and prompts users for confirmation when high-risk operations are detected.
|
||||
|
||||
## Overview
|
||||
|
||||
The security system consists of two main components:
|
||||
|
||||
1. **Confirmation Mode**: When enabled, the agent will pause and ask for user confirmation before executing actions that are flagged as high-risk by the security analyzer.
|
||||
|
||||
2. **Security Analyzers**: These are modules that evaluate the risk level of agent actions and determine whether user confirmation is required.
|
||||
|
||||
## Configuration
|
||||
|
||||
### CLI
|
||||
In CLI mode, confirmation is enabled by default. You will have the option to use the LLM Analyzer, which automatically confirms LOW and MEDIUM risk actions and only prompts for HIGH risk actions.
|
||||
|
||||
## Security Analyzers
|
||||
|
||||
OpenHands includes multiple analyzers:
|
||||
|
||||
- **No Analyzer**: Do not use any security analyzer. The agent will prompt you to confirm *EVERY* action.
|
||||
- **LLM Risk Analyzer** (default): Uses the same LLM as the agent to assess action risk levels
|
||||
- **Invariant Analyzer**: Uses Invariant Labs' policy engine to evaluate action traces against security policies
|
||||
|
||||
### LLM Risk Analyzer
|
||||
The default analyzer that leverages the agent's LLM to evaluate the security risk of each action. It considers the action type, parameters, and context to assign risk levels.
|
||||
|
||||
### Invariant Analyzer
|
||||
An advanced analyzer that:
|
||||
- Collects conversation events and parses them into a trace
|
||||
- Checks the trace against an Invariant policy to classify risk (low, medium, high)
|
||||
- Manages an Invariant server container automatically if needed
|
||||
- Supports optional browsing-alignment and harmful-content checks
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Action Analysis**: When the agent wants to perform an action, the selected security analyzer evaluates its risk level.
|
||||
|
||||
2. **Risk Assessment**: The analyzer returns one of three risk levels:
|
||||
- **LOW**: Action proceeds without confirmation
|
||||
- **MEDIUM**: Action proceeds without confirmation (may be configurable in future)
|
||||
- **HIGH**: Action is paused, and user confirmation is requested
|
||||
|
||||
3. **User Confirmation**: For high-risk actions, a confirmation dialog appears with:
|
||||
- Description of the action
|
||||
- Risk assessment explanation
|
||||
- Options to approve or deny action
|
||||
|
||||
4. **Action Execution**: Based on user response:
|
||||
- **Approve**: Action proceeds as planned
|
||||
- **Deny**: Action is cancelled
|
||||
@@ -119,7 +119,7 @@ The conversation history will be saved in `~/.openhands/sessions`.
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -128,8 +128,8 @@ docker run -it \
|
||||
-v ~/.openhands:/.openhands \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54 \
|
||||
python -m openhands.cli.entry --override-cli-mode true
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53 \
|
||||
python -m openhands.cli.main --override-cli-mode true
|
||||
```
|
||||
|
||||
<Note>
|
||||
|
||||
@@ -61,7 +61,7 @@ export GITHUB_TOKEN="your-token" # Required for repository operations
|
||||
# Run OpenHands
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -73,7 +73,7 @@ docker run -it \
|
||||
-v ~/.openhands:/.openhands \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi"
|
||||
```
|
||||
|
||||
|
||||
@@ -68,23 +68,23 @@ Download and install the LM Studio desktop app from [lmstudio.ai](https://lmstud
|
||||
1. Check [the installation guide](/usage/local-setup) and ensure all prerequisites are met before running OpenHands, then run:
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands:/.openhands \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
```
|
||||
|
||||
2. Wait until the server is running (see log below):
|
||||
```
|
||||
Digest: sha256:e72f9baecb458aedb9afc2cd5bc935118d1868719e55d50da73190d3a85c674f
|
||||
Status: Image is up to date for docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
Status: Image is up to date for docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
Starting OpenHands...
|
||||
Running OpenHands as root
|
||||
14:22:13 - openhands:INFO: server_config.py:50 - Using config class None
|
||||
|
||||
@@ -109,17 +109,17 @@ Note that you'll still need `uv` installed for the default MCP servers to work p
|
||||
<Accordion title="Docker Command (Click to expand)">
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands:/.openhands \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
```
|
||||
|
||||
</Accordion>
|
||||
|
||||
@@ -130,28 +130,3 @@ docker run # ... \
|
||||
<Note>
|
||||
**Docker Desktop Required**: Network isolation features, including custom networks and `host.docker.internal` routing, require Docker Desktop. Docker Engine alone does not support these features on localhost across custom networks. If you're using Docker Engine without Docker Desktop, network isolation may not work as expected.
|
||||
</Note>
|
||||
|
||||
### Sidecar Containers
|
||||
|
||||
If you want to run sidecar containers to the sandbox 'runner' containers without exposing the sandbox containers to the host network, you can use the `SANDBOX_ADDITIONAL_NETWORKS` environment variable to specify additional Docker network names that should be added to the sandbox containers.
|
||||
|
||||
```bash
|
||||
docker network create openhands-sccache
|
||||
|
||||
docker run -d \
|
||||
--hostname openhandsredis \
|
||||
--network openhands-sccache \
|
||||
redis
|
||||
|
||||
docker run # ...
|
||||
-e SANDBOX_ADDITIONAL_NETWORKS='["openhands-sccache"]' \
|
||||
# ...
|
||||
```
|
||||
|
||||
Then all sandbox instances will have access to a shared redis instance at `openhandsredis:6379`.
|
||||
|
||||
#### Docker Compose gotcha
|
||||
|
||||
Note that Docker Compose adds a prefix (a scope) by default to the networks it creates, which is not taken into account by the additional networks config. Therefore, when using Docker Compose, you have to either:
|
||||
- specify a network name via the `name` field to remove the scoping (https://docs.docker.com/reference/compose-file/networks/#name)
|
||||
- or provide the scope within the given config (e.g. `SANDBOX_ADDITIONAL_NETWORKS: '["myscope_openhands-sccache"]'` where `myscope` is the docker-compose assigned prefix).
|
||||
@@ -9,8 +9,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -61,15 +60,18 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> OpenHandsConfig:
|
||||
# Create config with EDA-specific container image
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
|
||||
# Override the container image for EDA
|
||||
config.sandbox.base_container_image = 'python:3.12-bookworm'
|
||||
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
@@ -144,7 +146,7 @@ def process_instance(
|
||||
|
||||
logger.info(f'Final message: {final_message} | Ground truth: {instance["text"]}')
|
||||
test_result = game.reward()
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -17,8 +17,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -41,12 +40,19 @@ from openhands.utils.async_utils import call_async_from_sync
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> OpenHandsConfig:
|
||||
# Create config with agent_bench-specific container image
|
||||
config = get_openhands_config_for_eval(metadata=metadata)
|
||||
|
||||
# Override the container image for agent_bench
|
||||
config.sandbox.base_container_image = 'python:3.12-slim'
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-slim'
|
||||
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
@@ -267,7 +273,7 @@ def process_instance(
|
||||
# remove when it becomes unnecessary
|
||||
histories = compatibility_for_eval_history_pairs(state.history)
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# Save the output
|
||||
output = EvalOutput(
|
||||
|
||||
@@ -17,8 +17,6 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -51,10 +49,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.11-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
sandbox_config=sandbox_config,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -243,7 +246,7 @@ def process_instance(
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
# remove when it becomes unnecessary
|
||||
histories = compatibility_for_eval_history_pairs(state.history)
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# Save the output
|
||||
output = EvalOutput(
|
||||
|
||||
@@ -15,8 +15,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -62,10 +60,15 @@ def get_config(
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = BIOCODER_BENCH_CONTAINER_IMAGE
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -291,7 +294,7 @@ def process_instance(
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
test_result = complete_runtime(runtime, instance)
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
# remove when it becomes unnecessary
|
||||
|
||||
@@ -18,8 +18,6 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -76,10 +74,15 @@ def get_config(
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -419,7 +422,7 @@ def process_instance(
|
||||
# You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -11,8 +11,6 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -41,8 +39,14 @@ def get_config(
|
||||
)
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata, runtime='docker', sandbox_config=sandbox_config
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -84,7 +88,7 @@ def process_instance(
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
# remove when it becomes unnecessary
|
||||
|
||||
@@ -16,8 +16,6 @@ from evaluation.utils.shared import (
|
||||
assert_and_raise,
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -115,11 +113,16 @@ def get_config(
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = base_container_image
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
sandbox_config=sandbox_config,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
@@ -477,7 +480,7 @@ def process_instance(
|
||||
|
||||
# NOTE: this is NO LONGER the event stream, but an agent history that includes delegate agent's events
|
||||
histories = [event_to_dict(event) for event in state.history]
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# Save the output
|
||||
output = EvalOutput(
|
||||
|
||||
@@ -17,8 +17,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -66,10 +64,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -291,7 +294,7 @@ def process_instance(
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
test_result = complete_runtime(state)
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
|
||||
@@ -22,8 +22,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -61,10 +59,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'nikolaik/python-nodejs:python3.12-nodejs22'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
sandbox_config=sandbox_config,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
if metadata.agent_config:
|
||||
@@ -266,7 +269,7 @@ Here is the task:
|
||||
'model_answer': model_answer,
|
||||
'ground_truth': instance['Final answer'],
|
||||
}
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -12,8 +12,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -44,10 +42,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -105,7 +108,7 @@ def process_instance(
|
||||
# attempt to parse model_answer
|
||||
ast_eval_fn = instance['ast_eval']
|
||||
correct, hallucination = ast_eval_fn(instance_id, model_answer_raw)
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
logger.info(
|
||||
f'Final message: {model_answer_raw} | Correctness: {correct} | Hallucination: {hallucination}'
|
||||
)
|
||||
|
||||
@@ -30,8 +30,6 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -65,10 +63,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -289,7 +292,7 @@ Ok now its time to start solving the question. Good luck!
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# Save the output
|
||||
output = EvalOutput(
|
||||
|
||||
@@ -23,8 +23,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -86,10 +84,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -245,7 +248,7 @@ def process_instance(
|
||||
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
test_result = complete_runtime(runtime, instance)
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
|
||||
@@ -16,7 +16,6 @@ import ruamel.yaml
|
||||
from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
)
|
||||
from openhands.core.config import (
|
||||
@@ -38,10 +37,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
@@ -22,8 +22,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -49,10 +47,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -332,7 +335,7 @@ Be thorough in your exploration, testing, and reasoning. It's fine if your think
|
||||
)
|
||||
)
|
||||
assert state is not None
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else {}
|
||||
|
||||
test_result = complete_runtime(runtime, instance)
|
||||
|
||||
|
||||
@@ -10,8 +10,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -53,10 +51,15 @@ def get_config(
|
||||
'$OH_INTERPRETER_PATH -m pip install scitools-pyke'
|
||||
)
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -244,7 +247,7 @@ def process_instance(
|
||||
)
|
||||
test_result['final_message'] = final_message
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
# remove when it becomes unnecessary
|
||||
|
||||
@@ -13,8 +13,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -59,10 +57,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'xingyaoww/od-eval-miniwob:v1.0'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
@@ -171,7 +174,7 @@ def process_instance(
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# Instruction is the first message from the USER
|
||||
instruction = ''
|
||||
|
||||
@@ -15,8 +15,6 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -111,10 +109,15 @@ def get_config(
|
||||
f'$OH_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}'
|
||||
)
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -202,7 +205,7 @@ def process_instance(
|
||||
task_state = state.extra_data['task_state']
|
||||
logger.info('Task state: ' + str(task_state.to_dict()))
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -26,8 +26,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -81,10 +79,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'public.ecr.aws/i5g0m1f6/ml-bench'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -247,7 +250,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
|
||||
)
|
||||
)
|
||||
assert state is not None
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else {}
|
||||
|
||||
test_result = complete_runtime(runtime)
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_openhands_config_for_eval,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
@@ -88,9 +87,13 @@ def get_config(metadata: EvalMetadata, instance: pd.Series) -> OpenHandsConfig:
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
config = get_openhands_config_for_eval(
|
||||
config = OpenHandsConfig(
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
return config
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -342,11 +341,16 @@ def get_config(
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
|
||||
@@ -31,7 +31,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -175,10 +174,15 @@ def get_config(
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
|
||||
config.set_llm_config(
|
||||
|
||||
@@ -12,8 +12,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -65,10 +63,16 @@ def get_config(
|
||||
sandbox_config.base_container_image = (
|
||||
'docker.io/xingyaoww/openhands-eval-scienceagentbench'
|
||||
)
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
max_budget_per_task=4,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
@@ -214,7 +218,7 @@ If the program uses some packages that are incompatible, please figure out alter
|
||||
# You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -93,9 +93,6 @@ export USE_HINT_TEXT=true # Ignore this if you are not sure.
|
||||
|
||||
# Specify a condenser configuration for memory management (default: NoOpCondenser)
|
||||
export EVAL_CONDENSER=summarizer_for_eval # Name of the condenser config group in config.toml
|
||||
|
||||
# Specify the instruction prompt template file name
|
||||
export INSTRUCTION_TEMPLATE_NAME=swe_custom.j2 # Name of the file in the swe_bench/prompts folder.
|
||||
```
|
||||
|
||||
Let's say you'd like to run 10 instances using `llm.eval_gpt4_1106_preview` and CodeActAgent,
|
||||
|
||||
@@ -19,7 +19,6 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_openhands_config_for_eval,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
@@ -84,9 +83,13 @@ def get_config(metadata: EvalMetadata, instance: pd.Series) -> OpenHandsConfig:
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
config = get_openhands_config_for_eval(
|
||||
config = OpenHandsConfig(
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
return config
|
||||
|
||||
|
||||
@@ -32,7 +32,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -109,9 +108,7 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata) -> MessageActio
|
||||
llm_model = metadata.llm_config.model
|
||||
|
||||
# Determine the template file based on mode and LLM
|
||||
if metadata.instruction_template_name:
|
||||
template_name = metadata.instruction_template_name
|
||||
elif mode.startswith('swt'):
|
||||
if mode.startswith('swt'):
|
||||
template_name = 'swt.j2'
|
||||
elif mode == 'swe':
|
||||
if 'gpt-4.1' in llm_model:
|
||||
@@ -125,7 +122,6 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata) -> MessageActio
|
||||
logger.error(f'Unexpected evaluation mode: {mode}. Falling back to default.')
|
||||
template_name = 'swe_default.j2'
|
||||
|
||||
logger.debug(f'Using instruction template file: {template_name}')
|
||||
# Set up Jinja2 environment
|
||||
# Assuming templates are in 'evaluation/benchmarks/swe_bench/prompts' relative to this script
|
||||
prompts_dir = os.path.join(os.path.dirname(__file__), 'prompts')
|
||||
@@ -228,11 +224,16 @@ def get_config(
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
|
||||
config.set_llm_config(
|
||||
|
||||
@@ -21,7 +21,6 @@ from evaluation.utils.shared import (
|
||||
EvalException,
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -180,7 +179,7 @@ def process_instance(
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
histories = [event_to_dict(event) for event in state.history]
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# Save the output
|
||||
instruction = message_action.content
|
||||
|
||||
@@ -20,7 +20,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -200,11 +199,16 @@ def get_config(
|
||||
'REPO_PATH': f'/workspace/{workspace_dir_name}/',
|
||||
}
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
|
||||
@@ -37,7 +37,6 @@ from evaluation.benchmarks.testgeneval.utils import load_testgeneval_dataset
|
||||
from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_openhands_config_for_eval,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
@@ -59,21 +58,20 @@ def get_config(instance: pd.Series) -> OpenHandsConfig:
|
||||
f'Invalid container image for instance {instance["instance_id_swebench"]}.'
|
||||
)
|
||||
logger.info(f'Using instance container image: {base_container_image}.')
|
||||
|
||||
# Create custom sandbox config for testgeneval with specific requirements
|
||||
sandbox_config = SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
use_host_network=False,
|
||||
timeout=1800, # Longer timeout than default (300)
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY'),
|
||||
remote_runtime_api_url=os.environ.get(
|
||||
'SANDBOX_REMOTE_RUNTIME_API_URL', 'http://localhost:8000'
|
||||
return OpenHandsConfig(
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'eventstream'),
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
use_host_network=False,
|
||||
timeout=1800,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY'),
|
||||
remote_runtime_api_url=os.environ.get(
|
||||
'SANDBOX_REMOTE_RUNTIME_API_URL', 'http://localhost:8000'
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
return get_openhands_config_for_eval(
|
||||
sandbox_config=sandbox_config,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'), # Different default runtime
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -25,7 +25,6 @@ from evaluation.utils.shared import (
|
||||
assert_and_raise,
|
||||
codeact_user_response,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -127,26 +126,29 @@ def get_config(
|
||||
f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
|
||||
)
|
||||
|
||||
sandbox_config = SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
# large enough timeout, since some testcases take very long to run
|
||||
timeout=300,
|
||||
# Add platform to the sandbox config to solve issue 4401
|
||||
platform='linux/amd64',
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get(
|
||||
'SANDBOX_REMOTE_RUNTIME_API_URL', 'http://localhost:8000'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
runtime=os.environ.get('RUNTIME', 'eventstream'),
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
# large enough timeout, since some testcases take very long to run
|
||||
timeout=300,
|
||||
# Add platform to the sandbox config to solve issue 4401
|
||||
platform='linux/amd64',
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get(
|
||||
'SANDBOX_REMOTE_RUNTIME_API_URL', 'http://localhost:8000'
|
||||
),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=3600,
|
||||
),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=3600,
|
||||
)
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
sandbox_config=sandbox_config,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
|
||||
@@ -12,10 +12,7 @@ import tempfile
|
||||
import yaml
|
||||
from browsing import pre_login
|
||||
|
||||
from evaluation.utils.shared import (
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_openhands_config_for_eval,
|
||||
)
|
||||
from evaluation.utils.shared import get_default_sandbox_config_for_eval
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
LLMConfig,
|
||||
@@ -45,17 +42,19 @@ def get_config(
|
||||
sandbox_config.enable_auto_lint = True
|
||||
# If the web services are running on the host machine, this must be set to True
|
||||
sandbox_config.use_host_network = True
|
||||
config = get_openhands_config_for_eval(
|
||||
config = OpenHandsConfig(
|
||||
run_as_openhands=False,
|
||||
max_budget_per_task=4,
|
||||
max_iterations=100,
|
||||
save_trajectory_path=os.path.join(
|
||||
mount_path_on_host, f'traj_{task_short_name}.json'
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# we mount trajectories path so that trajectories, generated by OpenHands
|
||||
# controller, can be accessible to the evaluator file in the runtime container
|
||||
sandbox_config=sandbox_config,
|
||||
workspace_mount_path=mount_path_on_host,
|
||||
workspace_mount_path_in_sandbox='/outputs',
|
||||
)
|
||||
config.save_trajectory_path = os.path.join(
|
||||
mount_path_on_host, f'traj_{task_short_name}.json'
|
||||
)
|
||||
config.max_budget_per_task = 4
|
||||
config.set_llm_config(llm_config)
|
||||
if agent_config:
|
||||
config.set_agent_config(agent_config)
|
||||
|
||||
@@ -11,8 +11,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -45,10 +43,15 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -131,7 +134,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
|
||||
correct = eval_answer(str(model_answer_raw), str(answer))
|
||||
logger.info(f'Final message: {model_answer_raw} | Correctness: {correct}')
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -20,7 +20,6 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -161,11 +160,16 @@ def get_config(
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
|
||||
@@ -12,8 +12,6 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -74,10 +72,16 @@ def get_config(
|
||||
'VWA_WIKIPEDIA': f'{base_url}:8888',
|
||||
'VWA_HOMEPAGE': f'{base_url}:4399',
|
||||
}
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
attach_to_existing=True,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
@@ -175,7 +179,7 @@ def process_instance(
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# Instruction obtained from the first message from the USER
|
||||
instruction = ''
|
||||
|
||||
@@ -12,8 +12,6 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -66,10 +64,15 @@ def get_config(
|
||||
'MAP': f'{base_url}:3000',
|
||||
'HOMEPAGE': f'{base_url}:4399',
|
||||
}
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -160,7 +163,7 @@ def process_instance(
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# Instruction is the first message from the USER
|
||||
instruction = ''
|
||||
|
||||
@@ -9,8 +9,6 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -46,12 +44,18 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.platform = 'linux/amd64'
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox_config=sandbox_config,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
# debug
|
||||
debug=True,
|
||||
)
|
||||
config.debug = True
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
metadata.llm_config, metadata.eval_output_dir, instance_id
|
||||
@@ -131,7 +135,7 @@ def process_instance(
|
||||
assert len(histories) > 0, 'History should not be empty'
|
||||
|
||||
test_result: TestResult = test_class.verify_result(runtime, histories)
|
||||
metrics = get_metrics(state)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
finally:
|
||||
runtime.close()
|
||||
|
||||
|
||||
@@ -53,7 +53,6 @@ class EvalMetadata(BaseModel):
|
||||
data_split: str | None = None
|
||||
details: dict[str, Any] | None = None
|
||||
condenser_config: CondenserConfig | None = None
|
||||
instruction_template_name: str | None = None
|
||||
|
||||
|
||||
class EvalOutput(BaseModel):
|
||||
@@ -206,7 +205,6 @@ def make_metadata(
|
||||
condenser_config=condenser_config
|
||||
if condenser_config
|
||||
else NoOpCondenserConfig(),
|
||||
instruction_template_name=os.environ.get('INSTRUCTION_TEMPLATE_NAME'),
|
||||
)
|
||||
metadata_json = metadata.model_dump_json()
|
||||
logger.info(f'Metadata: {metadata_json}')
|
||||
@@ -668,23 +666,8 @@ def is_fatal_runtime_error(error: str | None) -> bool:
|
||||
|
||||
|
||||
def get_metrics(state: State) -> dict[str, Any]:
|
||||
"""Extract metrics for evaluations.
|
||||
|
||||
Prefer ConversationStats (source of truth) and fall back to state.metrics for
|
||||
backward compatibility.
|
||||
"""
|
||||
metrics: dict[str, Any]
|
||||
try:
|
||||
if getattr(state, 'conversation_stats', None):
|
||||
combined = state.conversation_stats.get_combined_metrics()
|
||||
metrics = combined.get()
|
||||
elif getattr(state, 'metrics', None):
|
||||
metrics = state.metrics.get()
|
||||
else:
|
||||
metrics = {}
|
||||
except Exception:
|
||||
metrics = state.metrics.get() if getattr(state, 'metrics', None) else {}
|
||||
|
||||
"""Extract metrics from the state."""
|
||||
metrics = state.metrics.get() if state.metrics else {}
|
||||
metrics['condenser'] = get_condensation_metadata(state)
|
||||
return metrics
|
||||
|
||||
@@ -703,79 +686,3 @@ def get_default_sandbox_config_for_eval() -> SandboxConfig:
|
||||
remote_runtime_enable_retries=True,
|
||||
remote_runtime_class='sysbox',
|
||||
)
|
||||
|
||||
|
||||
def get_openhands_config_for_eval(
|
||||
metadata: EvalMetadata | None = None,
|
||||
sandbox_config: SandboxConfig | None = None,
|
||||
runtime: str | None = None,
|
||||
max_iterations: int | None = None,
|
||||
default_agent: str | None = None,
|
||||
enable_browser: bool = False,
|
||||
workspace_base: str | None = None,
|
||||
workspace_mount_path: str | None = None,
|
||||
):
|
||||
"""Create an OpenHandsConfig with common patterns used across evaluation scripts.
|
||||
|
||||
This function provides a standardized way to create OpenHands configurations
|
||||
for evaluation runs, with sensible defaults that match the patterns used in
|
||||
most run_infer.py scripts. Individual evaluation scripts can override specific
|
||||
attributes as needed.
|
||||
|
||||
Args:
|
||||
metadata: EvalMetadata containing agent class, max iterations, etc.
|
||||
sandbox_config: Custom sandbox config. If None, uses get_default_sandbox_config_for_eval()
|
||||
runtime: Runtime type. If None, uses environment RUNTIME or 'docker'
|
||||
max_iterations: Max iterations for the agent. If None, uses metadata.max_iterations
|
||||
default_agent: Agent class name. If None, uses metadata.agent_class
|
||||
enable_browser: Whether to enable browser functionality
|
||||
workspace_base: Workspace base path. Defaults to None
|
||||
workspace_mount_path: Workspace mount path. Defaults to None
|
||||
|
||||
Returns:
|
||||
OpenHandsConfig: Configured for evaluation with eval-specific overrides applied
|
||||
"""
|
||||
# Defer import to avoid circular imports at module load time
|
||||
from openhands.core.config.openhands_config import (
|
||||
OpenHandsConfig as _OHConfig, # type: ignore
|
||||
)
|
||||
|
||||
# Use provided sandbox config or get default
|
||||
if sandbox_config is None:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
|
||||
# Extract values from metadata if provided
|
||||
if metadata is not None:
|
||||
if max_iterations is None:
|
||||
max_iterations = metadata.max_iterations
|
||||
if default_agent is None:
|
||||
default_agent = metadata.agent_class
|
||||
|
||||
# Use environment runtime or default
|
||||
if runtime is None:
|
||||
runtime = os.environ.get('RUNTIME', 'docker')
|
||||
|
||||
# Provide sensible defaults if still None
|
||||
if default_agent is None:
|
||||
default_agent = 'CodeActAgent'
|
||||
if max_iterations is None:
|
||||
max_iterations = 50
|
||||
|
||||
# Always use repo-local .eval_sessions directory (absolute path)
|
||||
eval_store = os.path.abspath(os.path.join(os.getcwd(), '.eval_sessions'))
|
||||
|
||||
# Create the base config with evaluation-specific overrides
|
||||
config = _OHConfig(
|
||||
default_agent=default_agent,
|
||||
run_as_openhands=False,
|
||||
runtime=runtime,
|
||||
max_iterations=max_iterations,
|
||||
enable_browser=enable_browser,
|
||||
sandbox=sandbox_config,
|
||||
workspace_base=workspace_base,
|
||||
workspace_mount_path=workspace_mount_path,
|
||||
file_store='local',
|
||||
file_store_path=eval_store,
|
||||
)
|
||||
|
||||
return config
|
||||
|
||||
@@ -232,16 +232,13 @@ describe("RepositorySelectionForm", () => {
|
||||
renderForm();
|
||||
|
||||
const dropdown = await screen.findByTestId("repo-dropdown");
|
||||
const input = dropdown.querySelector(
|
||||
'input[type="text"]',
|
||||
) as HTMLInputElement;
|
||||
const input = dropdown.querySelector('input[type="text"]') as HTMLInputElement;
|
||||
expect(input).toBeInTheDocument();
|
||||
|
||||
await userEvent.type(input, "https://github.com/kubernetes/kubernetes");
|
||||
expect(searchGitReposSpy).toHaveBeenLastCalledWith(
|
||||
"kubernetes/kubernetes",
|
||||
3,
|
||||
"github",
|
||||
);
|
||||
});
|
||||
|
||||
@@ -271,16 +268,13 @@ describe("RepositorySelectionForm", () => {
|
||||
renderForm();
|
||||
|
||||
const dropdown = await screen.findByTestId("repo-dropdown");
|
||||
const input = dropdown.querySelector(
|
||||
'input[type="text"]',
|
||||
) as HTMLInputElement;
|
||||
const input = dropdown.querySelector('input[type="text"]') as HTMLInputElement;
|
||||
expect(input).toBeInTheDocument();
|
||||
|
||||
await userEvent.type(input, "https://github.com/kubernetes/kubernetes");
|
||||
expect(searchGitReposSpy).toHaveBeenLastCalledWith(
|
||||
"kubernetes/kubernetes",
|
||||
3,
|
||||
"github",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
+12
-78
@@ -444,38 +444,28 @@ describe("MicroagentManagement", () => {
|
||||
expect(filePath2).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should render add microagent button", async () => {
|
||||
it("should display add microagent button in repository accordion", async () => {
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(screen.getByTestId("repository-name-tooltip")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Check that add microagent buttons are present
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
expect(addButtons.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("should open modal when add button is clicked", async () => {
|
||||
it("should open add microagent modal when add button is clicked", async () => {
|
||||
const user = userEvent.setup();
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(screen.getByTestId("repository-name-tooltip")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Find and click the first add microagent button
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
await user.click(addButtons[0]);
|
||||
@@ -1302,18 +1292,11 @@ describe("MicroagentManagement", () => {
|
||||
it("should render add microagent button", async () => {
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByTestId("repository-name-tooltip"),
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Check that add microagent buttons are present
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
expect(addButtons.length).toBeGreaterThan(0);
|
||||
@@ -1323,18 +1306,11 @@ describe("MicroagentManagement", () => {
|
||||
const user = userEvent.setup();
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByTestId("repository-name-tooltip"),
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Find and click the first add microagent button
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
await user.click(addButtons[0]);
|
||||
@@ -1385,18 +1361,11 @@ describe("MicroagentManagement", () => {
|
||||
const user = userEvent.setup();
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByTestId("repository-name-tooltip"),
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Find and click the first add microagent button
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
await user.click(addButtons[0]);
|
||||
@@ -1416,18 +1385,11 @@ describe("MicroagentManagement", () => {
|
||||
const user = userEvent.setup();
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByTestId("repository-name-tooltip"),
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Find and click the first add microagent button
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
await user.click(addButtons[0]);
|
||||
@@ -1446,18 +1408,11 @@ describe("MicroagentManagement", () => {
|
||||
const user = userEvent.setup();
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByTestId("repository-name-tooltip"),
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Find and click the first add microagent button
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
await user.click(addButtons[0]);
|
||||
@@ -1486,18 +1441,11 @@ describe("MicroagentManagement", () => {
|
||||
const user = userEvent.setup();
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByTestId("repository-name-tooltip"),
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Find and click the first add microagent button
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
await user.click(addButtons[0]);
|
||||
@@ -1520,18 +1468,11 @@ describe("MicroagentManagement", () => {
|
||||
const user = userEvent.setup();
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByTestId("repository-name-tooltip"),
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Find and click the first add microagent button
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
await user.click(addButtons[0]);
|
||||
@@ -1553,18 +1494,11 @@ describe("MicroagentManagement", () => {
|
||||
const user = userEvent.setup();
|
||||
renderMicroagentManagement();
|
||||
|
||||
// Wait for repositories to be loaded and processed
|
||||
// Wait for repositories to be loaded
|
||||
await waitFor(() => {
|
||||
expect(mockUseUserRepositories).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Wait for repositories to be displayed in the accordion
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByTestId("repository-name-tooltip"),
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Find and click the first add microagent button
|
||||
const addButtons = screen.getAllByTestId("add-microagent-button");
|
||||
await user.click(addButtons[0]);
|
||||
|
||||
@@ -79,35 +79,6 @@ describe("Content", () => {
|
||||
expect(screen.getByTestId("set-indicator")).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
it("should conditionally show security analyzer based on confirmation mode", async () => {
|
||||
renderLlmSettingsScreen();
|
||||
await screen.findByTestId("llm-settings-screen");
|
||||
|
||||
const confirmation = screen.getByTestId("enable-confirmation-mode-switch");
|
||||
|
||||
// Initially confirmation mode is false, so security analyzer should not be visible
|
||||
expect(confirmation).not.toBeChecked();
|
||||
expect(
|
||||
screen.queryByTestId("security-analyzer-input"),
|
||||
).not.toBeInTheDocument();
|
||||
|
||||
// Enable confirmation mode
|
||||
await userEvent.click(confirmation);
|
||||
expect(confirmation).toBeChecked();
|
||||
|
||||
// Security analyzer should now be visible
|
||||
screen.getByTestId("security-analyzer-input");
|
||||
|
||||
// Disable confirmation mode again
|
||||
await userEvent.click(confirmation);
|
||||
expect(confirmation).not.toBeChecked();
|
||||
|
||||
// Security analyzer should be hidden again
|
||||
expect(
|
||||
screen.queryByTestId("security-analyzer-input"),
|
||||
).not.toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
describe("Advanced form", () => {
|
||||
@@ -136,6 +107,7 @@ describe("Content", () => {
|
||||
within(advancedForm).getByTestId("llm-api-key-input");
|
||||
within(advancedForm).getByTestId("llm-api-key-help-anchor-advanced");
|
||||
within(advancedForm).getByTestId("agent-input");
|
||||
within(advancedForm).getByTestId("enable-confirmation-mode-switch");
|
||||
within(advancedForm).getByTestId("enable-memory-condenser-switch");
|
||||
|
||||
await userEvent.click(advancedSwitch);
|
||||
@@ -158,6 +130,9 @@ describe("Content", () => {
|
||||
const baseUrl = screen.getByTestId("base-url-input");
|
||||
const apiKey = screen.getByTestId("llm-api-key-input");
|
||||
const agent = screen.getByTestId("agent-input");
|
||||
const confirmation = screen.getByTestId(
|
||||
"enable-confirmation-mode-switch",
|
||||
);
|
||||
const condensor = screen.getByTestId("enable-memory-condenser-switch");
|
||||
|
||||
expect(model).toHaveValue("openhands/claude-sonnet-4-20250514");
|
||||
@@ -165,7 +140,15 @@ describe("Content", () => {
|
||||
expect(apiKey).toHaveValue("");
|
||||
expect(apiKey).toHaveProperty("placeholder", "");
|
||||
expect(agent).toHaveValue("CodeActAgent");
|
||||
expect(confirmation).not.toBeChecked();
|
||||
expect(condensor).toBeChecked();
|
||||
|
||||
// check that security analyzer is present
|
||||
expect(
|
||||
screen.queryByTestId("security-analyzer-input"),
|
||||
).not.toBeInTheDocument();
|
||||
await userEvent.click(confirmation);
|
||||
screen.getByTestId("security-analyzer-input");
|
||||
});
|
||||
|
||||
it("should render the advanced form if existings settings are advanced", async () => {
|
||||
@@ -194,7 +177,7 @@ describe("Content", () => {
|
||||
agent: "CoActAgent",
|
||||
confirmation_mode: true,
|
||||
enable_default_condenser: false,
|
||||
security_analyzer: "none",
|
||||
security_analyzer: "mock-invariant",
|
||||
});
|
||||
|
||||
renderLlmSettingsScreen();
|
||||
@@ -220,7 +203,7 @@ describe("Content", () => {
|
||||
expect(agent).toHaveValue("CoActAgent");
|
||||
expect(confirmation).toBeChecked();
|
||||
expect(condensor).not.toBeChecked();
|
||||
expect(securityAnalyzer).toHaveValue("SETTINGS$SECURITY_ANALYZER_NONE");
|
||||
expect(securityAnalyzer).toHaveValue("mock-invariant");
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -310,7 +293,7 @@ describe("Form submission", () => {
|
||||
// select security analyzer
|
||||
const securityAnalyzer = screen.getByTestId("security-analyzer-input");
|
||||
await userEvent.click(securityAnalyzer);
|
||||
const securityAnalyzerOption = screen.getByText("SETTINGS$SECURITY_ANALYZER_NONE");
|
||||
const securityAnalyzerOption = screen.getByText("mock-invariant");
|
||||
await userEvent.click(securityAnalyzerOption);
|
||||
|
||||
const submitButton = screen.getByTestId("submit-button");
|
||||
@@ -323,7 +306,7 @@ describe("Form submission", () => {
|
||||
agent: "CoActAgent",
|
||||
confirmation_mode: true,
|
||||
enable_default_condenser: false,
|
||||
security_analyzer: null,
|
||||
security_analyzer: "mock-invariant",
|
||||
}),
|
||||
);
|
||||
});
|
||||
@@ -392,10 +375,8 @@ describe("Form submission", () => {
|
||||
const baseUrl = await screen.findByTestId("base-url-input");
|
||||
const apiKey = await screen.findByTestId("llm-api-key-input");
|
||||
const agent = await screen.findByTestId("agent-input");
|
||||
const condensor = await screen.findByTestId("enable-memory-condenser-switch");
|
||||
|
||||
// Confirmation mode switch is now in basic settings, always visible
|
||||
const confirmation = await screen.findByTestId("enable-confirmation-mode-switch");
|
||||
const condensor = await screen.findByTestId("enable-memory-condenser-switch");
|
||||
|
||||
// enter custom model
|
||||
await userEvent.type(model, "-mini");
|
||||
@@ -470,17 +451,14 @@ describe("Form submission", () => {
|
||||
// select security analyzer
|
||||
const securityAnalyzer = await screen.findByTestId("security-analyzer-input");
|
||||
await userEvent.click(securityAnalyzer);
|
||||
const securityAnalyzerOption = screen.getByText("SETTINGS$SECURITY_ANALYZER_NONE");
|
||||
const securityAnalyzerOption = screen.getByText("mock-invariant");
|
||||
await userEvent.click(securityAnalyzerOption);
|
||||
expect(securityAnalyzer).toHaveValue("SETTINGS$SECURITY_ANALYZER_NONE");
|
||||
expect(securityAnalyzer).toHaveValue("mock-invariant");
|
||||
|
||||
expect(submitButton).not.toBeDisabled();
|
||||
|
||||
// revert back to original value
|
||||
await userEvent.click(securityAnalyzer);
|
||||
const originalSecurityAnalyzerOption = screen.getByText("SETTINGS$SECURITY_ANALYZER_LLM_DEFAULT");
|
||||
await userEvent.click(originalSecurityAnalyzerOption);
|
||||
expect(securityAnalyzer).toHaveValue("SETTINGS$SECURITY_ANALYZER_LLM_DEFAULT");
|
||||
await userEvent.clear(securityAnalyzer);
|
||||
expect(securityAnalyzer).toHaveValue("");
|
||||
expect(submitButton).toBeDisabled();
|
||||
});
|
||||
|
||||
@@ -574,7 +552,7 @@ describe("Form submission", () => {
|
||||
expect.objectContaining({
|
||||
llm_model: "openhands/claude-sonnet-4-20250514",
|
||||
llm_base_url: "",
|
||||
confirmation_mode: true, // Confirmation mode is now a basic setting, should be preserved
|
||||
confirmation_mode: false,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
@@ -107,7 +107,9 @@ describe("Content", () => {
|
||||
expect(screen.queryByTestId("add-secret-button")).not.toBeInTheDocument(),
|
||||
);
|
||||
const button = await screen.findByTestId("connect-git-button");
|
||||
expect(button).toHaveAttribute("href", "/settings/integrations");
|
||||
await userEvent.click(button);
|
||||
|
||||
screen.getByTestId("git-settings-screen");
|
||||
});
|
||||
|
||||
it("should render an empty table when there are no existing secrets", async () => {
|
||||
|
||||
@@ -136,7 +136,7 @@ describe("Settings Screen", () => {
|
||||
"secrets",
|
||||
"api keys",
|
||||
];
|
||||
const sectionsToExclude = ["llm"];
|
||||
const sectionsToExclude = ["llm", "mcp"];
|
||||
|
||||
renderSettingsScreen();
|
||||
|
||||
|
||||
@@ -29,5 +29,23 @@ describe("hasAdvancedSettingsSet", () => {
|
||||
}),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("CONFIRMATION_MODE is true", () => {
|
||||
expect(
|
||||
hasAdvancedSettingsSet({
|
||||
...DEFAULT_SETTINGS,
|
||||
CONFIRMATION_MODE: true,
|
||||
}),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("SECURITY_ANALYZER is set", () => {
|
||||
expect(
|
||||
hasAdvancedSettingsSet({
|
||||
...DEFAULT_SETTINGS,
|
||||
SECURITY_ANALYZER: "test",
|
||||
}),
|
||||
).toBe(true);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
Generated
+2
-2
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "openhands-frontend",
|
||||
"version": "0.54.0",
|
||||
"version": "0.53.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "openhands-frontend",
|
||||
"version": "0.54.0",
|
||||
"version": "0.53.0",
|
||||
"dependencies": {
|
||||
"@heroui/react": "^2.8.2",
|
||||
"@heroui/use-infinite-scroll": "^2.2.10",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "openhands-frontend",
|
||||
"version": "0.54.0",
|
||||
"version": "0.53.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"engines": {
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
import { useCallback, useMemo, useState } from "react";
|
||||
import { useCallback, useMemo, useRef } from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { Provider } from "../../types/settings";
|
||||
import { useGitRepositories } from "../../hooks/query/use-git-repositories";
|
||||
import { useSearchRepositories } from "../../hooks/query/use-search-repositories";
|
||||
import { useDebounce } from "../../hooks/use-debounce";
|
||||
import OpenHands from "../../api/open-hands";
|
||||
import { GitRepository } from "../../types/git";
|
||||
import {
|
||||
@@ -21,6 +19,10 @@ export interface GitRepositoryDropdownProps {
|
||||
onChange?: (repository?: GitRepository) => void;
|
||||
}
|
||||
|
||||
interface SearchCache {
|
||||
[key: string]: GitRepository[];
|
||||
}
|
||||
|
||||
export function GitRepositoryDropdown({
|
||||
provider,
|
||||
value,
|
||||
@@ -31,20 +33,6 @@ export function GitRepositoryDropdown({
|
||||
onChange,
|
||||
}: GitRepositoryDropdownProps) {
|
||||
const { t } = useTranslation();
|
||||
const [searchInput, setSearchInput] = useState("");
|
||||
const debouncedSearchInput = useDebounce(searchInput, 300);
|
||||
|
||||
// Process search input to handle URLs
|
||||
const processedSearchInput = useMemo(() => {
|
||||
if (debouncedSearchInput.startsWith("https://")) {
|
||||
const match = debouncedSearchInput.match(
|
||||
/https:\/\/[^/]+\/([^/]+\/[^/]+)/,
|
||||
);
|
||||
return match ? match[1] : debouncedSearchInput;
|
||||
}
|
||||
return debouncedSearchInput;
|
||||
}, [debouncedSearchInput]);
|
||||
|
||||
const {
|
||||
data,
|
||||
fetchNextPage,
|
||||
@@ -57,10 +45,6 @@ export function GitRepositoryDropdown({
|
||||
enabled: !disabled,
|
||||
});
|
||||
|
||||
// Search query for processed input (handles URLs)
|
||||
const { data: searchData, isLoading: isSearchLoading } =
|
||||
useSearchRepositories(processedSearchInput, provider);
|
||||
|
||||
const allOptions: AsyncSelectOption[] = useMemo(
|
||||
() =>
|
||||
data?.pages
|
||||
@@ -74,83 +58,75 @@ export function GitRepositoryDropdown({
|
||||
[data],
|
||||
);
|
||||
|
||||
const searchOptions: AsyncSelectOption[] = useMemo(
|
||||
() =>
|
||||
searchData
|
||||
? searchData.map((repo) => ({
|
||||
value: repo.id,
|
||||
label: repo.full_name,
|
||||
}))
|
||||
: [],
|
||||
[searchData],
|
||||
);
|
||||
// Keep track of search results
|
||||
const searchCache = useRef<SearchCache>({});
|
||||
|
||||
const selectedOption = useMemo(() => {
|
||||
// First check in loaded pages
|
||||
const option = allOptions.find((opt) => opt.value === value);
|
||||
if (option) return option;
|
||||
|
||||
// If not found, check in search results
|
||||
const searchOption = searchOptions.find((opt) => opt.value === value);
|
||||
if (searchOption) return searchOption;
|
||||
// If not found, check in search cache
|
||||
const repo = Object.values(searchCache.current)
|
||||
.flat()
|
||||
.find((r) => r.id === value);
|
||||
|
||||
if (repo) {
|
||||
return {
|
||||
value: repo.id,
|
||||
label: repo.full_name,
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
}, [allOptions, searchOptions, value]);
|
||||
}, [allOptions, value]);
|
||||
|
||||
const loadOptions = useCallback(
|
||||
async (inputValue: string): Promise<AsyncSelectOption[]> => {
|
||||
// Update search input to trigger debounced search
|
||||
setSearchInput(inputValue);
|
||||
|
||||
// If empty input, show all loaded options
|
||||
if (!inputValue.trim()) {
|
||||
return allOptions;
|
||||
}
|
||||
|
||||
// For very short inputs, do local filtering
|
||||
if (inputValue.length < 2) {
|
||||
return allOptions.filter((option) =>
|
||||
option.label.toLowerCase().includes(inputValue.toLowerCase()),
|
||||
);
|
||||
}
|
||||
|
||||
// Handle URL inputs by performing direct search
|
||||
// If it looks like a URL, extract the repo name and search
|
||||
if (inputValue.startsWith("https://")) {
|
||||
const match = inputValue.match(/https:\/\/[^/]+\/([^/]+\/[^/]+)/);
|
||||
if (match) {
|
||||
const repoName = match[1];
|
||||
try {
|
||||
// Perform direct search for URL-based inputs
|
||||
const repositories = await OpenHands.searchGitRepositories(
|
||||
repoName,
|
||||
3,
|
||||
provider,
|
||||
);
|
||||
return repositories.map((repo) => ({
|
||||
value: repo.full_name,
|
||||
label: repo.full_name,
|
||||
data: repo,
|
||||
}));
|
||||
} catch (error) {
|
||||
// Fall back to local filtering if search fails
|
||||
return allOptions.filter((option) =>
|
||||
option.label.toLowerCase().includes(repoName.toLowerCase()),
|
||||
);
|
||||
}
|
||||
const searchResults = await OpenHands.searchGitRepositories(
|
||||
repoName,
|
||||
3,
|
||||
);
|
||||
// Cache the search results
|
||||
searchCache.current[repoName] = searchResults;
|
||||
return searchResults.map((repo) => ({
|
||||
value: repo.id,
|
||||
label: repo.full_name,
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
// For regular text inputs, use hook-based search results if available
|
||||
if (searchOptions.length > 0 && processedSearchInput === inputValue) {
|
||||
return searchOptions;
|
||||
// For any other input, search via API
|
||||
if (inputValue.length >= 2) {
|
||||
// Only search if at least 2 characters
|
||||
const searchResults = await OpenHands.searchGitRepositories(
|
||||
inputValue,
|
||||
10,
|
||||
);
|
||||
// Cache the search results
|
||||
searchCache.current[inputValue] = searchResults;
|
||||
return searchResults.map((repo) => ({
|
||||
value: repo.id,
|
||||
label: repo.full_name,
|
||||
}));
|
||||
}
|
||||
|
||||
// Fallback to local filtering while search is loading
|
||||
// For very short inputs, do local filtering
|
||||
return allOptions.filter((option) =>
|
||||
option.label.toLowerCase().includes(inputValue.toLowerCase()),
|
||||
);
|
||||
},
|
||||
[allOptions, searchOptions, processedSearchInput, provider],
|
||||
[allOptions],
|
||||
);
|
||||
|
||||
const handleChange = (option: AsyncSelectOption | null) => {
|
||||
@@ -166,7 +142,9 @@ export function GitRepositoryDropdown({
|
||||
|
||||
// If not found, check in search results
|
||||
if (!repo) {
|
||||
repo = searchData?.find((r) => r.id === option.value);
|
||||
repo = Object.values(searchCache.current)
|
||||
.flat()
|
||||
.find((r) => r.id === option.value);
|
||||
}
|
||||
|
||||
onChange?.(repo);
|
||||
@@ -189,7 +167,7 @@ export function GitRepositoryDropdown({
|
||||
errorMessage={errorMessage}
|
||||
disabled={disabled}
|
||||
isClearable={false}
|
||||
isLoading={isLoading || isFetchingNextPage || isSearchLoading}
|
||||
isLoading={isLoading || isLoading || isFetchingNextPage}
|
||||
cacheOptions
|
||||
defaultOptions={allOptions}
|
||||
onChange={handleChange}
|
||||
|
||||
@@ -7,10 +7,11 @@ import { ConversationCard } from "../conversation-panel/conversation-card";
|
||||
import { Provider } from "#/types/settings";
|
||||
|
||||
interface ControlsProps {
|
||||
setSecurityOpen: (isOpen: boolean) => void;
|
||||
showSecurityLock: boolean;
|
||||
}
|
||||
|
||||
export function Controls({ showSecurityLock }: ControlsProps) {
|
||||
export function Controls({ setSecurityOpen, showSecurityLock }: ControlsProps) {
|
||||
const { data: conversation } = useActiveConversation();
|
||||
const [contextMenuOpen, setContextMenuOpen] = React.useState(false);
|
||||
|
||||
@@ -20,7 +21,9 @@ export function Controls({ showSecurityLock }: ControlsProps) {
|
||||
<AgentControlBar />
|
||||
<AgentStatusBar />
|
||||
|
||||
{showSecurityLock && <SecurityLock />}
|
||||
{showSecurityLock && (
|
||||
<SecurityLock onClick={() => setSecurityOpen(true)} />
|
||||
)}
|
||||
</div>
|
||||
|
||||
<ConversationCard
|
||||
|
||||
@@ -1,28 +1,17 @@
|
||||
import { IoLockClosed } from "react-icons/io5";
|
||||
import { Tooltip } from "@heroui/react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { Link } from "react-router";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
|
||||
export function SecurityLock() {
|
||||
const { t } = useTranslation();
|
||||
interface SecurityLockProps {
|
||||
onClick: () => void;
|
||||
}
|
||||
|
||||
export function SecurityLock({ onClick }: SecurityLockProps) {
|
||||
return (
|
||||
<Tooltip
|
||||
content={
|
||||
<div className="max-w-xs p-2">
|
||||
{t(I18nKey.SETTINGS$CONFIRMATION_MODE_LOCK_TOOLTIP)}
|
||||
</div>
|
||||
}
|
||||
placement="top"
|
||||
<div
|
||||
className="cursor-pointer hover:opacity-80 transition-all"
|
||||
style={{ marginRight: "8px" }}
|
||||
onClick={onClick}
|
||||
>
|
||||
<Link
|
||||
to="/settings"
|
||||
className="mr-2 cursor-pointer hover:opacity-80 transition-all"
|
||||
aria-label={t(I18nKey.SETTINGS$TITLE)}
|
||||
>
|
||||
<IoLockClosed size={20} />
|
||||
</Link>
|
||||
</Tooltip>
|
||||
<IoLockClosed size={20} />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
+1
-1
@@ -17,7 +17,7 @@ export function MicroagentManagementAccordionTitle({
|
||||
<TooltipButton
|
||||
tooltip={repository.full_name}
|
||||
ariaLabel={repository.full_name}
|
||||
className="text-white text-base font-normal bg-transparent p-0 min-w-0 h-auto cursor-pointer truncate max-w-[200px] translate-y-[-1px]"
|
||||
className="text-white text-base font-normal bg-transparent p-0 min-w-0 h-auto cursor-pointer truncate max-w-[232px]"
|
||||
testId="repository-name-tooltip"
|
||||
placement="bottom"
|
||||
>
|
||||
|
||||
+14
-11
@@ -7,6 +7,8 @@ import {
|
||||
} from "#/state/microagent-management-slice";
|
||||
import { RootState } from "#/store";
|
||||
import { GitRepository } from "#/types/git";
|
||||
import PlusIcon from "#/icons/plus.svg?react";
|
||||
import { TooltipButton } from "#/components/shared/buttons/tooltip-button";
|
||||
|
||||
interface MicroagentManagementAddMicroagentButtonProps {
|
||||
repository: GitRepository;
|
||||
@@ -23,22 +25,23 @@ export function MicroagentManagementAddMicroagentButton({
|
||||
|
||||
const dispatch = useDispatch();
|
||||
|
||||
const handleClick = (e: React.MouseEvent<HTMLButtonElement>) => {
|
||||
const handleClick = (e: React.MouseEvent<HTMLDivElement>) => {
|
||||
e.stopPropagation();
|
||||
dispatch(setAddMicroagentModalVisible(!addMicroagentModalVisible));
|
||||
dispatch(setSelectedRepository(repository));
|
||||
};
|
||||
|
||||
return (
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleClick}
|
||||
className="translate-y-[-1px]"
|
||||
data-testid="add-microagent-button"
|
||||
>
|
||||
<span className="text-sm font-normal leading-5 text-[#8480FF] cursor-pointer hover:text-[#6C63FF] transition-colors duration-200">
|
||||
{t(I18nKey.COMMON$ADD_MICROAGENT)}
|
||||
</span>
|
||||
</button>
|
||||
<div onClick={handleClick}>
|
||||
<TooltipButton
|
||||
tooltip={t(I18nKey.COMMON$ADD_MICROAGENT)}
|
||||
ariaLabel={t(I18nKey.COMMON$ADD_MICROAGENT)}
|
||||
className="p-0 min-w-0 h-6 w-6 flex items-center justify-center bg-transparent cursor-pointer"
|
||||
testId="add-microagent-button"
|
||||
placement="bottom"
|
||||
>
|
||||
<PlusIcon width={22} height={22} />
|
||||
</TooltipButton>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
-40
@@ -1,5 +1,4 @@
|
||||
import React, { useEffect, useState } from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { useDispatch, useSelector } from "react-redux";
|
||||
import { MicroagentManagementSidebar } from "./microagent-management-sidebar";
|
||||
import { MicroagentManagementMain } from "./microagent-management-main";
|
||||
@@ -26,12 +25,6 @@ import { GitRepository } from "#/types/git";
|
||||
import { queryClient } from "#/query-client-config";
|
||||
import { Provider } from "#/types/settings";
|
||||
import { MicroagentManagementLearnThisRepoModal } from "./microagent-management-learn-this-repo-modal";
|
||||
import {
|
||||
displaySuccessToast,
|
||||
displayErrorToast,
|
||||
} from "#/utils/custom-toast-handlers";
|
||||
import { getFirstPRUrl } from "#/utils/parse-pr-url";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
|
||||
// Handle error events
|
||||
const isErrorEvent = (evt: unknown): evt is { error: true; message: string } =>
|
||||
@@ -119,8 +112,6 @@ export function MicroagentManagementContent() {
|
||||
learnThisRepoModalVisible,
|
||||
} = useSelector((state: RootState) => state.microagentManagement);
|
||||
|
||||
const { t } = useTranslation();
|
||||
|
||||
const dispatch = useDispatch();
|
||||
|
||||
const { createConversationAndSubscribe, isPending } =
|
||||
@@ -168,37 +159,6 @@ export function MicroagentManagementContent() {
|
||||
? (selectedRepository as GitRepository).full_name
|
||||
: "";
|
||||
|
||||
// Check if agent is running and ready to work
|
||||
if (
|
||||
isOpenHandsEvent(socketEvent) &&
|
||||
isAgentStateChangeObservation(socketEvent) &&
|
||||
socketEvent.extras.agent_state === AgentState.RUNNING
|
||||
) {
|
||||
displaySuccessToast(
|
||||
t(I18nKey.MICROAGENT_MANAGEMENT$OPENING_PR_TO_CREATE_MICROAGENT),
|
||||
);
|
||||
}
|
||||
|
||||
// Check if agent has finished and we have a PR
|
||||
if (isOpenHandsEvent(socketEvent) && isFinishAction(socketEvent)) {
|
||||
const prUrl = getFirstPRUrl(socketEvent.args.final_thought || "");
|
||||
if (prUrl) {
|
||||
displaySuccessToast(
|
||||
t(I18nKey.MICROAGENT_MANAGEMENT$PR_READY_FOR_REVIEW),
|
||||
);
|
||||
} else {
|
||||
// Agent finished but no PR found
|
||||
displaySuccessToast(t(I18nKey.MICROAGENT_MANAGEMENT$PR_NOT_CREATED));
|
||||
}
|
||||
}
|
||||
|
||||
// Handle error events
|
||||
if (isErrorEvent(socketEvent) || isAgentStatusError(socketEvent)) {
|
||||
displayErrorToast(
|
||||
t(I18nKey.MICROAGENT_MANAGEMENT$ERROR_CREATING_MICROAGENT),
|
||||
);
|
||||
}
|
||||
|
||||
if (shouldInvalidateConversationsList(socketEvent)) {
|
||||
invalidateConversationsList(repositoryName);
|
||||
}
|
||||
|
||||
+2
-14
@@ -65,18 +65,6 @@ export function MicroagentManagementRepoMicroagents({
|
||||
}
|
||||
}, [conversations]);
|
||||
|
||||
useEffect(
|
||||
() => () => {
|
||||
dispatch(
|
||||
setSelectedMicroagentItem({
|
||||
microagent: null,
|
||||
conversation: null,
|
||||
}),
|
||||
);
|
||||
},
|
||||
[],
|
||||
);
|
||||
|
||||
// Show loading only when both queries are loading
|
||||
const isLoading = isLoadingMicroagents || isLoadingConversations;
|
||||
|
||||
@@ -94,7 +82,7 @@ export function MicroagentManagementRepoMicroagents({
|
||||
// If there's an error with microagents, show the learn this repo component
|
||||
if (isError) {
|
||||
return (
|
||||
<div>
|
||||
<div className="pb-4">
|
||||
<MicroagentManagementLearnThisRepo repository={repository} />
|
||||
</div>
|
||||
);
|
||||
@@ -105,7 +93,7 @@ export function MicroagentManagementRepoMicroagents({
|
||||
const totalItems = numberOfMicroagents + numberOfConversations;
|
||||
|
||||
return (
|
||||
<div>
|
||||
<div className="pb-4">
|
||||
{totalItems === 0 && (
|
||||
<MicroagentManagementLearnThisRepo repository={repository} />
|
||||
)}
|
||||
|
||||
+2
-4
@@ -97,10 +97,8 @@ export function MicroagentManagementRepositories({
|
||||
variant="splitted"
|
||||
className="w-full px-0 gap-3"
|
||||
itemClasses={{
|
||||
base: "shadow-none bg-transparent cursor-pointer px-0",
|
||||
trigger: "cursor-pointer gap-2 py-3",
|
||||
indicator:
|
||||
"flex items-center justify-center p-0.5 pr-[3px] text-white hover:bg-[#454545] rounded transition-colors duration-200 rotate-180",
|
||||
base: "shadow-none bg-transparent border border-[#ffffff40] rounded-[6px] cursor-pointer",
|
||||
trigger: "cursor-pointer gap-1",
|
||||
}}
|
||||
selectionMode="multiple"
|
||||
>
|
||||
|
||||
-110
@@ -1,110 +0,0 @@
|
||||
import { render, screen, fireEvent } from "@testing-library/react";
|
||||
import { describe, it, expect, vi } from "vitest";
|
||||
import { MCPServerForm } from "../mcp-server-form";
|
||||
|
||||
// i18n mock
|
||||
vi.mock("react-i18next", () => ({
|
||||
useTranslation: () => ({
|
||||
t: (key: string) => key,
|
||||
}),
|
||||
}));
|
||||
|
||||
describe("MCPServerForm validation", () => {
|
||||
const noop = () => {};
|
||||
|
||||
it("rejects invalid env var lines and allows blank lines", () => {
|
||||
const onSubmit = vi.fn();
|
||||
|
||||
render(
|
||||
<MCPServerForm
|
||||
mode="add"
|
||||
server={{ id: "tmp", type: "stdio" }}
|
||||
existingServers={[]}
|
||||
onSubmit={onSubmit}
|
||||
onCancel={noop}
|
||||
/>,
|
||||
);
|
||||
|
||||
// Fill required fields
|
||||
fireEvent.change(screen.getByTestId("name-input"), {
|
||||
target: { value: "my-server" },
|
||||
});
|
||||
fireEvent.change(screen.getByTestId("command-input"), {
|
||||
target: { value: "npx" },
|
||||
});
|
||||
|
||||
// Invalid env entries mixed with blank lines
|
||||
fireEvent.change(screen.getByTestId("env-input"), {
|
||||
target: { value: "invalid\n\nKEY=value\n=novalue\nKEY_ONLY=" },
|
||||
});
|
||||
|
||||
fireEvent.click(screen.getByTestId("submit-button"));
|
||||
|
||||
// Should show invalid env format error
|
||||
expect(
|
||||
screen.getByText("SETTINGS$MCP_ERROR_ENV_INVALID_FORMAT"),
|
||||
).toBeInTheDocument();
|
||||
|
||||
// Fix env with valid lines and blank lines
|
||||
fireEvent.change(screen.getByTestId("env-input"), {
|
||||
target: { value: "KEY=value\n\nANOTHER=123" },
|
||||
});
|
||||
|
||||
fireEvent.click(screen.getByTestId("submit-button"));
|
||||
|
||||
// No error; submit should be called
|
||||
expect(onSubmit).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("rejects duplicate URLs across sse/shttp types", () => {
|
||||
const onSubmit = vi.fn();
|
||||
|
||||
const existingServers = [
|
||||
{ id: "sse-1", type: "sse" as const, url: "https://api.example.com" },
|
||||
{ id: "shttp-1", type: "shttp" as const, url: "https://x.example.com" },
|
||||
];
|
||||
|
||||
const r1 = render(
|
||||
<MCPServerForm
|
||||
mode="add"
|
||||
server={{ id: "tmp", type: "sse" }}
|
||||
existingServers={existingServers}
|
||||
onSubmit={onSubmit}
|
||||
onCancel={noop}
|
||||
/>,
|
||||
);
|
||||
|
||||
fireEvent.change(screen.getAllByTestId("url-input")[0], {
|
||||
target: { value: "https://api.example.com" },
|
||||
});
|
||||
|
||||
fireEvent.click(screen.getAllByTestId("submit-button")[0]);
|
||||
expect(
|
||||
screen.getByText("SETTINGS$MCP_ERROR_URL_DUPLICATE"),
|
||||
).toBeInTheDocument();
|
||||
|
||||
// Unmount first form, then check shttp duplicate
|
||||
r1.unmount();
|
||||
|
||||
const r2 = render(
|
||||
<MCPServerForm
|
||||
mode="add"
|
||||
server={{ id: "tmp2", type: "shttp" }}
|
||||
existingServers={existingServers}
|
||||
onSubmit={onSubmit}
|
||||
onCancel={noop}
|
||||
/>,
|
||||
);
|
||||
|
||||
fireEvent.change(screen.getAllByTestId("url-input")[0], {
|
||||
target: { value: "https://api.example.com" },
|
||||
});
|
||||
|
||||
fireEvent.click(screen.getAllByTestId("submit-button")[0]);
|
||||
expect(
|
||||
screen.getByText("SETTINGS$MCP_ERROR_URL_DUPLICATE"),
|
||||
).toBeInTheDocument();
|
||||
|
||||
r2.unmount();
|
||||
});
|
||||
});
|
||||
-158
@@ -1,158 +0,0 @@
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import { describe, it, expect, vi } from "vitest";
|
||||
import { MCPServerList } from "../mcp-server-list";
|
||||
|
||||
// Mock react-i18next
|
||||
vi.mock("react-i18next", () => ({
|
||||
useTranslation: () => ({
|
||||
t: (key: string) => key,
|
||||
}),
|
||||
}));
|
||||
|
||||
const mockServers = [
|
||||
{
|
||||
id: "sse-0",
|
||||
type: "sse" as const,
|
||||
url: "https://very-long-url-that-could-cause-layout-overflow.example.com/api/v1/mcp/server/endpoint/with/many/path/segments",
|
||||
},
|
||||
{
|
||||
id: "stdio-0",
|
||||
type: "stdio" as const,
|
||||
name: "test-stdio-server",
|
||||
command: "python",
|
||||
args: ["-m", "test_server"],
|
||||
},
|
||||
];
|
||||
|
||||
describe("MCPServerList", () => {
|
||||
it("should render servers with proper layout structure", () => {
|
||||
const mockOnEdit = vi.fn();
|
||||
const mockOnDelete = vi.fn();
|
||||
|
||||
render(
|
||||
<MCPServerList
|
||||
servers={mockServers}
|
||||
onEdit={mockOnEdit}
|
||||
onDelete={mockOnDelete}
|
||||
/>,
|
||||
);
|
||||
|
||||
// Check that the table structure is rendered
|
||||
const table = screen.getByRole("table");
|
||||
expect(table).toBeInTheDocument();
|
||||
expect(table).toHaveClass("w-full");
|
||||
|
||||
// Check that server items are rendered
|
||||
const serverItems = screen.getAllByTestId("mcp-server-item");
|
||||
expect(serverItems).toHaveLength(2);
|
||||
|
||||
// Check that action buttons are present for each server
|
||||
const editButtons = screen.getAllByTestId("edit-mcp-server-button");
|
||||
const deleteButtons = screen.getAllByTestId("delete-mcp-server-button");
|
||||
expect(editButtons).toHaveLength(2);
|
||||
expect(deleteButtons).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("should render empty state when no servers", () => {
|
||||
const mockOnEdit = vi.fn();
|
||||
const mockOnDelete = vi.fn();
|
||||
|
||||
render(
|
||||
<MCPServerList
|
||||
servers={[]}
|
||||
onEdit={mockOnEdit}
|
||||
onDelete={mockOnDelete}
|
||||
/>,
|
||||
);
|
||||
|
||||
expect(screen.getByText("SETTINGS$MCP_NO_SERVERS")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should handle long URLs without breaking layout", () => {
|
||||
const longUrlServer = {
|
||||
id: "sse-0",
|
||||
type: "sse" as const,
|
||||
url: "https://extremely-long-url-that-would-previously-cause-layout-overflow-and-push-action-buttons-out-of-view.example.com/api/v1/mcp/server/endpoint/with/many/path/segments/and/query/parameters?param1=value1&param2=value2&param3=value3",
|
||||
};
|
||||
|
||||
const mockOnEdit = vi.fn();
|
||||
const mockOnDelete = vi.fn();
|
||||
|
||||
render(
|
||||
<MCPServerList
|
||||
servers={[longUrlServer]}
|
||||
onEdit={mockOnEdit}
|
||||
onDelete={mockOnDelete}
|
||||
/>,
|
||||
);
|
||||
|
||||
// Check that action buttons are still present and accessible
|
||||
const editButton = screen.getByTestId("edit-mcp-server-button");
|
||||
const deleteButton = screen.getByTestId("delete-mcp-server-button");
|
||||
|
||||
expect(editButton).toBeInTheDocument();
|
||||
expect(deleteButton).toBeInTheDocument();
|
||||
|
||||
// Check that the URL is properly displayed with title attribute for accessibility
|
||||
const detailsCells = screen.getAllByTitle(longUrlServer.url);
|
||||
expect(detailsCells).toHaveLength(2); // Name and Details columns both have the URL
|
||||
|
||||
// Check that both name and details cells use truncation and have title for tooltip
|
||||
const [nameCell, detailsCell] = detailsCells;
|
||||
expect(nameCell).toHaveClass("truncate");
|
||||
expect(detailsCell).toHaveClass("truncate");
|
||||
});
|
||||
|
||||
// For a STDIO server with a command, the details cell shows the command
// followed by its space-joined arguments (as text and as `title`).
it("should display command and arguments for STDIO servers", () => {
  const onEditSpy = vi.fn();
  const onDeleteSpy = vi.fn();
  const server = {
    id: "stdio-1",
    type: "stdio" as const,
    name: "test-server",
    command: "python",
    args: ["-m", "test_module", "--verbose"],
  };

  render(
    <MCPServerList
      servers={[server]}
      onEdit={onEditSpy}
      onDelete={onDeleteSpy}
    />,
  );

  const commandLine = "python -m test_module --verbose";
  const byTitle = screen.getByTitle(commandLine);
  const byText = screen.getByText(commandLine);

  expect(byTitle).toBeInTheDocument();
  expect(byText).toBeInTheDocument();
});
|
||||
|
||||
// Without a command, a STDIO server's details fall back to its name, so the
// name appears in both the name column and the details column.
it("should fallback to server name for STDIO servers without command", () => {
  const onEditSpy = vi.fn();
  const onDeleteSpy = vi.fn();
  const server = {
    id: "stdio-2",
    type: "stdio" as const,
    name: "fallback-server",
  };

  render(
    <MCPServerList
      servers={[server]}
      onEdit={onEditSpy}
      onDelete={onDeleteSpy}
    />,
  );

  // Two matches are expected for each query: one per column.
  const titledElements = screen.getAllByTitle("fallback-server");
  const textElements = screen.getAllByText("fallback-server");

  expect(titledElements).toHaveLength(2);
  expect(textElements).toHaveLength(2);
});
|
||||
});
|
||||
@@ -0,0 +1,78 @@
|
||||
import React, { useState } from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { MCPConfig } from "#/types/settings";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { MCPSSEServers } from "./mcp-sse-servers";
|
||||
import { MCPStdioServers } from "./mcp-stdio-servers";
|
||||
import { MCPJsonEditor } from "./mcp-json-editor";
|
||||
import { BrandButton } from "../brand-button";
|
||||
|
||||
/** Props accepted by the MCPConfigEditor component. */
interface MCPConfigEditorProps {
  /** Current MCP configuration; an empty configuration is displayed when omitted. */
  mcpConfig?: MCPConfig;
  /** Receives the new configuration when the JSON editor reports a change. */
  onChange: (config: MCPConfig) => void;
}
|
||||
|
||||
/**
 * Shows the MCP configuration (SSE and stdio server lists) and lets the user
 * switch to a raw JSON editor.
 *
 * While editing, the server lists and the edit button are replaced by the
 * JSON editor. When the editor reports a change, the new configuration is
 * forwarded to `onChange` and the component returns to the read-only view.
 *
 * @param mcpConfig Current configuration; treated as empty when omitted.
 * @param onChange  Called with the configuration produced by the JSON editor.
 */
export function MCPConfigEditor({ mcpConfig, onChange }: MCPConfigEditorProps) {
  const { t } = useTranslation();
  const [isEditing, setIsEditing] = useState(false);

  const openEditor = () => setIsEditing(true);
  const closeEditor = () => setIsEditing(false);

  const handleConfigChange = (newConfig: MCPConfig) => {
    onChange(newConfig);
    closeEditor();
  };

  // Fall back to an empty configuration so the read-only view can always render.
  const config = mcpConfig || { sse_servers: [], stdio_servers: [] };
  const hasNoServers =
    config.sse_servers.length === 0 && config.stdio_servers.length === 0;

  const editButtonRow = (
    <div className="flex justify-between items-center mb-4">
      <div className="flex items-center">
        <BrandButton type="button" variant="primary" onClick={openEditor}>
          {t(I18nKey.SETTINGS$MCP_EDIT_CONFIGURATION)}
        </BrandButton>
      </div>
    </div>
  );

  const readOnlyView = (
    <>
      <div className="flex flex-col gap-6">
        <div>
          <MCPSSEServers servers={config.sse_servers} />
        </div>

        <div>
          <MCPStdioServers servers={config.stdio_servers} />
        </div>
      </div>

      {hasNoServers && (
        <div className="mt-4 p-2 bg-yellow-50 border border-yellow-200 rounded-md text-sm text-yellow-700">
          {t(I18nKey.SETTINGS$MCP_NO_SERVERS_CONFIGURED)}
        </div>
      )}
    </>
  );

  return (
    <div>
      <div className="flex flex-col gap-2 mb-6">
        <div className="text-sm font-medium">
          {t(I18nKey.SETTINGS$MCP_TITLE)}
        </div>
        <p className="text-xs text-[#A3A3A3]">
          {t(I18nKey.SETTINGS$MCP_DESCRIPTION)}
        </p>
      </div>
      {!isEditing && editButtonRow}
      <div>
        {isEditing ? (
          <MCPJsonEditor
            mcpConfig={mcpConfig}
            onChange={handleConfigChange}
            onCancel={closeEditor}
          />
        ) : (
          readOnlyView
        )}
      </div>
    </div>
  );
}
|
||||
@@ -0,0 +1,139 @@
|
||||
import React, { useState, useRef, useEffect } from "react";
|
||||
import { useTranslation, Trans } from "react-i18next";
|
||||
import { MCPConfig } from "#/types/settings";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { BrandButton } from "../brand-button";
|
||||
import { cn } from "#/utils/utils";
|
||||
|
||||
/** Props accepted by the MCPJsonEditor component. */
interface MCPJsonEditorProps {
  /** Configuration used to seed the editor text; defaults to an empty configuration. */
  mcpConfig?: MCPConfig;
  /** Receives the parsed configuration once it has passed validation. */
  onChange: (config: MCPConfig) => void;
  /** Invoked when the cancel button is clicked. */
  onCancel: () => void;
}
|
||||
|
||||
/** Empty configuration shown in the editor when no `mcpConfig` is supplied. */
const MCP_DEFAULT_CONFIG: MCPConfig = {
  sse_servers: [],
  stdio_servers: [],
};
|
||||
|
||||
/**
 * Raw JSON editor for the MCP configuration.
 *
 * The textarea is seeded once, on mount, with `mcpConfig` (or an empty
 * configuration when none is given) and receives focus. Saving parses the
 * text, validates its structure and forwards the parsed value to `onChange`.
 * Validation failures are shown below the textarea without calling `onChange`.
 *
 * @param mcpConfig Initial configuration used to seed the editor text.
 * @param onChange  Receives the parsed configuration after it passes validation.
 * @param onCancel  Invoked when the cancel button is clicked.
 */
export function MCPJsonEditor({
  mcpConfig,
  onChange,
  onCancel,
}: MCPJsonEditorProps) {
  const { t } = useTranslation();
  const [configText, setConfigText] = useState(() =>
    mcpConfig
      ? JSON.stringify(mcpConfig, null, 2)
      : JSON.stringify(MCP_DEFAULT_CONFIG, null, 2),
  );

  const [error, setError] = useState<string | null>(null);

  const textareaRef = useRef<HTMLTextAreaElement>(null);

  // Focus the textarea as soon as the editor is mounted.
  useEffect(() => {
    textareaRef.current?.focus();
  }, []);

  const handleTextChange = (e: React.ChangeEvent<HTMLTextAreaElement>) => {
    setConfigText(e.target.value);
  };

  const handleSave = () => {
    try {
      const newConfig = JSON.parse(configText);

      // Validate the structure. JSON.parse can return `null` or a primitive
      // (e.g. for the text "null"), so the first access is null-safe; this
      // reports the regular validation error instead of a raw TypeError.
      if (!Array.isArray(newConfig?.sse_servers)) {
        throw new Error(t(I18nKey.SETTINGS$MCP_ERROR_SSE_ARRAY));
      }

      if (!Array.isArray(newConfig.stdio_servers)) {
        throw new Error(t(I18nKey.SETTINGS$MCP_ERROR_STDIO_ARRAY));
      }

      // Validate SSE servers: each entry is either a string or an object
      // with a non-empty string `url`. `null` entries are rejected here.
      for (const server of newConfig.sse_servers) {
        const isUrlString = typeof server === "string";
        const hasUrlField =
          typeof server?.url === "string" && server.url !== "";
        if (!isUrlString && !hasUrlField) {
          throw new Error(t(I18nKey.SETTINGS$MCP_ERROR_SSE_URL));
        }
      }

      // Validate stdio servers: `name` and `command` must both be present.
      for (const server of newConfig.stdio_servers) {
        if (!server?.name || !server?.command) {
          throw new Error(t(I18nKey.SETTINGS$MCP_ERROR_STDIO_PROPS));
        }
      }

      onChange(newConfig);
      setError(null);
    } catch (e) {
      // JSON.parse throws a SyntaxError (an Error), so its own message is
      // shown for malformed JSON; the translated fallback only applies to
      // non-Error throwables.
      setError(
        e instanceof Error
          ? e.message
          : t(I18nKey.SETTINGS$MCP_ERROR_INVALID_JSON),
      );
    }
  };

  return (
    <div>
      <p className="mb-2 text-sm text-gray-400">
        <Trans
          i18nKey={I18nKey.SETTINGS$MCP_CONFIG_DESCRIPTION}
          components={{
            a: (
              <a
                href="https://docs.all-hands.dev/usage/mcp"
                target="_blank"
                rel="noopener noreferrer"
                className="text-blue-400 hover:underline"
              >
                documentation
              </a>
            ),
          }}
        />
      </p>
      <textarea
        ref={textareaRef}
        className={cn(
          "w-full h-64 resize-y p-2 rounded-sm text-sm font-mono",
          "bg-tertiary border border-[#717888]",
          "placeholder:italic placeholder:text-tertiary-alt",
          "focus:outline-none focus:ring-1 focus:ring-primary",
          "disabled:bg-[#2D2F36] disabled:border-[#2D2F36] disabled:cursor-not-allowed",
        )}
        value={configText}
        onChange={handleTextChange}
        spellCheck="false"
      />
      {error && (
        <div className="mt-2 p-2 bg-red-100 border border-red-300 rounded-md text-sm text-red-700">
          <strong>{t(I18nKey.SETTINGS$MCP_CONFIG_ERROR)}</strong> {error}
        </div>
      )}
      <div className="mt-2 text-sm text-gray-400">
        <strong>{t(I18nKey.SETTINGS$MCP_CONFIG_EXAMPLE)}</strong>{" "}
        <code>
          {
            '{ "sse_servers": ["https://example-mcp-server.com/sse"], "stdio_servers": [{ "name": "fetch", "command": "uvx", "args": ["mcp-server-fetch"] }] }'
          }
        </code>
      </div>
      <div className="mt-4 flex justify-end gap-3">
        <BrandButton type="button" variant="secondary" onClick={onCancel}>
          {t(I18nKey.BUTTON$CANCEL)}
        </BrandButton>
        <BrandButton type="button" variant="primary" onClick={handleSave}>
          {t(I18nKey.SETTINGS$MCP_PREVIEW_CHANGES)}
        </BrandButton>
      </div>
    </div>
  );
}
|
||||
@@ -1,376 +0,0 @@
|
||||
import React from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { SettingsInput } from "../settings-input";
|
||||
import { SettingsDropdownInput } from "../settings-dropdown-input";
|
||||
import { BrandButton } from "../brand-button";
|
||||
import { OptionalTag } from "../optional-tag";
|
||||
import { cn } from "#/utils/utils";
|
||||
|
||||
/** Server kinds offered by the form's type dropdown. */
type MCPServerType = "sse" | "stdio" | "shttp";
|
||||
|
||||
/** Server entry edited by MCPServerForm and passed to its `onSubmit`. */
interface MCPServerConfig {
  /** Identifier; the form keeps an existing id or builds `<type>-<timestamp>`. */
  id: string;
  /** Server kind; decides which of the optional fields below the form fills in. */
  type: MCPServerType;
  /** Submitted for "stdio" servers. */
  name?: string;
  /** Submitted for "sse" and "shttp" servers. */
  url?: string;
  /** Submitted for "sse" and "shttp" servers when a key was entered. */
  api_key?: string;
  /** Submitted for "stdio" servers. */
  command?: string;
  /** Submitted for "stdio" servers when at least one argument was entered. */
  args?: string[];
  /** Submitted for "stdio" servers when at least one variable was entered. */
  env?: Record<string, string>;
}
|
||||
|
||||
/** Props accepted by the MCPServerForm component. */
interface MCPServerFormProps {
  /** "add" shows the server-type dropdown; "edit" hides it. */
  mode: "add" | "edit";
  /** Server whose values pre-fill the form fields. */
  server?: MCPServerConfig;
  /** Servers checked for duplicate names/URLs; checks are skipped when omitted. */
  existingServers?: MCPServerConfig[];
  /** Receives the assembled server configuration after validation succeeds. */
  onSubmit: (server: MCPServerConfig) => void;
  /** Invoked when the cancel button is clicked. */
  onCancel: () => void;
}
|
||||
|
||||
/**
 * Form for adding or editing a single MCP server.
 *
 * URL-based servers ("sse" / "shttp") collect a URL and an optional API key;
 * "stdio" servers collect a name, a command, optional arguments (one per
 * line) and optional environment variables (`KEY=value`, one per line).
 * On submit the fields are validated; the first validation message is shown
 * in the form, otherwise the assembled configuration is passed to `onSubmit`.
 *
 * @param mode            "add" shows the server-type dropdown, "edit" hides it.
 * @param server          Server used to pre-fill the fields.
 * @param existingServers Servers used for duplicate name/URL checks.
 * @param onSubmit        Receives the validated server configuration.
 * @param onCancel        Invoked when the cancel button is clicked.
 */
export function MCPServerForm({
  mode,
  server,
  existingServers,
  onSubmit,
  onCancel,
}: MCPServerFormProps) {
  const { t } = useTranslation();
  const [serverType, setServerType] = React.useState<MCPServerType>(
    server?.type || "sse",
  );
  const [error, setError] = React.useState<string | null>(null);

  const isUrlBased = serverType === "sse" || serverType === "shttp";

  const serverTypeOptions = [
    { key: "sse", label: t(I18nKey.SETTINGS$MCP_SERVER_TYPE_SSE) },
    { key: "stdio", label: t(I18nKey.SETTINGS$MCP_SERVER_TYPE_STDIO) },
    { key: "shttp", label: t(I18nKey.SETTINGS$MCP_SERVER_TYPE_SHTTP) },
  ];

  // A URL must be present, parseable and use http or https.
  const validateUrl = (url: string): string | null => {
    if (!url) return t(I18nKey.SETTINGS$MCP_ERROR_URL_REQUIRED);
    try {
      const { protocol } = new URL(url);
      const isHttp = protocol === "http:" || protocol === "https:";
      if (!isHttp) {
        return t(I18nKey.SETTINGS$MCP_ERROR_URL_INVALID_PROTOCOL);
      }
    } catch {
      return t(I18nKey.SETTINGS$MCP_ERROR_URL_INVALID);
    }
    return null;
  };

  // A name must be present and consist of letters, digits, "_" or "-".
  const validateName = (name: string): string | null => {
    if (!name) return t(I18nKey.SETTINGS$MCP_ERROR_NAME_REQUIRED);
    return /^[a-zA-Z0-9_-]+$/.test(name)
      ? null
      : t(I18nKey.SETTINGS$MCP_ERROR_NAME_INVALID);
  };

  // Names only need to be unique among stdio servers; an unchanged name in
  // edit mode is not checked against the list.
  const validateNameUniqueness = (name: string): string | null => {
    if (!existingServers) return null;
    const nameChanged =
      mode === "add" || (mode === "edit" && server?.name !== name);
    if (!nameChanged) return null;

    const isTaken = existingServers.some(
      (s) => s.type === "stdio" && Boolean(s.name) && s.name === name,
    );
    return isTaken ? t(I18nKey.SETTINGS$MCP_ERROR_NAME_DUPLICATE) : null;
  };

  // The command must be present and contain no spaces.
  const validateCommand = (command: string): string | null => {
    if (!command) return t(I18nKey.SETTINGS$MCP_ERROR_COMMAND_REQUIRED);
    return command.includes(" ")
      ? t(I18nKey.SETTINGS$MCP_ERROR_COMMAND_NO_SPACES)
      : null;
  };

  // For URL-based servers (sse/shttp), ensure URL is unique across both types
  const validateUrlUniqueness = (url: string): string | null => {
    if (!existingServers) return null;
    const previousUrl = server?.url;
    const urlChanged =
      mode === "add" || (mode === "edit" && previousUrl !== url);
    if (!urlChanged) return null;

    const isTaken = existingServers.some(
      (s) => (s.type === "sse" || s.type === "shttp") && s.url === url,
    );
    return isTaken ? t(I18nKey.SETTINGS$MCP_ERROR_URL_DUPLICATE) : null;
  };

  // Every non-blank line must contain "=" preceded by a non-blank key.
  const validateEnvFormat = (envString: string): string | null => {
    if (!envString.trim()) return null;
    const hasMalformedLine = envString
      .split("\n")
      .map((line) => line.trim())
      .filter(Boolean)
      .some((line) => {
        const eq = line.indexOf("=");
        return eq === -1 || !line.substring(0, eq).trim();
      });
    return hasMalformedLine
      ? t(I18nKey.SETTINGS$MCP_ERROR_ENV_INVALID_FORMAT)
      : null;
  };

  // Runs the stdio checks in order and reports the first failure.
  const validateStdioServer = (formData: FormData): string | null => {
    const name = formData.get("name")?.toString().trim() || "";
    const command = formData.get("command")?.toString().trim() || "";
    const envString = formData.get("env")?.toString() || "";

    return (
      validateName(name) ||
      validateNameUniqueness(name) ||
      validateCommand(command) ||
      validateEnvFormat(envString) ||
      null
    );
  };

  // Dispatches to the checks that apply to the selected server type.
  const validateForm = (formData: FormData): string | null => {
    if (isUrlBased) {
      const url = formData.get("url")?.toString().trim() || "";
      return validateUrl(url) || validateUrlUniqueness(url) || null;
    }

    if (serverType === "stdio") {
      return validateStdioServer(formData);
    }

    return null;
  };

  // Turns "KEY=value" lines into an object; lines without a key are ignored
  // and a later duplicate key overwrites an earlier one.
  const parseEnvironmentVariables = (
    envString: string,
  ): Record<string, string> => {
    const env: Record<string, string> = {};
    const input = envString.trim();
    if (!input) return env;

    input.split("\n").forEach((line) => {
      const entry = line.trim();
      const eq = entry.indexOf("=");
      if (eq === -1) return;
      const key = entry.substring(0, eq).trim();
      if (key) {
        env[key] = entry.substring(eq + 1).trim();
      }
    });
    return env;
  };

  // Inverse of parseEnvironmentVariables, used to pre-fill the textarea.
  const formatEnvironmentVariables = (env?: Record<string, string>): string =>
    env
      ? Object.entries(env)
          .map(([key, value]) => `${key}=${value}`)
          .join("\n")
      : "";

  const handleSubmit = (event: React.FormEvent<HTMLFormElement>) => {
    event.preventDefault();
    setError(null);

    const formData = new FormData(event.currentTarget);
    const validationError = validateForm(formData);

    if (validationError) {
      setError(validationError);
      return;
    }

    // Keep the id of an edited server; otherwise derive one from the type
    // and the current timestamp.
    const baseConfig = {
      id: server?.id || `${serverType}-${Date.now()}`,
      type: serverType,
    };

    if (isUrlBased) {
      const url = formData.get("url")?.toString().trim();
      const apiKey = formData.get("api_key")?.toString().trim();

      onSubmit({
        ...baseConfig,
        url: url!,
        ...(apiKey && { api_key: apiKey }),
      });
    } else if (serverType === "stdio") {
      const name = formData.get("name")?.toString().trim();
      const command = formData.get("command")?.toString().trim();
      const argsString = formData.get("args")?.toString().trim();
      const envString = formData.get("env")?.toString().trim();

      // One argument per line; blank lines are dropped.
      const args = argsString
        ? argsString
            .split("\n")
            .map((arg) => arg.trim())
            .filter(Boolean)
        : [];
      const env = parseEnvironmentVariables(envString || "");

      onSubmit({
        ...baseConfig,
        name: name!,
        command: command!,
        ...(args.length > 0 && { args }),
        ...(Object.keys(env).length > 0 && { env }),
      });
    }
  };

  const formTestId =
    mode === "add" ? "add-mcp-server-form" : "edit-mcp-server-form";

  return (
    <form
      data-testid={formTestId}
      onSubmit={handleSubmit}
      className="flex flex-col items-start gap-6"
    >
      {mode === "add" && (
        <SettingsDropdownInput
          testId="server-type-dropdown"
          name="server-type"
          label={t(I18nKey.SETTINGS$MCP_SERVER_TYPE)}
          items={serverTypeOptions}
          selectedKey={serverType}
          onSelectionChange={(key) => setServerType(key as MCPServerType)}
          onInputChange={() => {}} // Prevent input changes
          isClearable={false}
          allowsCustomValue={false}
          required
          wrapperClassName={cn("w-full", "max-w-[680px]")}
        />
      )}

      {error && <p className="text-red-500 text-sm">{error}</p>}

      {isUrlBased && (
        <>
          <SettingsInput
            testId="url-input"
            name="url"
            type="url"
            label={t(I18nKey.SETTINGS$MCP_URL)}
            className="w-full max-w-[680px]"
            required
            defaultValue={server?.url || ""}
            placeholder="https://api.example.com"
          />

          <SettingsInput
            testId="api-key-input"
            name="api_key"
            type="password"
            label={t(I18nKey.SETTINGS$MCP_API_KEY)}
            className="w-full max-w-[680px]"
            showOptionalTag
            defaultValue={server?.api_key || ""}
            placeholder={t(I18nKey.SETTINGS$MCP_API_KEY_PLACEHOLDER)}
          />
        </>
      )}

      {serverType === "stdio" && (
        <>
          <SettingsInput
            testId="name-input"
            name="name"
            type="text"
            label={t(I18nKey.SETTINGS$MCP_NAME)}
            className="w-full max-w-[680px]"
            required
            defaultValue={server?.name || ""}
            placeholder="my-mcp-server"
            pattern="^[a-zA-Z0-9_-]+$"
          />

          <SettingsInput
            testId="command-input"
            name="command"
            type="text"
            label={t(I18nKey.SETTINGS$MCP_COMMAND)}
            className="w-full max-w-[680px]"
            required
            defaultValue={server?.command || ""}
            placeholder="npx"
          />

          <label className="flex flex-col gap-2.5 w-full max-w-[680px]">
            <div className="flex items-center gap-2">
              <span className="text-sm">
                {t(I18nKey.SETTINGS$MCP_COMMAND_ARGUMENTS)}
              </span>
              <OptionalTag />
            </div>
            <textarea
              data-testid="args-input"
              name="args"
              rows={3}
              defaultValue={server?.args?.join("\n") || ""}
              placeholder="arg1 arg2 arg3"
              className={cn(
                "bg-tertiary border border-[#717888] w-full rounded-sm p-2 placeholder:italic placeholder:text-tertiary-alt resize-none",
                "disabled:bg-[#2D2F36] disabled:border-[#2D2F36] disabled:cursor-not-allowed",
              )}
            />
            <p className="text-xs text-tertiary-alt">
              {t(I18nKey.SETTINGS$MCP_COMMAND_ARGUMENTS_HELP)}
            </p>
          </label>

          <label className="flex flex-col gap-2.5 w-full max-w-[680px]">
            <div className="flex items-center gap-2">
              <span className="text-sm">
                {t(I18nKey.SETTINGS$MCP_ENVIRONMENT_VARIABLES)}
              </span>
              <OptionalTag />
            </div>
            <textarea
              data-testid="env-input"
              name="env"
              rows={4}
              defaultValue={formatEnvironmentVariables(server?.env)}
              placeholder="KEY1=value1 KEY2=value2"
              className={cn(
                "resize-none",
                "bg-tertiary border border-[#717888] rounded-sm p-2 placeholder:italic placeholder:text-tertiary-alt",
                "disabled:bg-[#2D2F36] disabled:border-[#2D2F36] disabled:cursor-not-allowed",
              )}
            />
          </label>
        </>
      )}

      <div className="flex items-center gap-4">
        <BrandButton
          testId="cancel-button"
          type="button"
          variant="secondary"
          onClick={onCancel}
        >
          {t(I18nKey.BUTTON$CANCEL)}
        </BrandButton>
        <BrandButton testId="submit-button" type="submit" variant="primary">
          {mode === "add" && t(I18nKey.SETTINGS$MCP_ADD_SERVER)}
          {mode === "edit" && t(I18nKey.SETTINGS$MCP_SAVE_SERVER)}
        </BrandButton>
      </div>
    </form>
  );
}
|
||||
@@ -1,110 +0,0 @@
|
||||
import { FaPencil, FaTrash } from "react-icons/fa6";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
|
||||
/** Server entry rendered as one table row by MCPServerListItem. */
interface MCPServerConfig {
  id: string;
  /** Selects the type label and which fields are shown as name and details. */
  type: "sse" | "stdio" | "shttp";
  /** Shown in the name column for "stdio" servers. */
  name?: string;
  /** Shown in the name and details columns for "sse" and "shttp" servers. */
  url?: string;
  api_key?: string;
  /** Shown (with `args`) in the details column for "stdio" servers. */
  command?: string;
  /** Appended to `command`, separated by spaces. */
  args?: string[];
  env?: Record<string, string>;
}
|
||||
|
||||
/**
 * One table row describing an MCP server: name, type label, details and
 * edit/delete buttons.
 *
 * The name column shows `name` for "stdio" servers and `url` otherwise. The
 * details column shows `command` plus its arguments for "stdio" servers
 * (falling back to `name` when there is no command) and `url` for "sse" and
 * "shttp" servers. Both cells carry the full value in `title`.
 *
 * @param server   Server to display.
 * @param onEdit   Invoked when the edit button is clicked.
 * @param onDelete Invoked when the delete button is clicked.
 */
export function MCPServerListItem({
  server,
  onEdit,
  onDelete,
}: {
  server: MCPServerConfig;
  onEdit: () => void;
  onDelete: () => void;
}) {
  const { t } = useTranslation();

  // Translated label for known types; unknown types are upper-cased as-is.
  const getServerTypeLabel = (type: string) => {
    if (type === "sse") return t(I18nKey.SETTINGS$MCP_SERVER_TYPE_SSE);
    if (type === "stdio") return t(I18nKey.SETTINGS$MCP_SERVER_TYPE_STDIO);
    if (type === "shttp") return t(I18nKey.SETTINGS$MCP_SERVER_TYPE_SHTTP);
    return type.toUpperCase();
  };

  const getServerDescription = (serverConfig: MCPServerConfig) => {
    const { type, command, args, name, url } = serverConfig;

    if (type === "stdio") {
      if (!command) return name || "";
      const suffix = args && args.length > 0 ? ` ${args.join(" ")}` : "";
      return `${command}${suffix}`;
    }

    const isUrlBased = type === "sse" || type === "shttp";
    return isUrlBased && url ? url : "";
  };

  const isStdio = server.type === "stdio";
  const serverName = isStdio ? server.name : server.url;
  const serverDescription = getServerDescription(server);
  const actionButtonClass =
    "cursor-pointer hover:text-content-1 transition-colors";

  return (
    <tr
      data-testid="mcp-server-item"
      className="grid grid-cols-[minmax(0,0.25fr)_120px_minmax(0,1fr)_120px] gap-4 items-start border-t border-tertiary"
    >
      <td
        className="p-3 text-sm text-content-2 truncate min-w-0"
        title={serverName}
      >
        {serverName}
      </td>

      <td className="p-3 text-sm text-content-2 whitespace-nowrap">
        {getServerTypeLabel(server.type)}
      </td>

      <td
        className="p-3 text-sm text-content-2 opacity-80 italic min-w-0 truncate"
        title={serverDescription}
      >
        <span className="inline-block max-w-full align-bottom">
          {serverDescription}
        </span>
      </td>

      <td className="p-3 flex items-start justify-end gap-4 whitespace-nowrap">
        <button
          data-testid="edit-mcp-server-button"
          type="button"
          onClick={onEdit}
          aria-label={`Edit ${serverName}`}
          className={actionButtonClass}
        >
          <FaPencil size={16} />
        </button>
        <button
          data-testid="delete-mcp-server-button"
          type="button"
          onClick={onDelete}
          aria-label={`Delete ${serverName}`}
          className={actionButtonClass}
        >
          <FaTrash size={16} />
        </button>
      </td>
    </tr>
  );
}
|
||||
@@ -1,71 +0,0 @@
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { MCPServerListItem } from "./mcp-server-list-item";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
|
||||
/** Server entry listed by MCPServerList; each entry is handed to one row. */
interface MCPServerConfig {
  /** Used as the React key of the row and passed to `onDelete`. */
  id: string;
  type: "sse" | "stdio" | "shttp";
  name?: string;
  url?: string;
  api_key?: string;
  command?: string;
  args?: string[];
  env?: Record<string, string>;
}
|
||||
|
||||
/** Props accepted by the MCPServerList component. */
interface MCPServerListProps {
  /** Servers to list; an empty array renders the "no servers" placeholder. */
  servers: MCPServerConfig[];
  /** Receives the whole server entry whose edit button was clicked. */
  onEdit: (server: MCPServerConfig) => void;
  /** Receives the id of the server whose delete button was clicked. */
  onDelete: (serverId: string) => void;
}
|
||||
|
||||
/**
 * Table of configured MCP servers with name, type, details and action columns.
 *
 * Renders a placeholder box instead of the table when `servers` is empty.
 * Each row is an MCPServerListItem keyed by the server id.
 *
 * @param servers  Servers to list.
 * @param onEdit   Called with the server whose edit button was clicked.
 * @param onDelete Called with the id of the server whose delete button was clicked.
 */
export function MCPServerList({
  servers,
  onEdit,
  onDelete,
}: MCPServerListProps) {
  const { t } = useTranslation();

  const isEmpty = servers.length === 0;
  if (isEmpty) {
    return (
      <div className="border border-tertiary rounded-md p-8 text-center">
        <p className="text-content-2 text-sm">
          {t(I18nKey.SETTINGS$MCP_NO_SERVERS)}
        </p>
      </div>
    );
  }

  const rows = servers.map((entry) => (
    <MCPServerListItem
      key={entry.id}
      server={entry}
      onEdit={() => onEdit(entry)}
      onDelete={() => onDelete(entry.id)}
    />
  ));

  return (
    <div className="border border-tertiary rounded-md overflow-hidden">
      <table className="w-full">
        <thead className="bg-base-tertiary">
          <tr className="grid grid-cols-[minmax(0,0.25fr)_120px_minmax(0,1fr)_120px] gap-4 items-start">
            <th className="text-left p-3 text-sm font-medium">
              {t(I18nKey.SETTINGS$NAME)}
            </th>
            <th className="text-left p-3 text-sm font-medium">
              {t(I18nKey.SETTINGS$MCP_SERVER_TYPE)}
            </th>
            <th className="text-left p-3 text-sm font-medium">
              {t(I18nKey.SETTINGS$MCP_SERVER_DETAILS)}
            </th>
            <th className="text-right p-3 text-sm font-medium">
              {t(I18nKey.SETTINGS$ACTIONS)}
            </th>
          </tr>
        </thead>
        <tbody>{rows}</tbody>
      </table>
    </div>
  );
}
|
||||
@@ -1,7 +1,8 @@
|
||||
import { Tooltip } from "@heroui/react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import ConfirmIcon from "#/assets/confirm";
|
||||
import RejectIcon from "#/assets/reject";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { cn } from "#/utils/utils";
|
||||
|
||||
interface ActionTooltipProps {
|
||||
type: "confirm" | "reject";
|
||||
@@ -11,35 +12,25 @@ interface ActionTooltipProps {
|
||||
export function ActionTooltip({ type, onClick }: ActionTooltipProps) {
|
||||
const { t } = useTranslation();
|
||||
|
||||
const isConfirm = type === "confirm";
|
||||
|
||||
const ariaLabel = isConfirm
|
||||
? t(I18nKey.ACTION$CONFIRM)
|
||||
: t(I18nKey.ACTION$REJECT);
|
||||
|
||||
const content = isConfirm
|
||||
? t(I18nKey.CHAT_INTERFACE$USER_CONFIRMED)
|
||||
: t(I18nKey.CHAT_INTERFACE$USER_REJECTED);
|
||||
|
||||
const buttonLabel = isConfirm
|
||||
? `${t(I18nKey.CHAT_INTERFACE$INPUT_CONTINUE_MESSAGE)} ⌘↩`
|
||||
: `${t(I18nKey.BUTTON$CANCEL)} ⇧⌘⌫`;
|
||||
const content =
|
||||
type === "confirm"
|
||||
? t(I18nKey.CHAT_INTERFACE$USER_CONFIRMED)
|
||||
: t(I18nKey.CHAT_INTERFACE$USER_REJECTED);
|
||||
|
||||
return (
|
||||
<Tooltip content={content} closeDelay={100}>
|
||||
<button
|
||||
data-testid={`action-${type}-button`}
|
||||
type="button"
|
||||
aria-label={ariaLabel}
|
||||
className={cn(
|
||||
"rounded px-2 h-6.5 text-sm font-medium leading-5 cursor-pointer hover:opacity-80",
|
||||
aria-label={
|
||||
type === "confirm"
|
||||
? "bg-tertiary text-white"
|
||||
: "bg-white text-[#0D0F11]",
|
||||
)}
|
||||
? t(I18nKey.ACTION$CONFIRM)
|
||||
: t(I18nKey.ACTION$REJECT)
|
||||
}
|
||||
className="bg-tertiary rounded-full p-1 hover:bg-base-secondary"
|
||||
onClick={onClick}
|
||||
>
|
||||
{buttonLabel}
|
||||
{type === "confirm" ? <ConfirmIcon /> : <RejectIcon />}
|
||||
</button>
|
||||
</Tooltip>
|
||||
);
|
||||
|
||||
@@ -1,120 +1,31 @@
|
||||
import { useDispatch, useSelector } from "react-redux";
|
||||
import { useCallback, useEffect } from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { AgentState } from "#/types/agent-state";
|
||||
import { generateAgentStateChangeEvent } from "#/services/agent-state-service";
|
||||
import { useWsClient } from "#/context/ws-client-provider";
|
||||
import { ActionTooltip } from "../action-tooltip";
|
||||
import { isOpenHandsAction } from "#/types/core/guards";
|
||||
import { ActionSecurityRisk } from "#/state/security-analyzer-slice";
|
||||
import { RiskAlert } from "#/components/shared/risk-alert";
|
||||
import WarningIcon from "#/icons/u-warning.svg?react";
|
||||
import { RootState } from "#/store";
|
||||
import { addSubmittedEventId } from "#/state/event-message-slice";
|
||||
|
||||
export function ConfirmationButtons() {
|
||||
const submittedEventIds = useSelector(
|
||||
(state: RootState) => state.eventMessage.submittedEventIds,
|
||||
);
|
||||
|
||||
const dispatch = useDispatch();
|
||||
|
||||
const { t } = useTranslation();
|
||||
const { send } = useWsClient();
|
||||
|
||||
const { send, parsedEvents } = useWsClient();
|
||||
|
||||
// Find the most recent action awaiting confirmation
|
||||
const awaitingAction = parsedEvents
|
||||
.slice()
|
||||
.reverse()
|
||||
.find((ev) => {
|
||||
if (!isOpenHandsAction(ev) || ev.source !== "agent") return false;
|
||||
const args = ev.args as Record<string, unknown>;
|
||||
return args?.confirmation_state === "awaiting_confirmation";
|
||||
});
|
||||
|
||||
const handleStateChange = useCallback(
|
||||
(state: AgentState) => {
|
||||
if (!awaitingAction) {
|
||||
return;
|
||||
}
|
||||
|
||||
dispatch(addSubmittedEventId(awaitingAction.id));
|
||||
send(generateAgentStateChangeEvent(state));
|
||||
},
|
||||
[send],
|
||||
);
|
||||
|
||||
// Handle keyboard shortcuts
|
||||
useEffect(() => {
|
||||
if (!awaitingAction) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const handleCancelShortcut = (event: KeyboardEvent) => {
|
||||
if (event.shiftKey && event.metaKey && event.key === "Backspace") {
|
||||
event.preventDefault();
|
||||
handleStateChange(AgentState.USER_REJECTED);
|
||||
}
|
||||
};
|
||||
|
||||
const handleContinueShortcut = (event: KeyboardEvent) => {
|
||||
if (event.metaKey && event.key === "Enter") {
|
||||
event.preventDefault();
|
||||
handleStateChange(AgentState.USER_CONFIRMED);
|
||||
}
|
||||
};
|
||||
|
||||
const handleKeyDown = (event: KeyboardEvent) => {
|
||||
// Cancel: Shift+Cmd+Backspace (⇧⌘⌫)
|
||||
handleCancelShortcut(event);
|
||||
// Continue: Cmd+Enter (⌘↩)
|
||||
handleContinueShortcut(event);
|
||||
};
|
||||
|
||||
document.addEventListener("keydown", handleKeyDown);
|
||||
|
||||
return () => document.removeEventListener("keydown", handleKeyDown);
|
||||
}, [awaitingAction, handleStateChange]);
|
||||
|
||||
if (!awaitingAction || submittedEventIds.includes(awaitingAction.id)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const { args } = awaitingAction as { args: Record<string, unknown> };
|
||||
|
||||
const risk = args?.security_risk;
|
||||
|
||||
const isHighRisk =
|
||||
typeof risk === "string"
|
||||
? risk.toLowerCase() === "high"
|
||||
: Number(risk) === ActionSecurityRisk.HIGH;
|
||||
const handleStateChange = (state: AgentState) => {
|
||||
const event = generateAgentStateChangeEvent(state);
|
||||
send(event);
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="flex flex-col gap-2 pt-4">
|
||||
{isHighRisk && (
|
||||
<RiskAlert
|
||||
content={t(I18nKey.CHAT_INTERFACE$HIGH_RISK_WARNING)}
|
||||
icon={<WarningIcon width={16} height={16} color="#fff" />}
|
||||
severity="high"
|
||||
title={t(I18nKey.COMMON$HIGH_RISK)}
|
||||
<div className="flex justify-between items-center pt-4">
|
||||
<p>{t(I18nKey.CHAT_INTERFACE$USER_ASK_CONFIRMATION)}</p>
|
||||
<div className="flex items-center gap-3">
|
||||
<ActionTooltip
|
||||
type="confirm"
|
||||
onClick={() => handleStateChange(AgentState.USER_CONFIRMED)}
|
||||
/>
|
||||
<ActionTooltip
|
||||
type="reject"
|
||||
onClick={() => handleStateChange(AgentState.USER_REJECTED)}
|
||||
/>
|
||||
)}
|
||||
<div className="flex justify-between items-center">
|
||||
<p className="text-sm font-normal text-white">
|
||||
{t(I18nKey.CHAT_INTERFACE$USER_ASK_CONFIRMATION)}
|
||||
</p>
|
||||
<div className="flex items-center gap-3">
|
||||
<ActionTooltip
|
||||
type="reject"
|
||||
onClick={() => handleStateChange(AgentState.USER_REJECTED)}
|
||||
/>
|
||||
<ActionTooltip
|
||||
type="confirm"
|
||||
onClick={() => handleStateChange(AgentState.USER_CONFIRMED)}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
|
||||
@@ -23,7 +23,7 @@ export function ModalBackdrop({ children, onClose }: ModalBackdropProps) {
|
||||
<div className="fixed inset-0 flex items-center justify-center z-20">
|
||||
<div
|
||||
onClick={handleClick}
|
||||
className="fixed inset-0 bg-black opacity-60"
|
||||
className="fixed inset-0 bg-black bg-opacity-80"
|
||||
/>
|
||||
<div className="relative">{children}</div>
|
||||
</div>
|
||||
|
||||
@@ -93,14 +93,14 @@ function SecurityInvariant() {
|
||||
(risk: ActionSecurityRisk) => {
|
||||
switch (risk) {
|
||||
case ActionSecurityRisk.LOW:
|
||||
return t(I18nKey.SECURITY$LOW_RISK);
|
||||
return t(I18nKey.SECURITY_ANALYZER$LOW_RISK);
|
||||
case ActionSecurityRisk.MEDIUM:
|
||||
return t(I18nKey.SECURITY$MEDIUM_RISK);
|
||||
return t(I18nKey.SECURITY_ANALYZER$MEDIUM_RISK);
|
||||
case ActionSecurityRisk.HIGH:
|
||||
return t(I18nKey.SECURITY$HIGH_RISK);
|
||||
return t(I18nKey.SECURITY_ANALYZER$HIGH_RISK);
|
||||
case ActionSecurityRisk.UNKNOWN:
|
||||
default:
|
||||
return t(I18nKey.SECURITY$UNKNOWN_RISK);
|
||||
return t(I18nKey.SECURITY_ANALYZER$UNKNOWN_RISK);
|
||||
}
|
||||
},
|
||||
[t],
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
import { ReactNode } from "react";
|
||||
import { cn } from "#/utils/utils";
|
||||
|
||||
interface RiskAlertProps {
|
||||
className?: string;
|
||||
content: ReactNode;
|
||||
icon?: ReactNode;
|
||||
severity: "high" | "medium" | "low";
|
||||
title: string;
|
||||
}
|
||||
|
||||
/**
 * Inline alert banner that flags a risky action.
 *
 * Lays out the optional icon, a bold title and the content in a single row.
 * Only the "high" severity has a visual treatment; every other severity
 * renders nothing.
 *
 * @param props.className - Extra classes merged after the built-in ones.
 * @param props.content - Body of the alert, shown after the title.
 * @param props.icon - Optional leading icon.
 * @param props.severity - Risk level; only "high" produces output.
 * @param props.title - Bold label shown before the content.
 */
export function RiskAlert(props: RiskAlertProps) {
  const { className, content, icon, severity, title } = props;

  // Only the high-risk alert is supported for now. To support other risk
  // levels, add them here and use cva to create variants of this component.
  if (severity !== "high") {
    return null;
  }

  const containerClasses = cn(
    "flex items-center gap-3.5 bg-[#4A0709] border border-[#FF0006] text-red-400 rounded-xl px-3.5 h-13 text-sm text-white",
    className,
  );

  return (
    <div className={containerClasses}>
      {icon && <span className="">{icon}</span>}
      <span className="font-bold">{title}</span>
      <span className="font-normal">{content}</span>
    </div>
  );
}
|
||||
@@ -1,67 +0,0 @@
|
||||
import { useMutation, useQueryClient } from "@tanstack/react-query";
|
||||
import { useSettings } from "#/hooks/query/use-settings";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
import { MCPSSEServer, MCPStdioServer, MCPSHTTPServer } from "#/types/settings";
|
||||
|
||||
type MCPServerType = "sse" | "stdio" | "shttp";
|
||||
|
||||
interface MCPServerConfig {
|
||||
type: MCPServerType;
|
||||
name?: string;
|
||||
url?: string;
|
||||
api_key?: string;
|
||||
command?: string;
|
||||
args?: string[];
|
||||
env?: Record<string, string>;
|
||||
}
|
||||
|
||||
/**
 * Mutation hook that appends one MCP server to the saved MCP configuration.
 *
 * The mutation reads the current settings from `useSettings`, adds the server
 * to the list matching `server.type`, and persists the result through
 * `OpenHands.saveSettings`. On success the "settings" query is invalidated so
 * it refetches.
 *
 * If settings have not loaded yet, the mutation resolves without saving.
 *
 * @returns The `useMutation` result; call `mutate(server)` to add a server.
 */
export function useAddMcpServer() {
  const queryClient = useQueryClient();
  const { data: settings } = useSettings();

  return useMutation({
    mutationFn: async (server: MCPServerConfig): Promise<void> => {
      if (!settings) return;

      const currentConfig = settings.MCP_CONFIG || {
        sse_servers: [],
        stdio_servers: [],
        shttp_servers: [],
      };

      // The spread below is shallow, so the server lists are still the arrays
      // held in the settings query cache. Each branch therefore builds a NEW
      // array instead of pushing, so the cached settings are never modified
      // before (or without) a successful save.
      const newConfig = { ...currentConfig };

      if (server.type === "sse") {
        const sseServer: MCPSSEServer = {
          url: server.url!,
          ...(server.api_key && { api_key: server.api_key }),
        };
        newConfig.sse_servers = [...currentConfig.sse_servers, sseServer];
      } else if (server.type === "stdio") {
        const stdioServer: MCPStdioServer = {
          name: server.name!,
          command: server.command!,
          ...(server.args && { args: server.args }),
          ...(server.env && { env: server.env }),
        };
        newConfig.stdio_servers = [...currentConfig.stdio_servers, stdioServer];
      } else if (server.type === "shttp") {
        const shttpServer: MCPSHTTPServer = {
          url: server.url!,
          ...(server.api_key && { api_key: server.api_key }),
        };
        newConfig.shttp_servers = [...currentConfig.shttp_servers, shttpServer];
      }

      const apiSettings = {
        mcp_config: newConfig,
      };

      await OpenHands.saveSettings(apiSettings);
    },
    onSuccess: () => {
      // Invalidate the settings query to trigger a refetch
      queryClient.invalidateQueries({ queryKey: ["settings"] });
    },
  });
}
|
||||
@@ -1,37 +0,0 @@
|
||||
import { useMutation, useQueryClient } from "@tanstack/react-query";
|
||||
import { useSettings } from "#/hooks/query/use-settings";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
import { MCPConfig } from "#/types/settings";
|
||||
|
||||
/**
 * Mutation hook that removes one MCP server from the saved MCP configuration.
 *
 * `serverId` has the form `<type>-<index>` (for example `stdio-2`), where
 * `<type>` is `sse`, `stdio` or `shttp` and `<index>` is the position in that
 * type's list. The updated configuration is persisted through
 * `OpenHands.saveSettings`, and on success the "settings" query is invalidated
 * so it refetches.
 *
 * If no MCP configuration is loaded, the mutation resolves without saving.
 *
 * @returns The `useMutation` result; call `mutate(serverId)` to delete.
 * @throws Error (as a mutation error) when the index part of `serverId` is
 *   not a non-negative integer.
 */
export function useDeleteMcpServer() {
  const queryClient = useQueryClient();
  const { data: settings } = useSettings();

  return useMutation({
    mutationFn: async (serverId: string): Promise<void> => {
      if (!settings?.MCP_CONFIG) return;

      const currentConfig = settings.MCP_CONFIG;
      const [serverType, indexStr] = serverId.split("-");
      const index = Number.parseInt(indexStr, 10);

      // A malformed id parses to NaN, and `splice(NaN, 1)` would silently
      // delete the FIRST server. Reject it instead of removing the wrong entry.
      if (!Number.isInteger(index) || index < 0) {
        throw new Error(`Invalid MCP server id: ${serverId}`);
      }

      const isKept = (_: unknown, position: number) => position !== index;

      // The spread is shallow, so the lists are still the arrays held in the
      // settings query cache. `filter` returns a new array, which leaves the
      // cached settings untouched if the save fails.
      const newConfig: MCPConfig = { ...currentConfig };

      if (serverType === "sse") {
        newConfig.sse_servers = currentConfig.sse_servers.filter(isKept);
      } else if (serverType === "stdio") {
        newConfig.stdio_servers = currentConfig.stdio_servers.filter(isKept);
      } else if (serverType === "shttp") {
        newConfig.shttp_servers = currentConfig.shttp_servers.filter(isKept);
      }

      const apiSettings = {
        mcp_config: newConfig,
      };

      await OpenHands.saveSettings(apiSettings);
    },
    onSuccess: () => {
      // Invalidate the settings query to trigger a refetch
      queryClient.invalidateQueries({ queryKey: ["settings"] });
    },
  });
}
|
||||
@@ -1,69 +0,0 @@
|
||||
import { useMutation, useQueryClient } from "@tanstack/react-query";
|
||||
import { useSettings } from "#/hooks/query/use-settings";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
import { MCPSSEServer, MCPStdioServer, MCPSHTTPServer } from "#/types/settings";
|
||||
|
||||
type MCPServerType = "sse" | "stdio" | "shttp";
|
||||
|
||||
interface MCPServerConfig {
|
||||
type: MCPServerType;
|
||||
name?: string;
|
||||
url?: string;
|
||||
api_key?: string;
|
||||
command?: string;
|
||||
args?: string[];
|
||||
env?: Record<string, string>;
|
||||
}
|
||||
|
||||
/**
 * Mutation hook that replaces one MCP server in the saved MCP configuration.
 *
 * `serverId` has the form `<type>-<index>` (for example `sse-0`), where
 * `<type>` is `sse`, `stdio` or `shttp` and `<index>` is the position in that
 * type's list. The list is chosen from the id, not from `server.type`. The
 * entry at that position is replaced with one built from `server`, the result
 * is persisted through `OpenHands.saveSettings`, and on success the
 * "settings" query is invalidated so it refetches.
 *
 * If no MCP configuration is loaded, the mutation resolves without saving.
 *
 * @returns The `useMutation` result; call `mutate({ serverId, server })`.
 * @throws Error (as a mutation error) when the index part of `serverId` is
 *   not a non-negative integer.
 */
export function useUpdateMcpServer() {
  const queryClient = useQueryClient();
  const { data: settings } = useSettings();

  return useMutation({
    mutationFn: async ({
      serverId,
      server,
    }: {
      serverId: string;
      server: MCPServerConfig;
    }): Promise<void> => {
      if (!settings?.MCP_CONFIG) return;

      const currentConfig = settings.MCP_CONFIG;
      const [serverType, indexStr] = serverId.split("-");
      const index = Number.parseInt(indexStr, 10);

      // A malformed id parses to NaN; writing `list[NaN]` only adds a stray
      // property that is dropped on serialization, so the save would "succeed"
      // without updating anything. Fail loudly instead.
      if (!Number.isInteger(index) || index < 0) {
        throw new Error(`Invalid MCP server id: ${serverId}`);
      }

      // The spread is shallow, so the lists are still the arrays held in the
      // settings query cache. Each branch copies its list before writing, which
      // leaves the cached settings untouched if the save fails.
      const newConfig = { ...currentConfig };

      if (serverType === "sse") {
        const sseServer: MCPSSEServer = {
          url: server.url!,
          ...(server.api_key && { api_key: server.api_key }),
        };
        const sseServers = [...currentConfig.sse_servers];
        sseServers[index] = sseServer;
        newConfig.sse_servers = sseServers;
      } else if (serverType === "stdio") {
        const stdioServer: MCPStdioServer = {
          name: server.name!,
          command: server.command!,
          ...(server.args && { args: server.args }),
          ...(server.env && { env: server.env }),
        };
        const stdioServers = [...currentConfig.stdio_servers];
        stdioServers[index] = stdioServer;
        newConfig.stdio_servers = stdioServers;
      } else if (serverType === "shttp") {
        const shttpServer: MCPSHTTPServer = {
          url: server.url!,
          ...(server.api_key && { api_key: server.api_key }),
        };
        const shttpServers = [...currentConfig.shttp_servers];
        shttpServers[index] = shttpServer;
        newConfig.shttp_servers = shttpServers;
      }

      const apiSettings = {
        mcp_config: newConfig,
      };

      await OpenHands.saveSettings(apiSettings);
    },
    onSuccess: () => {
      // Invalidate the settings query to trigger a refetch
      queryClient.invalidateQueries({ queryKey: ["settings"] });
    },
  });
}
|
||||
@@ -357,7 +357,6 @@ export enum I18nKey {
|
||||
CHAT_INTERFACE$INPUT_PLACEHOLDER = "CHAT_INTERFACE$INPUT_PLACEHOLDER",
|
||||
CHAT_INTERFACE$INPUT_CONTINUE_MESSAGE = "CHAT_INTERFACE$INPUT_CONTINUE_MESSAGE",
|
||||
CHAT_INTERFACE$USER_ASK_CONFIRMATION = "CHAT_INTERFACE$USER_ASK_CONFIRMATION",
|
||||
CHAT_INTERFACE$HIGH_RISK_WARNING = "CHAT_INTERFACE$HIGH_RISK_WARNING",
|
||||
CHAT_INTERFACE$USER_CONFIRMED = "CHAT_INTERFACE$USER_CONFIRMED",
|
||||
CHAT_INTERFACE$USER_REJECTED = "CHAT_INTERFACE$USER_REJECTED",
|
||||
CHAT_INTERFACE$INPUT_SEND_MESSAGE_BUTTON_CONTENT = "CHAT_INTERFACE$INPUT_SEND_MESSAGE_BUTTON_CONTENT",
|
||||
@@ -372,6 +371,10 @@ export enum I18nKey {
|
||||
CHAT_INTERFACE$MESSAGE_ARIA_LABEL = "CHAT_INTERFACE$MESSAGE_ARIA_LABEL",
|
||||
CHAT_INTERFACE$CHAT_CONVERSATION = "CHAT_INTERFACE$CHAT_CONVERSATION",
|
||||
CHAT_INTERFACE$UNKNOWN_SENDER = "CHAT_INTERFACE$UNKNOWN_SENDER",
|
||||
SECURITY_ANALYZER$UNKNOWN_RISK = "SECURITY_ANALYZER$UNKNOWN_RISK",
|
||||
SECURITY_ANALYZER$LOW_RISK = "SECURITY_ANALYZER$LOW_RISK",
|
||||
SECURITY_ANALYZER$MEDIUM_RISK = "SECURITY_ANALYZER$MEDIUM_RISK",
|
||||
SECURITY_ANALYZER$HIGH_RISK = "SECURITY_ANALYZER$HIGH_RISK",
|
||||
SETTINGS$MODEL_TOOLTIP = "SETTINGS$MODEL_TOOLTIP",
|
||||
SETTINGS$AGENT_TOOLTIP = "SETTINGS$AGENT_TOOLTIP",
|
||||
SETTINGS$LANGUAGE_TOOLTIP = "SETTINGS$LANGUAGE_TOOLTIP",
|
||||
@@ -382,12 +385,9 @@ export enum I18nKey {
|
||||
SETTINGS$REFRESH_LLM_API_KEY = "SETTINGS$REFRESH_LLM_API_KEY",
|
||||
SETTINGS$CONFIRMATION_MODE = "SETTINGS$CONFIRMATION_MODE",
|
||||
SETTINGS$CONFIRMATION_MODE_TOOLTIP = "SETTINGS$CONFIRMATION_MODE_TOOLTIP",
|
||||
SETTINGS$CONFIRMATION_MODE_LOCK_TOOLTIP = "SETTINGS$CONFIRMATION_MODE_LOCK_TOOLTIP",
|
||||
SETTINGS$AGENT_SELECT_ENABLED = "SETTINGS$AGENT_SELECT_ENABLED",
|
||||
SETTINGS$SECURITY_ANALYZER = "SETTINGS$SECURITY_ANALYZER",
|
||||
SETTINGS$SECURITY_ANALYZER_PLACEHOLDER = "SETTINGS$SECURITY_ANALYZER_PLACEHOLDER",
|
||||
SETTINGS$SECURITY_ANALYZER_TOOLTIP = "SETTINGS$SECURITY_ANALYZER_TOOLTIP",
|
||||
SETTINGS$SECURITY_ANALYZER_DESCRIPTION = "SETTINGS$SECURITY_ANALYZER_DESCRIPTION",
|
||||
SETTINGS$DONT_KNOW_API_KEY = "SETTINGS$DONT_KNOW_API_KEY",
|
||||
SETTINGS$CLICK_FOR_INSTRUCTIONS = "SETTINGS$CLICK_FOR_INSTRUCTIONS",
|
||||
SETTINGS$SAVED = "SETTINGS$SAVED",
|
||||
@@ -781,41 +781,4 @@ export enum I18nKey {
|
||||
PROJECT_MANAGEMENT$SVC_ACC_EMAIL_VALIDATION_ERROR = "PROJECT_MANAGEMENT$SVC_ACC_EMAIL_VALIDATION_ERROR",
|
||||
PROJECT_MANAGEMENT$SVC_ACC_API_KEY_VALIDATION_ERROR = "PROJECT_MANAGEMENT$SVC_ACC_API_KEY_VALIDATION_ERROR",
|
||||
MICROAGENT_MANAGEMENT$ERROR_LOADING_MICROAGENT_CONTENT = "MICROAGENT_MANAGEMENT$ERROR_LOADING_MICROAGENT_CONTENT",
|
||||
SETTINGS$MCP_SERVER_TYPE_SSE = "SETTINGS$MCP_SERVER_TYPE_SSE",
|
||||
SETTINGS$MCP_SERVER_TYPE_STDIO = "SETTINGS$MCP_SERVER_TYPE_STDIO",
|
||||
SETTINGS$MCP_SERVER_TYPE_SHTTP = "SETTINGS$MCP_SERVER_TYPE_SHTTP",
|
||||
SETTINGS$MCP_ERROR_URL_REQUIRED = "SETTINGS$MCP_ERROR_URL_REQUIRED",
|
||||
SETTINGS$MCP_ERROR_URL_INVALID_PROTOCOL = "SETTINGS$MCP_ERROR_URL_INVALID_PROTOCOL",
|
||||
SETTINGS$MCP_ERROR_URL_INVALID = "SETTINGS$MCP_ERROR_URL_INVALID",
|
||||
SETTINGS$MCP_ERROR_NAME_REQUIRED = "SETTINGS$MCP_ERROR_NAME_REQUIRED",
|
||||
SETTINGS$MCP_ERROR_NAME_INVALID = "SETTINGS$MCP_ERROR_NAME_INVALID",
|
||||
SETTINGS$MCP_ERROR_NAME_DUPLICATE = "SETTINGS$MCP_ERROR_NAME_DUPLICATE",
|
||||
SETTINGS$MCP_ERROR_COMMAND_REQUIRED = "SETTINGS$MCP_ERROR_COMMAND_REQUIRED",
|
||||
SETTINGS$MCP_ERROR_COMMAND_NO_SPACES = "SETTINGS$MCP_ERROR_COMMAND_NO_SPACES",
|
||||
SETTINGS$MCP_ERROR_URL_DUPLICATE = "SETTINGS$MCP_ERROR_URL_DUPLICATE",
|
||||
SETTINGS$MCP_ERROR_ENV_INVALID_FORMAT = "SETTINGS$MCP_ERROR_ENV_INVALID_FORMAT",
|
||||
SETTINGS$MCP_SERVER_TYPE = "SETTINGS$MCP_SERVER_TYPE",
|
||||
SETTINGS$MCP_API_KEY_PLACEHOLDER = "SETTINGS$MCP_API_KEY_PLACEHOLDER",
|
||||
SETTINGS$MCP_COMMAND_ARGUMENTS = "SETTINGS$MCP_COMMAND_ARGUMENTS",
|
||||
SETTINGS$MCP_COMMAND_ARGUMENTS_HELP = "SETTINGS$MCP_COMMAND_ARGUMENTS_HELP",
|
||||
SETTINGS$MCP_ENVIRONMENT_VARIABLES = "SETTINGS$MCP_ENVIRONMENT_VARIABLES",
|
||||
SETTINGS$MCP_ADD_SERVER = "SETTINGS$MCP_ADD_SERVER",
|
||||
SETTINGS$MCP_SAVE_SERVER = "SETTINGS$MCP_SAVE_SERVER",
|
||||
SETTINGS$MCP_NO_SERVERS = "SETTINGS$MCP_NO_SERVERS",
|
||||
SETTINGS$MCP_SERVER_DETAILS = "SETTINGS$MCP_SERVER_DETAILS",
|
||||
SETTINGS$MCP_CONFIRM_DELETE = "SETTINGS$MCP_CONFIRM_DELETE",
|
||||
SETTINGS$MCP_CONFIRM_CHANGES = "SETTINGS$MCP_CONFIRM_CHANGES",
|
||||
SETTINGS$MCP_DEFAULT_CONFIG = "SETTINGS$MCP_DEFAULT_CONFIG",
|
||||
PROJECT_MANAGEMENT$WORKSPACE_NAME_PLACEHOLDER = "PROJECT_MANAGEMENT$WORKSPACE_NAME_PLACEHOLDER",
|
||||
PROJECT_MANAGEMENT$CONFIGURE_MODAL_DESCRIPTION = "PROJECT_MANAGEMENT$CONFIGURE_MODAL_DESCRIPTION",
|
||||
PROJECT_MANAGEMENT$IMPORTANT_WORKSPACE_INTEGRATION = "PROJECT_MANAGEMENT$IMPORTANT_WORKSPACE_INTEGRATION",
|
||||
SETTINGS = "SETTINGS",
|
||||
MICROAGENT_MANAGEMENT$OPENING_PR_TO_CREATE_MICROAGENT = "MICROAGENT_MANAGEMENT$OPENING_PR_TO_CREATE_MICROAGENT",
|
||||
MICROAGENT_MANAGEMENT$PR_READY_FOR_REVIEW = "MICROAGENT_MANAGEMENT$PR_READY_FOR_REVIEW",
|
||||
MICROAGENT_MANAGEMENT$PR_NOT_CREATED = "MICROAGENT_MANAGEMENT$PR_NOT_CREATED",
|
||||
MICROAGENT_MANAGEMENT$ERROR_CREATING_MICROAGENT = "MICROAGENT_MANAGEMENT$ERROR_CREATING_MICROAGENT",
|
||||
SETTINGS$SECURITY_ANALYZER_LLM_DEFAULT = "SETTINGS$SECURITY_ANALYZER_LLM_DEFAULT",
|
||||
SETTINGS$SECURITY_ANALYZER_NONE = "SETTINGS$SECURITY_ANALYZER_NONE",
|
||||
SETTINGS$SECURITY_ANALYZER_INVARIANT = "SETTINGS$SECURITY_ANALYZER_INVARIANT",
|
||||
COMMON$HIGH_RISK = "COMMON$HIGH_RISK",
|
||||
}
|
||||
|
||||
+12482
-13074
File diff suppressed because it is too large
Load Diff
@@ -1,5 +0,0 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none">
|
||||
<path d="M12 14C11.4477 14 11 13.5523 11 13V10C11 9.44772 11.4477 9 12 9C12.5523 9 13 9.44772 13 10V13C13 13.5523 12.5523 14 12 14Z" fill="currentColor"/>
|
||||
<path d="M10.5 16.5C10.5 15.6716 11.1716 15 12 15C12.8284 15 13.5 15.6716 13.5 16.5C13.5 17.3284 12.8284 18 12 18C11.1716 18 10.5 17.3284 10.5 16.5Z" fill="currentColor"/>
|
||||
<path d="M10.2301 3.2156C10.98 1.79093 13.02 1.79092 13.7698 3.2156L22.1135 19.0685C22.8144 20.4003 21.8486 22 20.3436 22H3.65635C2.15133 22 1.18556 20.4003 1.88651 19.0685L10.2301 3.2156ZM20.3436 20L12 4.1471L3.65635 20L20.3436 20Z" fill="currentColor"/>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 692 B |
@@ -123,7 +123,7 @@ const openHandsHandlers = [
|
||||
),
|
||||
|
||||
http.get("/api/options/security-analyzers", async () =>
|
||||
HttpResponse.json(["llm", "none"]),
|
||||
HttpResponse.json(["mock-invariant"]),
|
||||
),
|
||||
|
||||
http.post("http://localhost:3001/api/submit-feedback", async () => {
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { useDisclosure } from "@heroui/react";
|
||||
import React from "react";
|
||||
import { useNavigate } from "react-router";
|
||||
import { useDispatch } from "react-redux";
|
||||
@@ -17,7 +18,7 @@ import {
|
||||
Orientation,
|
||||
ResizablePanel,
|
||||
} from "#/components/layout/resizable-panel";
|
||||
|
||||
import Security from "#/components/shared/modals/security/security";
|
||||
import { useActiveConversation } from "#/hooks/query/use-active-conversation";
|
||||
import { useSettings } from "#/hooks/query/use-settings";
|
||||
import { displayErrorToast } from "#/utils/custom-toast-handlers";
|
||||
@@ -82,6 +83,12 @@ function AppContent() {
|
||||
};
|
||||
}, []);
|
||||
|
||||
const {
|
||||
isOpen: securityModalIsOpen,
|
||||
onOpen: onSecurityModalOpen,
|
||||
onOpenChange: onSecurityModalOpenChange,
|
||||
} = useDisclosure();
|
||||
|
||||
function renderMain() {
|
||||
if (width <= 1024) {
|
||||
return (
|
||||
@@ -99,7 +106,7 @@ function AppContent() {
|
||||
<ResizablePanel
|
||||
orientation={Orientation.HORIZONTAL}
|
||||
className="grow h-full min-h-0 min-w-0"
|
||||
initialSize={564}
|
||||
initialSize={500}
|
||||
firstClassName="rounded-xl overflow-hidden border border-neutral-600 bg-base-secondary"
|
||||
secondClassName="flex flex-col overflow-hidden"
|
||||
firstChild={<ChatInterface />}
|
||||
@@ -115,7 +122,17 @@ function AppContent() {
|
||||
<div data-testid="app-route" className="flex flex-col h-full gap-3">
|
||||
<div className="flex h-full overflow-auto">{renderMain()}</div>
|
||||
|
||||
<Controls showSecurityLock={!!settings?.CONFIRMATION_MODE} />
|
||||
<Controls
|
||||
setSecurityOpen={onSecurityModalOpen}
|
||||
showSecurityLock={!!settings?.SECURITY_ANALYZER}
|
||||
/>
|
||||
{settings && (
|
||||
<Security
|
||||
isOpen={securityModalIsOpen}
|
||||
onOpenChange={onSecurityModalOpenChange}
|
||||
securityAnalyzer={settings.SECURITY_ANALYZER}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
</EventHandler>
|
||||
</ConversationSubscriptionsProvider>
|
||||
|
||||
@@ -8,8 +8,6 @@ import { useSettings } from "#/hooks/query/use-settings";
|
||||
import { hasAdvancedSettingsSet } from "#/utils/has-advanced-settings-set";
|
||||
import { useSaveSettings } from "#/hooks/mutation/use-save-settings";
|
||||
import { SettingsSwitch } from "#/components/features/settings/settings-switch";
|
||||
import { TooltipButton } from "#/components/shared/buttons/tooltip-button";
|
||||
import QuestionCircleIcon from "#/icons/question-circle.svg?react";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { SettingsInput } from "#/components/features/settings/settings-input";
|
||||
import { HelpLink } from "#/components/features/settings/help-link";
|
||||
@@ -38,6 +36,8 @@ function LlmSettingsScreen() {
|
||||
const { data: config } = useConfig();
|
||||
|
||||
const [view, setView] = React.useState<"basic" | "advanced">("basic");
|
||||
const [securityAnalyzerInputIsVisible, setSecurityAnalyzerInputIsVisible] =
|
||||
React.useState(false);
|
||||
|
||||
const [dirtyInputs, setDirtyInputs] = React.useState({
|
||||
model: false,
|
||||
@@ -55,19 +55,6 @@ function LlmSettingsScreen() {
|
||||
string | null
|
||||
>(null);
|
||||
|
||||
// Track confirmation mode state to control security analyzer visibility
|
||||
const [confirmationModeEnabled, setConfirmationModeEnabled] = React.useState(
|
||||
settings?.CONFIRMATION_MODE ?? DEFAULT_SETTINGS.CONFIRMATION_MODE,
|
||||
);
|
||||
|
||||
// Track selected security analyzer for form submission
|
||||
const [selectedSecurityAnalyzer, setSelectedSecurityAnalyzer] =
|
||||
React.useState(
|
||||
settings?.SECURITY_ANALYZER === null
|
||||
? "none"
|
||||
: (settings?.SECURITY_ANALYZER ?? DEFAULT_SETTINGS.SECURITY_ANALYZER),
|
||||
);
|
||||
|
||||
const modelsAndProviders = organizeModelsAndProviders(
|
||||
resources?.models || [],
|
||||
);
|
||||
@@ -87,6 +74,7 @@ function LlmSettingsScreen() {
|
||||
};
|
||||
|
||||
const userSettingsIsAdvanced = determineWhetherToToggleAdvancedSettings();
|
||||
if (settings) setSecurityAnalyzerInputIsVisible(settings.CONFIRMATION_MODE);
|
||||
|
||||
if (userSettingsIsAdvanced) setView("advanced");
|
||||
else setView("basic");
|
||||
@@ -99,20 +87,6 @@ function LlmSettingsScreen() {
|
||||
}
|
||||
}, [settings?.LLM_MODEL]);
|
||||
|
||||
// Update confirmation mode state when settings change
|
||||
React.useEffect(() => {
|
||||
if (settings?.CONFIRMATION_MODE !== undefined) {
|
||||
setConfirmationModeEnabled(settings.CONFIRMATION_MODE);
|
||||
}
|
||||
}, [settings?.CONFIRMATION_MODE]);
|
||||
|
||||
// Update selected security analyzer state when settings change
|
||||
React.useEffect(() => {
|
||||
if (settings?.SECURITY_ANALYZER !== undefined) {
|
||||
setSelectedSecurityAnalyzer(settings.SECURITY_ANALYZER || "none");
|
||||
}
|
||||
}, [settings?.SECURITY_ANALYZER]);
|
||||
|
||||
const handleSuccessfulMutation = () => {
|
||||
displaySuccessToast(t(I18nKey.SETTINGS$SAVED_WARNING));
|
||||
setDirtyInputs({
|
||||
@@ -140,11 +114,6 @@ function LlmSettingsScreen() {
|
||||
const model = formData.get("llm-model-input")?.toString();
|
||||
const apiKey = formData.get("llm-api-key-input")?.toString();
|
||||
const searchApiKey = formData.get("search-api-key-input")?.toString();
|
||||
const confirmationMode =
|
||||
formData.get("enable-confirmation-mode-switch")?.toString() === "on";
|
||||
const securityAnalyzer = formData
|
||||
.get("security-analyzer-input")
|
||||
?.toString();
|
||||
|
||||
const fullLlmModel = provider && model && `${provider}/${model}`;
|
||||
|
||||
@@ -153,15 +122,12 @@ function LlmSettingsScreen() {
|
||||
LLM_MODEL: fullLlmModel,
|
||||
llm_api_key: apiKey || null,
|
||||
SEARCH_API_KEY: searchApiKey || "",
|
||||
CONFIRMATION_MODE: confirmationMode,
|
||||
SECURITY_ANALYZER:
|
||||
securityAnalyzer === "none"
|
||||
? null
|
||||
: securityAnalyzer || DEFAULT_SETTINGS.SECURITY_ANALYZER,
|
||||
|
||||
// reset advanced settings
|
||||
LLM_BASE_URL: DEFAULT_SETTINGS.LLM_BASE_URL,
|
||||
AGENT: DEFAULT_SETTINGS.AGENT,
|
||||
CONFIRMATION_MODE: DEFAULT_SETTINGS.CONFIRMATION_MODE,
|
||||
SECURITY_ANALYZER: DEFAULT_SETTINGS.SECURITY_ANALYZER,
|
||||
ENABLE_DEFAULT_CONDENSER: DEFAULT_SETTINGS.ENABLE_DEFAULT_CONDENSER,
|
||||
},
|
||||
{
|
||||
@@ -194,10 +160,7 @@ function LlmSettingsScreen() {
|
||||
AGENT: agent,
|
||||
CONFIRMATION_MODE: confirmationMode,
|
||||
ENABLE_DEFAULT_CONDENSER: enableDefaultCondenser,
|
||||
SECURITY_ANALYZER:
|
||||
securityAnalyzer === "none"
|
||||
? null
|
||||
: securityAnalyzer || DEFAULT_SETTINGS.SECURITY_ANALYZER,
|
||||
SECURITY_ANALYZER: confirmationMode ? securityAnalyzer : undefined,
|
||||
},
|
||||
{
|
||||
onSuccess: handleSuccessfulMutation,
|
||||
@@ -212,6 +175,7 @@ function LlmSettingsScreen() {
|
||||
};
|
||||
|
||||
const handleToggleAdvancedSettings = (isToggled: boolean) => {
|
||||
setSecurityAnalyzerInputIsVisible(!!settings?.CONFIRMATION_MODE);
|
||||
setView(isToggled ? "advanced" : "basic");
|
||||
setDirtyInputs({
|
||||
model: false,
|
||||
@@ -282,21 +246,12 @@ function LlmSettingsScreen() {
|
||||
};
|
||||
|
||||
const handleConfirmationModeIsDirty = (isToggled: boolean) => {
|
||||
setSecurityAnalyzerInputIsVisible(isToggled);
|
||||
const confirmationModeIsDirty = isToggled !== settings?.CONFIRMATION_MODE;
|
||||
setDirtyInputs((prev) => ({
|
||||
...prev,
|
||||
confirmationMode: confirmationModeIsDirty,
|
||||
}));
|
||||
setConfirmationModeEnabled(isToggled);
|
||||
|
||||
// When confirmation mode is enabled, set default security analyzer to "llm" if not already set
|
||||
if (isToggled && !selectedSecurityAnalyzer) {
|
||||
setSelectedSecurityAnalyzer(DEFAULT_SETTINGS.SECURITY_ANALYZER);
|
||||
setDirtyInputs((prev) => ({
|
||||
...prev,
|
||||
securityAnalyzer: true,
|
||||
}));
|
||||
}
|
||||
};
|
||||
|
||||
const handleEnableDefaultCondenserIsDirty = (isToggled: boolean) => {
|
||||
@@ -319,47 +274,6 @@ function LlmSettingsScreen() {
|
||||
|
||||
const formIsDirty = Object.values(dirtyInputs).some((isDirty) => isDirty);
|
||||
|
||||
const getSecurityAnalyzerOptions = () => {
|
||||
const analyzers = resources?.securityAnalyzers || [];
|
||||
const orderedItems = [];
|
||||
|
||||
// Add LLM analyzer first
|
||||
if (analyzers.includes("llm")) {
|
||||
orderedItems.push({
|
||||
key: "llm",
|
||||
label: t(I18nKey.SETTINGS$SECURITY_ANALYZER_LLM_DEFAULT),
|
||||
});
|
||||
}
|
||||
|
||||
// Add None option second
|
||||
orderedItems.push({
|
||||
key: "none",
|
||||
label: t(I18nKey.SETTINGS$SECURITY_ANALYZER_NONE),
|
||||
});
|
||||
|
||||
// Add Invariant analyzer third
|
||||
if (analyzers.includes("invariant")) {
|
||||
orderedItems.push({
|
||||
key: "invariant",
|
||||
label: t(I18nKey.SETTINGS$SECURITY_ANALYZER_INVARIANT),
|
||||
});
|
||||
}
|
||||
|
||||
// Add any other analyzers that might exist
|
||||
analyzers.forEach((analyzer) => {
|
||||
if (!["llm", "invariant", "none"].includes(analyzer)) {
|
||||
// For unknown analyzers, use the analyzer name as fallback
|
||||
// In the future, add specific i18n keys for new analyzers
|
||||
orderedItems.push({
|
||||
key: analyzer,
|
||||
label: analyzer, // TODO: Add i18n support for new analyzers
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
return orderedItems;
|
||||
};
|
||||
|
||||
if (!settings || isFetching) return <LlmSettingsInputsSkeleton />;
|
||||
|
||||
return (
|
||||
@@ -538,7 +452,7 @@ function LlmSettingsScreen() {
|
||||
items={
|
||||
resources?.agents.map((agent) => ({
|
||||
key: agent,
|
||||
label: agent, // TODO: Add i18n support for agent names
|
||||
label: agent,
|
||||
})) || []
|
||||
}
|
||||
defaultSelectedKey={settings.AGENT}
|
||||
@@ -573,67 +487,39 @@ function LlmSettingsScreen() {
|
||||
>
|
||||
{t(I18nKey.SETTINGS$ENABLE_MEMORY_CONDENSATION)}
|
||||
</SettingsSwitch>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Confirmation mode and security analyzer - always visible */}
|
||||
<div className="flex items-center gap-2">
|
||||
<SettingsSwitch
|
||||
testId="enable-confirmation-mode-switch"
|
||||
name="enable-confirmation-mode-switch"
|
||||
onToggle={handleConfirmationModeIsDirty}
|
||||
defaultIsToggled={settings.CONFIRMATION_MODE}
|
||||
isBeta
|
||||
>
|
||||
{t(I18nKey.SETTINGS$CONFIRMATION_MODE)}
|
||||
</SettingsSwitch>
|
||||
<TooltipButton
|
||||
tooltip={t(I18nKey.SETTINGS$CONFIRMATION_MODE_TOOLTIP)}
|
||||
ariaLabel={t(I18nKey.SETTINGS$CONFIRMATION_MODE)}
|
||||
className="text-[#9099AC] hover:text-white cursor-help"
|
||||
>
|
||||
<QuestionCircleIcon width={16} height={16} />
|
||||
</TooltipButton>
|
||||
</div>
|
||||
<SettingsSwitch
|
||||
testId="enable-confirmation-mode-switch"
|
||||
name="enable-confirmation-mode-switch"
|
||||
onToggle={handleConfirmationModeIsDirty}
|
||||
defaultIsToggled={settings.CONFIRMATION_MODE}
|
||||
isBeta
|
||||
>
|
||||
{t(I18nKey.SETTINGS$CONFIRMATION_MODE)}
|
||||
</SettingsSwitch>
|
||||
|
||||
{confirmationModeEnabled && (
|
||||
<>
|
||||
<div className="w-full max-w-[680px]">
|
||||
{securityAnalyzerInputIsVisible && (
|
||||
<SettingsDropdownInput
|
||||
testId="security-analyzer-input"
|
||||
name="security-analyzer-display"
|
||||
name="security-analyzer-input"
|
||||
label={t(I18nKey.SETTINGS$SECURITY_ANALYZER)}
|
||||
items={getSecurityAnalyzerOptions()}
|
||||
items={
|
||||
resources?.securityAnalyzers.map((analyzer) => ({
|
||||
key: analyzer,
|
||||
label: analyzer,
|
||||
})) || []
|
||||
}
|
||||
placeholder={t(
|
||||
I18nKey.SETTINGS$SECURITY_ANALYZER_PLACEHOLDER,
|
||||
)}
|
||||
selectedKey={selectedSecurityAnalyzer || "none"}
|
||||
isClearable={false}
|
||||
onSelectionChange={(key) => {
|
||||
const newValue = key?.toString() || "";
|
||||
setSelectedSecurityAnalyzer(newValue);
|
||||
handleSecurityAnalyzerIsDirty(newValue);
|
||||
}}
|
||||
onInputChange={(value) => {
|
||||
// Handle when input is cleared
|
||||
if (!value) {
|
||||
setSelectedSecurityAnalyzer("");
|
||||
handleSecurityAnalyzerIsDirty("");
|
||||
}
|
||||
}}
|
||||
wrapperClassName="w-full"
|
||||
defaultSelectedKey={settings.SECURITY_ANALYZER}
|
||||
isClearable
|
||||
showOptionalTag
|
||||
onInputChange={handleSecurityAnalyzerIsDirty}
|
||||
wrapperClassName="w-full max-w-[680px]"
|
||||
/>
|
||||
{/* Hidden input to store the actual key value for form submission */}
|
||||
<input
|
||||
type="hidden"
|
||||
name="security-analyzer-input"
|
||||
value={selectedSecurityAnalyzer || ""}
|
||||
/>
|
||||
</div>
|
||||
<p className="text-xs text-tertiary-alt max-w-[680px]">
|
||||
{t(I18nKey.SETTINGS$SECURITY_ANALYZER_DESCRIPTION)}
|
||||
</p>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
|
||||
@@ -1,191 +1,86 @@
|
||||
import React, { useState } from "react";
|
||||
import React, { useState, useEffect } from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import posthog from "posthog-js";
|
||||
import { useSettings } from "#/hooks/query/use-settings";
|
||||
import { useDeleteMcpServer } from "#/hooks/mutation/use-delete-mcp-server";
|
||||
import { useAddMcpServer } from "#/hooks/mutation/use-add-mcp-server";
|
||||
import { useUpdateMcpServer } from "#/hooks/mutation/use-update-mcp-server";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
|
||||
import { MCPServerList } from "#/components/features/settings/mcp-settings/mcp-server-list";
|
||||
import { MCPServerForm } from "#/components/features/settings/mcp-settings/mcp-server-form";
|
||||
import { ConfirmationModal } from "#/components/shared/modals/confirmation-modal";
|
||||
import { BrandButton } from "#/components/features/settings/brand-button";
|
||||
import { useSaveSettings } from "#/hooks/mutation/use-save-settings";
|
||||
import { MCPConfig } from "#/types/settings";
|
||||
|
||||
type MCPServerType = "sse" | "stdio" | "shttp";
|
||||
|
||||
interface MCPServerConfig {
|
||||
id: string;
|
||||
type: MCPServerType;
|
||||
name?: string;
|
||||
url?: string;
|
||||
api_key?: string;
|
||||
command?: string;
|
||||
args?: string[];
|
||||
env?: Record<string, string>;
|
||||
}
|
||||
import { MCPConfigEditor } from "#/components/features/settings/mcp-settings/mcp-config-editor";
|
||||
import { BrandButton } from "#/components/features/settings/brand-button";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import {
|
||||
displayErrorToast,
|
||||
displaySuccessToast,
|
||||
} from "#/utils/custom-toast-handlers";
|
||||
import { retrieveAxiosErrorMessage } from "#/utils/retrieve-axios-error-message";
|
||||
|
||||
function MCPSettingsScreen() {
|
||||
const { t } = useTranslation();
|
||||
const { data: settings, isLoading } = useSettings();
|
||||
const { mutate: deleteMcpServer } = useDeleteMcpServer();
|
||||
const { mutate: addMcpServer } = useAddMcpServer();
|
||||
const { mutate: updateMcpServer } = useUpdateMcpServer();
|
||||
const { mutate: saveSettings, isPending } = useSaveSettings();
|
||||
|
||||
const [view, setView] = useState<"list" | "add" | "edit">("list");
|
||||
const [editingServer, setEditingServer] = useState<MCPServerConfig | null>(
|
||||
null,
|
||||
);
|
||||
const [confirmationModalIsVisible, setConfirmationModalIsVisible] =
|
||||
useState(false);
|
||||
const [serverToDelete, setServerToDelete] = useState<string | null>(null);
|
||||
const [mcpConfig, setMcpConfig] = useState<MCPConfig | undefined>(undefined);
|
||||
const [isDirty, setIsDirty] = useState(false);
|
||||
|
||||
const mcpConfig: MCPConfig = settings?.MCP_CONFIG || {
|
||||
sse_servers: [],
|
||||
stdio_servers: [],
|
||||
shttp_servers: [],
|
||||
useEffect(() => {
|
||||
if (!mcpConfig && settings?.MCP_CONFIG) {
|
||||
setMcpConfig(settings.MCP_CONFIG);
|
||||
}
|
||||
}, [settings, mcpConfig]);
|
||||
|
||||
const handleConfigChange = (config: MCPConfig) => {
|
||||
setMcpConfig(config);
|
||||
setIsDirty(true);
|
||||
};
|
||||
|
||||
// Convert servers to a unified format for display
|
||||
const allServers: MCPServerConfig[] = [
|
||||
...mcpConfig.sse_servers.map((server, index) => ({
|
||||
id: `sse-${index}`,
|
||||
type: "sse" as const,
|
||||
url: typeof server === "string" ? server : server.url,
|
||||
api_key: typeof server === "object" ? server.api_key : undefined,
|
||||
})),
|
||||
...mcpConfig.stdio_servers.map((server, index) => ({
|
||||
id: `stdio-${index}`,
|
||||
type: "stdio" as const,
|
||||
name: server.name,
|
||||
command: server.command,
|
||||
args: server.args,
|
||||
env: server.env,
|
||||
})),
|
||||
...mcpConfig.shttp_servers.map((server, index) => ({
|
||||
id: `shttp-${index}`,
|
||||
type: "shttp" as const,
|
||||
url: typeof server === "string" ? server : server.url,
|
||||
api_key: typeof server === "object" ? server.api_key : undefined,
|
||||
})),
|
||||
];
|
||||
const formAction = () => {
|
||||
if (!settings) return;
|
||||
|
||||
const handleAddServer = (serverConfig: MCPServerConfig) => {
|
||||
addMcpServer(serverConfig, {
|
||||
onSuccess: () => {
|
||||
setView("list");
|
||||
},
|
||||
});
|
||||
};
|
||||
|
||||
const handleEditServer = (serverConfig: MCPServerConfig) => {
|
||||
updateMcpServer(
|
||||
{
|
||||
serverId: serverConfig.id,
|
||||
server: serverConfig,
|
||||
},
|
||||
saveSettings(
|
||||
{ MCP_CONFIG: mcpConfig },
|
||||
{
|
||||
onSuccess: () => {
|
||||
setView("list");
|
||||
displaySuccessToast(t(I18nKey.SETTINGS$SAVED));
|
||||
posthog.capture("settings_saved", {
|
||||
HAS_MCP_CONFIG: mcpConfig ? "YES" : "NO",
|
||||
MCP_SSE_SERVERS_COUNT: mcpConfig?.sse_servers?.length || 0,
|
||||
MCP_STDIO_SERVERS_COUNT: mcpConfig?.stdio_servers?.length || 0,
|
||||
});
|
||||
setIsDirty(false);
|
||||
},
|
||||
onError: (error) => {
|
||||
const errorMessage = retrieveAxiosErrorMessage(error);
|
||||
displayErrorToast(errorMessage || t(I18nKey.ERROR$GENERIC));
|
||||
},
|
||||
},
|
||||
);
|
||||
};
|
||||
|
||||
const handleDeleteServer = (serverId: string) => {
|
||||
deleteMcpServer(serverId, {
|
||||
onSuccess: () => {
|
||||
setConfirmationModalIsVisible(false);
|
||||
},
|
||||
});
|
||||
};
|
||||
|
||||
const handleEditClick = (server: MCPServerConfig) => {
|
||||
setEditingServer(server);
|
||||
setView("edit");
|
||||
};
|
||||
|
||||
const handleDeleteClick = (serverId: string) => {
|
||||
setServerToDelete(serverId);
|
||||
setConfirmationModalIsVisible(true);
|
||||
};
|
||||
|
||||
const handleConfirmDelete = () => {
|
||||
if (serverToDelete) {
|
||||
handleDeleteServer(serverToDelete);
|
||||
setServerToDelete(null);
|
||||
}
|
||||
};
|
||||
|
||||
const handleCancelDelete = () => {
|
||||
setConfirmationModalIsVisible(false);
|
||||
setServerToDelete(null);
|
||||
};
|
||||
|
||||
if (isLoading) {
|
||||
return (
|
||||
<div className="px-11 py-9 flex flex-col gap-5">
|
||||
<div className="animate-pulse">
|
||||
<div className="h-6 bg-gray-300 rounded w-1/4 mb-4" />
|
||||
<div className="h-4 bg-gray-300 rounded w-1/2 mb-8" />
|
||||
<div className="h-10 bg-gray-300 rounded w-32" />
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
return <div className="p-9">{t(I18nKey.HOME$LOADING)}</div>;
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="px-11 py-9 flex flex-col gap-5">
|
||||
{view === "list" && (
|
||||
<>
|
||||
<BrandButton
|
||||
testId="add-mcp-server-button"
|
||||
type="button"
|
||||
variant="primary"
|
||||
onClick={() => setView("add")}
|
||||
isDisabled={isLoading}
|
||||
>
|
||||
{t(I18nKey.SETTINGS$MCP_ADD_SERVER)}
|
||||
</BrandButton>
|
||||
<form
|
||||
data-testid="mcp-settings-screen"
|
||||
action={formAction}
|
||||
className="flex flex-col h-full justify-between"
|
||||
>
|
||||
<div className="p-9 flex flex-col gap-12">
|
||||
<MCPConfigEditor mcpConfig={mcpConfig} onChange={handleConfigChange} />
|
||||
</div>
|
||||
|
||||
<MCPServerList
|
||||
servers={allServers}
|
||||
onEdit={handleEditClick}
|
||||
onDelete={handleDeleteClick}
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
|
||||
{view === "add" && (
|
||||
<MCPServerForm
|
||||
mode="add"
|
||||
existingServers={allServers}
|
||||
onSubmit={handleAddServer}
|
||||
onCancel={() => setView("list")}
|
||||
/>
|
||||
)}
|
||||
|
||||
{view === "edit" && editingServer && (
|
||||
<MCPServerForm
|
||||
mode="edit"
|
||||
server={editingServer}
|
||||
existingServers={allServers}
|
||||
onSubmit={handleEditServer}
|
||||
onCancel={() => {
|
||||
setView("list");
|
||||
setEditingServer(null);
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
|
||||
{confirmationModalIsVisible && (
|
||||
<ConfirmationModal
|
||||
text={t(I18nKey.SETTINGS$MCP_CONFIRM_DELETE)}
|
||||
onConfirm={handleConfirmDelete}
|
||||
onCancel={handleCancelDelete}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex gap-6 p-6 justify-end border-t border-t-tertiary">
|
||||
<BrandButton
|
||||
testId="submit-button"
|
||||
type="submit"
|
||||
variant="primary"
|
||||
isDisabled={!isDirty || isPending}
|
||||
>
|
||||
{!isPending && t(I18nKey.SETTINGS$SAVE_CHANGES)}
|
||||
{isPending && t(I18nKey.SETTINGS$SAVING)}
|
||||
</BrandButton>
|
||||
</div>
|
||||
</form>
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@ const SAAS_NAV_ITEMS = [
|
||||
{ to: "/settings/billing", text: "SETTINGS$NAV_CREDITS" },
|
||||
{ to: "/settings/secrets", text: "SETTINGS$NAV_SECRETS" },
|
||||
{ to: "/settings/api-keys", text: "SETTINGS$NAV_API_KEYS" },
|
||||
{ to: "/settings/mcp", text: "SETTINGS$NAV_MCP" },
|
||||
];
|
||||
|
||||
const OSS_NAV_ITEMS = [
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user