Compare commits

..

40 Commits

Author SHA1 Message Date
openhands
7561518e4c Fix lint errors 2025-05-19 16:55:19 +00:00
openhands
2cd503f033 Fix merge conflicts with main branch 2025-05-19 16:53:24 +00:00
dependabot[bot]
470687f826 chore(deps): bump the mcp-packages group with 2 updates (#8546)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-05-19 18:37:11 +02:00
tofarr
38b4d93237 Add Session API Key Authentication for Runtime Communication (#8550) 2025-05-19 09:59:22 -06:00
dependabot[bot]
872b97a3c8 chore(deps): bump the version-all group across 1 directory with 20 updates (#8545)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: amanape <83104063+amanape@users.noreply.github.com>
2025-05-19 15:49:53 +00:00
sp.wack
14334040f1 chore(frontend): Refactor chat interface-related event handling (#8403) 2025-05-19 15:15:09 +00:00
sp.wack
b244138ec5 fix(frontend): Prevent making too many calls to /git/changes on conversation load (#8579) 2025-05-19 18:57:18 +04:00
Xingyao Wang
4a3d2e6859 Fix #8551: Show images produced in Jupyter Notebook to LLM directly (#8552)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-19 14:14:00 +00:00
luolin101
1a3cb16ba6 add Visual SWE-bench benchmark (#7131)
Co-authored-by: tsukimi <yuailun@pku.edu.cn>
Co-authored-by: Ryan H. Tran <descience.thh10@gmail.com>
2025-05-19 12:08:46 +07:00
Xingyao Wang
2ecc39ffcc [eval]: disable MCP for SWE-Bench evaluation (#8574)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: Engel Nyst <engel.nyst@gmail.com>
2025-05-19 01:32:46 +00:00
Graham Neubig
0b26174d60 Add documentation microagent (#8563)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-18 16:34:15 -04:00
Calvin Smith
b0005d4e09 Limit size of browser events (#8559)
Co-authored-by: Calvin Smith <calvin@all-hands.dev>
2025-05-18 11:35:09 -06:00
Graham Neubig
2dc7b37fe8 Fix flaky TestLocalFileStore tests (#8569)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-18 06:37:28 -04:00
openhands
2d434ad49f Rename RuntimeInfo to ContextualInfo 2025-05-17 22:31:31 +00:00
openhands
81c0253d53 Rename memory.set_runtime_info to memory.set_contextual_info 2025-05-17 22:27:48 +00:00
openhands
9cdde313d8 Add context_message parameter to conversation creation endpoint 2025-05-17 22:22:15 +00:00
Carlos Freund
27c18f5bdd build(makefile) Develop in OpenhandsCloud (#7440)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-05-17 14:40:46 +00:00
Graham Neubig
5077fea5c7 Fix: Run setup.sh script in GitHub resolver (#8548)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-17 09:52:34 -04:00
Justin Coffi
bf383b4881 Add SSH Microagent (#8436)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Xingyao Wang <xingyaoww@gmail.com>
2025-05-17 05:56:51 +00:00
OpenHands
c17b0ebfc6 Fix issue #8304: [Bug]: Non-native tool use converter fails when builtin tools are disabled (#8310)
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: Engel Nyst <engel.nyst@gmail.com>
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
2025-05-17 06:37:45 +02:00
Xingyao Wang
1f390430e5 feat(MCP, microagent): MCP-support for Repo Microagent & add fetch as default tool (#8360)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-05-16 23:32:38 +00:00
Robert Brennan
819bad0777 Fix: Only show login modal for genuine 401 errors, not connection issues (#8540)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 13:33:06 -04:00
dependabot[bot]
2faed14139 chore(deps): bump the version-all group with 5 updates (#8541)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 17:17:38 +00:00
tofarr
4733270e3c Add event search endpoints with filtering and pagination (#8538)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 09:51:40 -06:00
Robert Brennan
21dd91de63 Add info logging for 401 Unauthorized responses (#8527)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 11:46:15 -04:00
Graham Neubig
25619c5a93 Fix #8510: Improve error messages for invalid microagent format (#8511)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 15:01:39 +00:00
Robert Brennan
15f71e7ed6 Update microagent message from "Activated" to "ready" (#8536)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 10:44:37 -04:00
mamoodi
5b583e5f27 Organization and user microagents (#8506) 2025-05-16 10:42:32 -04:00
Rohit Malhotra
c191a17afb [Fix]: don't access secrets if doesn't exist (#8535)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 14:30:16 +00:00
Robert Brennan
8ec5d0e043 Add WebSocket connection documentation (#8404)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 10:12:05 -04:00
Robert Brennan
f3f038bb60 Fix mypy error for pkg_resources import (#8537)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 14:06:41 +00:00
Robert Brennan
b8d3027cfe Always enable logout button regardless of GitHub connection status (#8529)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-16 09:09:33 -04:00
Rohit Malhotra
feb04dc65f Plumb custom secrets to runtime (#8330)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-15 20:06:30 -04:00
Engel Nyst
1f827170f4 Fix resolver test (#8530) 2025-05-15 21:57:02 +00:00
Engel Nyst
f7cb2d0f64 Restore previous conversation in CLI (#8431) 2025-05-15 23:47:41 +02:00
tofarr
033788c2d0 API Updates to facilitate nested runtimes. (#8525) 2025-05-15 15:38:09 -06:00
dependabot[bot]
21d0990be4 chore(deps): bump the version-all group with 7 updates (#8522)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-05-15 20:17:51 +00:00
tofarr
6227073cff Add missing Ukrainian translations for secrets-related keys (#8526)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-15 19:25:06 +00:00
Graham Neubig
4c38113cb7 Fix CI to check for missing translations (#8486)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-15 13:31:44 -04:00
Emmanuel Ferdman
fb516dfa0f Remove obsolete task.py file (#8517)
Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com>
2025-05-15 18:54:25 +02:00
150 changed files with 7495 additions and 5119 deletions

View File

@@ -13,6 +13,10 @@ updates:
browsergym:
patterns:
- "browsergym*"
mcp-packages:
patterns:
- "mcp"
- "mcpm"
security-all:
applies-to: "security-updates"
patterns:

View File

@@ -30,11 +30,12 @@ jobs:
run: |
cd frontend
npm install --frozen-lockfile
- name: Lint and TypeScript compilation
- name: Lint, TypeScript compilation, and translation checks
run: |
cd frontend
npm run lint
npm run make-i18n && tsc
npm run check-translation-completeness
# Run lint on the python code
lint-python:

View File

@@ -0,0 +1,33 @@
---
name: documentation
type: knowledge
version: 1.0.0
agent: CodeActAgent
triggers:
- documentation
- docs
- document
---
# Documentation Guidelines
All documentation must be grounded in fact, so you must not make anything up without proper evidence. When you have finished writing documentation, convey to the user what reference source, including web pages, source code, or other sources of documentation you referenced when writing each new fact in the documentation. If you cannot reference a source for anything do not include it in the pull request.
## Best Practices for Documentation
1. **Be Factual**: Only include information that can be verified from reliable sources.
2. **Cite Sources**: Always reference the source of information (code, web pages, official documentation).
3. **Be Clear and Concise**: Use simple language and avoid unnecessary jargon.
4. **Use Examples**: Include practical examples to illustrate concepts.
5. **Structure Properly**: Use headings, lists, and code blocks to organize information.
6. **Keep Updated**: Ensure documentation reflects the current state of the code or system.
## Documentation Process
1. Research and gather information from reliable sources
2. Draft documentation based on verified facts
3. Review for accuracy and completeness
4. Include references for all factual statements
5. Submit only when all information is properly sourced
Remember: If you cannot verify a piece of information, it's better to exclude it than to include potentially incorrect information.

View File

@@ -5,6 +5,7 @@ SHELL=/usr/bin/env bash
BACKEND_HOST ?= "127.0.0.1"
BACKEND_PORT = 3000
BACKEND_HOST_PORT = "$(BACKEND_HOST):$(BACKEND_PORT)"
FRONTEND_HOST ?= "127.0.0.1"
FRONTEND_PORT = 3001
DEFAULT_WORKSPACE_DIR = "./workspace"
DEFAULT_MODEL = "gpt-4o"
@@ -154,20 +155,21 @@ install-python-dependencies:
fi
@if [ "${INSTALL_PLAYWRIGHT}" != "false" ] && [ "${INSTALL_PLAYWRIGHT}" != "0" ]; then \
if [ -f "/etc/manjaro-release" ]; then \
echo "$(BLUE)Detected Manjaro Linux. Installing Patchright dependencies...$(RESET)"; \
poetry run patchright install chromium; \
echo "$(BLUE)Detected Manjaro Linux. Installing Playwright dependencies...$(RESET)"; \
poetry run pip install playwright; \
poetry run playwright install chromium; \
else \
if [ ! -f cache/patchright_chromium_is_installed.txt ]; then \
echo "Installing patchright chromium..."; \
poetry run patchright install chromium; \
if [ ! -f cache/playwright_chromium_is_installed.txt ]; then \
echo "Running playwright install --with-deps chromium..."; \
poetry run playwright install --with-deps chromium; \
mkdir -p cache; \
touch cache/patchright_chromium_is_installed.txt; \
touch cache/playwright_chromium_is_installed.txt; \
else \
echo "Setup already done. Skipping patchright installation."; \
echo "Setup already done. Skipping playwright installation."; \
fi \
fi \
else \
echo "Skipping Patchright installation (INSTALL_PLAYWRIGHT=${INSTALL_PLAYWRIGHT})."; \
echo "Skipping Playwright installation (INSTALL_PLAYWRIGHT=${INSTALL_PLAYWRIGHT})."; \
fi
@echo "$(GREEN)Python dependencies installed successfully.$(RESET)"
@@ -287,6 +289,15 @@ setup-config-prompts:
@read -p "Enter your LLM base URL [mostly used for local LLMs, leave blank if not needed - example: http://localhost:5001/v1/]: " llm_base_url; \
if [[ ! -z "$$llm_base_url" ]]; then echo "base_url=\"$$llm_base_url\"" >> $(CONFIG_FILE).tmp; fi
setup-config-basic:
@printf '%s\n' \
'[core]' \
'workspace_base="./workspace"' \
> config.toml
@echo "$(GREEN)config.toml created.$(RESET)"
openhands-cloud-run:
@$(MAKE) run BACKEND_HOST="0.0.0.0" BACKEND_PORT="12000" FRONTEND_HOST="0.0.0.0" FRONTEND_PORT="12001"
# Develop in container
docker-dev:
@@ -321,5 +332,4 @@ help:
@echo " $(GREEN)help$(RESET) - Display this help message, providing information on available targets."
# Phony targets
.PHONY: build check-dependencies check-python check-npm check-docker check-poetry install-python-dependencies install-frontend-dependencies install-pre-commit-hooks lint start-backend start-frontend run run-wsl setup-config setup-config-prompts help
.PHONY: docker-dev docker-run
.PHONY: build check-dependencies check-system check-python check-npm check-nodejs check-docker check-poetry install-python-dependencies install-frontend-dependencies install-pre-commit-hooks lint-backend lint-frontend lint test-frontend test build-frontend start-backend start-frontend _run_setup run run-wsl setup-config setup-config-prompts setup-config-basic openhands-cloud-run docker-dev docker-run clean help

View File

@@ -0,0 +1,181 @@
---
sidebar_position: 9
---
# Connecting to the WebSocket
This guide explains how to connect to the OpenHands WebSocket API to receive real-time events and send actions to the agent.
## Overview
OpenHands uses [Socket.IO](https://socket.io/) for WebSocket communication between the client and server. The WebSocket connection allows you to:
1. Receive real-time events from the agent
2. Send user actions to the agent
3. Maintain a persistent connection for ongoing conversations
## Connecting to the WebSocket
### Connection Parameters
When connecting to the WebSocket, you need to provide the following query parameters:
- `conversation_id`: The ID of the conversation you want to join
- `latest_event_id`: The ID of the latest event you've received (use `-1` for a new connection)
- `providers_set`: (Optional) A comma-separated list of provider types
### Connection Example
Here's a basic example of connecting to the WebSocket using JavaScript:
```javascript
import { io } from "socket.io-client";
const socket = io("http://localhost:3000", {
transports: ["websocket"],
query: {
conversation_id: "your-conversation-id",
latest_event_id: -1,
providers_set: "github,gitlab" // Optional
}
});
socket.on("connect", () => {
console.log("Connected to OpenHands WebSocket");
});
socket.on("oh_event", (event) => {
console.log("Received event:", event);
});
socket.on("connect_error", (error) => {
console.error("Connection error:", error);
});
socket.on("disconnect", (reason) => {
console.log("Disconnected:", reason);
});
```
## Sending Actions to the Agent
To send an action to the agent, use the `oh_user_action` event:
```javascript
// Send a user message to the agent
socket.emit("oh_user_action", {
type: "message",
source: "user",
message: "Hello, can you help me with my project?"
});
```
## Receiving Events from the Agent
The server emits events using the `oh_event` event type. Here are some common event types you might receive:
- User messages (`source: "user", type: "message"`)
- Agent messages (`source: "agent", type: "message"`)
- File edits (`action: "edit"`)
- File writes (`action: "write"`)
- Command executions (`action: "run"`)
Example event handler:
```javascript
socket.on("oh_event", (event) => {
if (event.source === "agent" && event.type === "message") {
console.log("Agent says:", event.message);
} else if (event.action === "run") {
console.log("Command executed:", event.args.command);
console.log("Result:", event.result);
}
});
```
## Using Websocat for Testing
[Websocat](https://github.com/vi/websocat) is a command-line tool for interacting with WebSockets. It's useful for testing your WebSocket connection without writing a full client application.
### Installation
```bash
# On macOS
brew install websocat
# On Linux
curl -L https://github.com/vi/websocat/releases/download/v1.11.0/websocat.x86_64-unknown-linux-musl > websocat
chmod +x websocat
sudo mv websocat /usr/local/bin/
```
### Connecting to the WebSocket
```bash
# Connect to the WebSocket and print all received messages
echo "40{}" | \
websocat "ws://localhost:3000/socket.io/?EIO=4&transport=websocket&conversation_id=your-conversation-id&latest_event_id=-1"
```
### Sending a Message
```bash
# Send a message to the agent
echo '42["oh_user_action",{"type":"message","source":"user","message":"Hello, agent!"}]' | \
websocat "ws://localhost:3000/socket.io/?EIO=4&transport=websocket&conversation_id=your-conversation-id&latest_event_id=-1"
```
### Complete Example with Websocat
Here's a complete example of connecting to the WebSocket, sending a message, and receiving events:
```bash
# Start a persistent connection
websocat -v "ws://localhost:3000/socket.io/?EIO=4&transport=websocket&conversation_id=your-conversation-id&latest_event_id=-1"
# In another terminal, send a message
echo '42["oh_user_action",{"type":"message","source":"user","message":"Can you help me with my project?"}]' | \
websocat "ws://localhost:3000/socket.io/?EIO=4&transport=websocket&conversation_id=your-conversation-id&latest_event_id=-1"
```
## Event Structure
Events sent and received through the WebSocket follow a specific structure:
```typescript
interface OpenHandsEvent {
id: string; // Unique event ID
source: string; // "user" or "agent"
timestamp: string; // ISO timestamp
message?: string; // For message events
type?: string; // Event type (e.g., "message")
action?: string; // Action type (e.g., "run", "edit", "write")
args?: any; // Action arguments
result?: any; // Action result
}
```
## Best Practices
1. **Handle Reconnection**: Implement reconnection logic in your client to handle network interruptions.
2. **Track Event IDs**: Store the latest event ID you've received and use it when reconnecting to avoid duplicate events.
3. **Error Handling**: Implement proper error handling for connection errors and failed actions.
4. **Rate Limiting**: Avoid sending too many actions in a short period to prevent overloading the server.
## Troubleshooting
### Connection Issues
- Verify that the OpenHands server is running and accessible
- Check that you're providing the correct conversation ID
- Ensure your WebSocket URL is correctly formatted
### Authentication Issues
- Make sure you have the necessary authentication cookies if required
- Verify that you have permission to access the specified conversation
### Event Handling Issues
- Check that you're correctly parsing the event data
- Verify that your event handlers are properly registered

View File

@@ -0,0 +1,23 @@
# Organization and User Microagents
## Purpose
Organizations and users can define microagents that apply to all repositories belonging to the organization or user.
## Usage
These microagents can be [any type of microagent](./microagents-overview#microagent-types) and will be loaded
accordingly. However, they are applied to all repositories belonging to the organization or user.
Add a `.openhands` repository under the organization or user and create a `microagents` directory and place the
microagents in that directory.
## Example
General microagent file example for organization `Great-Co` located inside the `.openhands` repository:
`microagents/org-microagent.md`:
```
* Use type hints and error boundaries; validate inputs at system boundaries and fail with meaningful error messages.
* Document interfaces and public APIs; use implementation comments only for non-obvious logic.
* Follow the same naming convention for variables, classes, constants, etc. already used in each repository.
```

View File

@@ -7,7 +7,7 @@ They provide expert guidance, automate common tasks, and ensure consistent pract
Currently OpenHands supports the following types of microagents:
- [General Repository Microagents](./microagents-repo): General guidelines for OpenHands about the repository.
- [General Microagents](./microagents-repo): General guidelines for OpenHands about the repository.
- [Keyword-Triggered Microagents](./microagents-keyword): Guidelines activated by specific keywords in prompts.
To customize OpenHands' behavior, create a .openhands/microagents/ directory in the root of your repository and
@@ -24,7 +24,7 @@ Example repository structure:
some-repository/
└── .openhands/
└── microagents/
└── repo.md # General repository guidelines
└── repo.md # General guidelines
└── trigger_this.md # Microagent triggered by specific keywords
└── trigger_that.md # Microagent triggered by specific keywords
```
@@ -34,7 +34,7 @@ some-repository/
Each microagent file may include frontmatter that provides additional information. In some cases, this frontmatter
is required:
| Microagent Type | Required |
|----------------------------------|----------|
| `General Repository Microagents` | No |
| `Keyword-Triggered Microagents` | Yes |
| Microagent Type | Required |
|---------------------------------|----------|
| `General Microagents` | No |
| `Keyword-Triggered Microagents` | Yes |

View File

@@ -1,4 +1,4 @@
# General Repository Microagents
# General Microagents
## Purpose
@@ -20,7 +20,7 @@ Frontmatter should be enclosed in triple dashes (---) and may include the follow
## Example
General repository microagent file example located at `.openhands/microagents/repo.md`:
General microagent file example located at `.openhands/microagents/repo.md`:
```
This project is a TODO application that allows users to track TODO items.
@@ -28,4 +28,4 @@ To set it up, you can run `npm run build`.
Always make sure the tests are passing before committing changes. You can run the tests by running `npm run test`.
```
[See more examples of general repository microagents here.](https://github.com/All-Hands-AI/OpenHands/tree/main/.openhands/microagents)
[See more examples of general microagents here.](https://github.com/All-Hands-AI/OpenHands/tree/main/.openhands/microagents)

View File

@@ -70,7 +70,7 @@ const sidebars: SidebarsConfig = {
},
{
type: 'doc',
label: 'General Repository Microagents',
label: 'General Microagents',
id: 'usage/prompting/microagents-repo',
},
{
@@ -78,6 +78,11 @@ const sidebars: SidebarsConfig = {
label: 'Keyword-Triggered Microagents',
id: 'usage/prompting/microagents-keyword',
},
{
type: 'doc',
label: 'Organization and User Microagents',
id: 'usage/prompting/microagents-org',
},
{
type: 'doc',
label: 'Global Microagents',
@@ -267,6 +272,11 @@ const sidebars: SidebarsConfig = {
label: 'Evaluation',
id: 'usage/how-to/evaluation-harness',
},
{
type: 'doc',
label: 'WebSocket Connection',
id: 'usage/how-to/websocket-connection',
},
],
},
{

View File

@@ -261,6 +261,7 @@ def get_config(
enable_jupyter=False,
enable_browsing=RUN_WITH_BROWSING,
enable_llm_editor=False,
enable_mcp=False,
condenser=metadata.condenser_config,
enable_prompt_extensions=False,
)

View File

@@ -0,0 +1,172 @@
# Visual SWE-Bench Evaluation with Docker Image
This folder contains the evaluation harness that we built on top of the original [Visual SWE-Bench benchmark](https://multi-swe-bench.github.io/#/) ([paper](https://arxiv.org/abs/2412.17315)).
The evaluation consists of three steps:
1. Environment setup: [install python environment](../../README.md#development-environment), [configure LLM config](../../README.md#configure-openhands-and-your-llm), and [pull docker](#openhands-visual-swe-bench-instance-level-docker-support).
2. [Run inference](#run-inference-on-visual-swe-bench-instances): Generate an edit patch for each GitHub issue.
3. [Evaluate patches using Visual SWE-Bench docker](#evaluate-generated-patches).
## Setup Environment and LLM Configuration
Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
## OpenHands Visual SWE-Bench Instance-level Docker Support
OpenHands now supports using the official evaluation docker for both **[inference](#run-inference-on-visual-swe-bench-instances) and [evaluation](#evaluate-generated-patches)**.
This is now the default behavior.
## Run Inference on Visual SWE-Bench Instances
Make sure your Docker daemon is running, and you have ample disk space for the [instance-level docker image](#openhands-visual-swe-bench-instance-level-docker-support).
When the `run_infer.sh` script is started, it will automatically pull the relevant Visual SWE-Bench images. For example, for instance ID `networkx__networkx-6503`, it will try to pull our pre-built docker image `sweb.eval.x86_64.networkx_s_networkx-6503` from DockerHub. This image will be used to create an OpenHands runtime image in which the agent will operate.
```bash
./evaluation/benchmarks/visual_swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers]
# Example
./evaluation/benchmarks/visual_swe_bench/scripts/run_infer.sh llm.eval_gpt4_1106_preview HEAD CodeActAgent 133 30 1
```
where `model_config` is mandatory, and the rest are optional.
- `model_config`, e.g. `eval_gpt4_1106_preview`, is the config group name for your
LLM settings, as defined in your `config.toml`.
- `git-version`, e.g. `HEAD`, is the git commit hash of the OpenHands version you would
like to evaluate. It could also be a release tag like `0.6.2`.
- `agent`, e.g. `CodeActAgent`, is the name of the agent for benchmarks, defaulting
to `CodeActAgent`.
- `eval_limit`, e.g. `10`, limits the evaluation to the first `eval_limit` instances. By
default, the script evaluates the entire Visual SWE-bench set (133 issues). Note:
in order to use `eval_limit`, you must also set `agent`.
- `max_iter`, e.g. `20`, is the maximum number of iterations for the agent to run. By
default, it is set to 30.
- `num_workers`, e.g. `3`, is the number of parallel workers to run the evaluation. By
default, it is set to 1.
There are also two optional environment variables you can set.
```bash
export USE_HINT_TEXT=true # if you want to use hint text in the evaluation. Default to false. Ignore this if you are not sure.
export USE_INSTANCE_IMAGE=true # if you want to use instance-level docker images. Default to true
```
Let's say you'd like to run 10 instances using `llm.eval_gpt4_1106_preview` and CodeActAgent,
then your command would be:
```bash
./evaluation/benchmarks/visual_swe_bench/scripts/run_infer.sh llm.eval_gpt4_1106_preview HEAD CodeActAgent 10
```
### Specify a subset of tasks to run infer
If you would like to specify a list of tasks you'd like to benchmark on, you could
create a `config.toml` under `./evaluation/benchmarks/visual_swe_bench/` folder, and put a list
attribute named `selected_ids`, e.g.
```toml
selected_ids = ['astropy__astropy-13838', 'matplotlib__matplotlib-21617', 'plotly__plotly.py-1966']
```
Then only these tasks (rows whose `instance_id` is in the above list) will be evaluated.
In this case, `eval_limit` option applies to tasks that are in the `selected_ids` list.
After running the inference, you will obtain an `output.jsonl` (by default it will be saved to `evaluation/evaluation_outputs`).
## Evaluate Generated Patches
### Download Docker Images
**(Recommended for reproducibility)** If you have extra local space (e.g., 200GB), you can try pulling the instance-level docker images we've prepared by running:
```bash
evaluation/benchmarks/visual_swe_bench/scripts/docker/pull_all_eval_docker.sh instance
```
If you want to save disk space a bit, while speeding up the image pre-build process, you can pull the environment-level docker images:
```bash
evaluation/benchmarks/visual_swe_bench/scripts/docker/pull_all_eval_docker.sh env
```
If you want to evaluate on the full SWE-Bench test set:
```bash
evaluation/benchmarks/visual_swe_bench/scripts/docker/pull_all_eval_docker.sh instance full
```
### Run evaluation
With `output.jsonl` file, you can run `eval_infer.sh` to evaluate generated patches, and produce a fine-grained report.
**This evaluation is performed using the official dockerized evaluation announced.**
> If you want to evaluate existing results, you should first run this to clone existing outputs
>
>```bash
>git clone https://huggingface.co/spaces/OpenHands/evaluation evaluation/evaluation_outputs
>```
NOTE, you should have already pulled the instance-level OR env-level docker images following [this section](#openhands-visual-swe-bench-instance-level-docker-support).
Then you can run the following:
```bash
./evaluation/benchmarks/visual_swe_bench/scripts/eval_infer.sh $YOUR_OUTPUT_JSONL [instance_id]
# Example
./evaluation/benchmarks/visual_swe_bench/scripts/eval_infer.sh evaluation/evaluation_outputs/outputs/luolin101__Visual-SWE-bench-test/CodeActAgent/gpt-4-1106-preview_maxiter_50_N_v1.0/output.jsonl
```
The script now accepts optional arguments:
- `instance_id`: Specify a single instance to evaluate (optional)
For example, to evaluate a specific instance:
```bash
./evaluation/benchmarks/visual_swe_bench/scripts/eval_infer.sh $YOUR_OUTPUT_JSONL instance_123
```
> You can also pass in a JSONL with SWE-Bench format to `./evaluation/benchmarks/visual_swe_bench/scripts/eval_infer.sh`, where each line is a JSON of `{"model_patch": "XXX", "model_name_or_path": "YYY", "instance_id": "ZZZ"}`.
The final results will be saved to `evaluation/evaluation_outputs/outputs/visual_swe_bench/CodeActAgent/gpt-4-1106-preview_maxiter_50_N_v1.0/` with the following files/directory:
- `README.md`: a report showing what are the instances that passed, failed, etc.
- `report.json`: a JSON file that contains keys like `"resolved_ids"` pointing to instance IDs that are resolved by the agent.
- `logs/`: a directory of test logs
## Visualize Results
First you need to clone `https://huggingface.co/spaces/OpenHands/evaluation` and add your own running results from openhands into the `outputs` of the cloned repo.
```bash
git clone https://huggingface.co/spaces/OpenHands/evaluation
```
**(optional) setup streamlit environment with conda**:
```bash
cd evaluation
conda create -n streamlit python=3.10
conda activate streamlit
pip install -r requirements.txt
```
**run the visualizer**:
Then, in a separate Python environment with `streamlit` library, you can run the following:
```bash
# Make sure you are inside the cloned `evaluation` repo
conda activate streamlit # if you follow the optional conda env setup above
streamlit run app.py --server.port 8501 --server.address 0.0.0.0
```
Then you can access the SWE-Bench trajectory visualizer at `localhost:8501`.
## Submit your evaluation results
You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).

View File

@@ -0,0 +1,641 @@
import asyncio
import json
import os
import tempfile
from typing import Any
import pandas as pd
import toml
from datasets import load_dataset
import openhands.agenthub
from evaluation.benchmarks.swe_bench.resource.mapping import (
get_instance_resource_factor,
)
from evaluation.utils.shared import (
EvalException,
EvalMetadata,
EvalOutput,
assert_and_raise,
codeact_user_response,
get_default_sandbox_config_for_eval,
get_metrics,
is_fatal_evaluation_error,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_llm_config_for_completions_logging,
)
from openhands.controller.state.state import State
from openhands.core.config import (
AgentConfig,
AppConfig,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation, ErrorObservation
from openhands.events.serialization.event import event_to_dict
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
from openhands.utils.shutdown_listener import sleep_if_should_continue
# Feature flags read from the environment. Each defaults to 'false' and is
# enabled only when the variable's value, lower-cased, equals 'true'.
USE_HINT_TEXT = os.getenv('USE_HINT_TEXT', 'false').lower() == 'true'
RUN_WITH_BROWSING = os.getenv('RUN_WITH_BROWSING', 'false').lower() == 'true'

# Lookup table keyed by agent class name; the only entry maps 'CodeActAgent'
# to the imported `codeact_user_response` callable.
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {'CodeActAgent': codeact_user_response}
def _get_swebench_workspace_dir_name(instance: pd.Series) -> str:
return f'{instance.repo}__{instance.version}'.replace('/', '__')
def get_instruction(instance: pd.Series, metadata: EvalMetadata):
    """Build the initial user prompt for one benchmark instance.

    The prompt points the agent at ``/workspace/<workspace dir>``, embeds the
    instance's ``problem_statement`` and ``base_commit`` and lists the steps
    the agent should follow. When the module-level ``RUN_WITH_BROWSING`` flag
    is set, a trailing notice telling the agent not to browse the web is
    appended. ``metadata`` is accepted but not read here.
    """
    repo_dir = _get_swebench_workspace_dir_name(instance)
    # Instruction based on Anthropic's official trajectory
    # https://github.com/eschluntz/swe-bench-experiments/tree/main/evaluation/verified/20241022_tools_claude-3-5-sonnet-updated/trajs
    segments = [
        '<uploaded_files>\n',
        f'/workspace/{repo_dir}\n',
        '</uploaded_files>\n',
        f"I've uploaded a python code repository in the directory {repo_dir}. Consider the following issue description:\n\n",
        '<issue_description>\n',
        f'{instance.problem_statement}\n',
        '</issue_description>\n\n',
        'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n',
        "I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n",
        "Also the development Python environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n",
        'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n',
        'Follow these steps to resolve the issue:\n',
        '1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n',
        '2. Create a script to reproduce the error and execute it with `python <filename.py>` using the BashTool, to confirm the error\n',
        '3. Edit the sourcecode of the repo to resolve the issue\n',
        '4. Rerun your reproduce script and confirm that the error is fixed!\n',
        '5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well\n',
        f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n',
        ' - The issue you are fixing\n',
        ' - The files you modified\n',
        ' - The functions you changed\n',
        ' Make sure all these tests pass with your changes.\n',
        "Your thinking should be thorough and so it's fine if it's very long.\n",
    ]
    prompt = ''.join(segments)
    if not RUN_WITH_BROWSING:
        return prompt
    return prompt + (
        '<IMPORTANT!>\nYou SHOULD NEVER attempt to browse the web. </IMPORTANT!>\n'
    )
# TODO: migrate all swe-bench docker to ghcr.io/openhands
# Registry/namespace prefix prepended to instance image names. It can be
# overridden through the EVAL_DOCKER_IMAGE_PREFIX environment variable.
DOCKER_IMAGE_PREFIX = os.environ.get('EVAL_DOCKER_IMAGE_PREFIX', 'docker.io/xingyaoww/')
# Logged once, when this module is first executed.
logger.info(f'Using docker image prefix: {DOCKER_IMAGE_PREFIX}')
# Instance ids whose images are looked up under docker.io/luolin101 instead of
# under DOCKER_IMAGE_PREFIX.
_LUOLIN101_INSTANCE_IDS = frozenset(
    (
        'plotly__plotly.py-4083',
        'plotly__plotly.py-2600',
        'plotly__plotly.py-2591',
        'plotly__plotly.py-1966',
        'networkx__networkx-6503',
        'networkx__networkx-6098',
        'networkx__networkx-5616',
        'networkx__networkx-5354',
        'networkx__networkx-5058',
        'networkx__networkx-4378',
        'networkx__networkx-3764',
        'vega__altair-2785',
        'vega__altair-1092',
        'vega__altair-974',
        'vega__altair-830',
        'matplotlib__matplotlib-27754',
        'matplotlib__matplotlib-26926',
        'matplotlib__matplotlib-26788',
        'matplotlib__matplotlib-26586',
        'sympy__sympy-26941',
        'mwaskom__seaborn-3458',
        'mwaskom__seaborn-3454',
    )
)


def get_instance_docker_image(instance_id: str, official_image: bool = False) -> str:
    """Return the lower-cased docker image reference for *instance_id*.

    The image name is ``sweb.eval.x86_64.<instance_id>`` with ``__`` rewritten
    to ``_s_`` to comply with the docker image naming convention. Ids listed in
    ``_LUOLIN101_INSTANCE_IDS`` resolve under ``docker.io/luolin101``; all
    others resolve under ``DOCKER_IMAGE_PREFIX``.

    ``official_image`` is accepted for interface compatibility but has no
    effect on the result.
    """
    image_name = ('sweb.eval.x86_64.' + instance_id).replace('__', '_s_')
    if instance_id in _LUOLIN101_INSTANCE_IDS:
        namespace = 'docker.io/luolin101'
    else:
        namespace = DOCKER_IMAGE_PREFIX.rstrip('/')
    return f'{namespace}/{image_name}'.lower()
def get_config(
    instance: pd.Series,
    metadata: EvalMetadata,
) -> AppConfig:
    """Assemble the ``AppConfig`` used to evaluate a single instance.

    The sandbox is based on the instance-specific container image, pinned to
    the ``linux/amd64`` platform, with auto-lint enabled and host networking
    disabled. The LLM config is routed through
    ``update_llm_config_for_completions_logging`` and the agent runs without
    Jupyter, the LLM editor or prompt extensions; browsing follows
    ``RUN_WITH_BROWSING``.
    """
    instance_id = instance['instance_id']
    dataset_name = metadata.dataset.lower()

    # We use a different instance image for each instance of swe-bench eval
    use_official_image = 'verified' in dataset_name or 'lite' in dataset_name
    base_container_image = get_instance_docker_image(instance_id, use_official_image)
    logger.info(
        f'Using instance container image: {base_container_image}. '
        'Please make sure this image exists. '
        'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
    )

    sandbox_config = get_default_sandbox_config_for_eval()
    sandbox_config.base_container_image = base_container_image
    sandbox_config.enable_auto_lint = True
    sandbox_config.use_host_network = False
    # Add platform to the sandbox config to solve issue 4401
    sandbox_config.platform = 'linux/amd64'
    sandbox_config.remote_runtime_resource_factor = get_instance_resource_factor(
        dataset_name=metadata.dataset,
        instance_id=instance_id,
    )

    config = AppConfig(
        default_agent=metadata.agent_class,
        run_as_openhands=False,
        max_iterations=metadata.max_iterations,
        runtime=os.environ.get('RUNTIME', 'docker'),
        sandbox=sandbox_config,
        # do not mount workspace
        workspace_base=None,
        workspace_mount_path=None,
    )

    logged_llm_config = update_llm_config_for_completions_logging(
        metadata.llm_config, metadata.eval_output_dir, instance_id
    )
    config.set_llm_config(logged_llm_config)

    config.set_agent_config(
        AgentConfig(
            enable_jupyter=False,
            enable_browsing=RUN_WITH_BROWSING,
            enable_llm_editor=False,
            condenser=metadata.condenser_config,
            enable_prompt_extensions=False,
        )
    )
    return config
def _run_init_command(runtime: Runtime, command: str):
    """Run *command* in the sandbox with a 600s hard timeout and return the observation.

    Both the action and the resulting observation are logged.
    """
    action = CmdRunAction(command=command)
    action.set_hard_timeout(600)
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    return obs


def initialize_runtime(
    runtime: Runtime,
    instance: pd.Series,  # read for instance_id and the workspace dir, and copied into the sandbox as JSON
):
    """Initialize the runtime for the agent.

    This function is called before the runtime is used to run the agent.
    It appends environment setup to ``~/.bashrc``, copies the instance JSON and
    ``instance_swe_entry.sh`` into ``/swe_util``, sources both scripts, moves
    into the instance workspace, hard-resets the repository, removes all git
    remotes and finally checks that ``which python`` resolves to a path
    containing ``testbed``.

    Every step is checked through ``assert_and_raise``; the first failing step
    aborts initialization.
    """
    logger.info('-' * 30)
    logger.info('BEGIN Runtime Initialization Fn')
    logger.info('-' * 30)
    workspace_dir_name = _get_swebench_workspace_dir_name(instance)
    obs: CmdOutputObservation

    # Set instance id
    obs = _run_init_command(
        runtime,
        f"""echo 'export SWE_INSTANCE_ID={instance['instance_id']}' >> ~/.bashrc && echo 'export PIP_CACHE_DIR=~/.cache/pip' >> ~/.bashrc && echo "alias git='git --no-pager'" >> ~/.bashrc""",
    )
    assert_and_raise(
        obs.exit_code == 0, f'Failed to export SWE_INSTANCE_ID: {str(obs)}'
    )

    obs = _run_init_command(runtime, """export USER=$(whoami); echo USER=${USER} """)
    assert_and_raise(obs.exit_code == 0, f'Failed to export USER: {str(obs)}')

    # Directory of this file; the setup script is located relative to it
    script_dir = os.path.dirname(__file__)

    # inject the instance info
    obs = _run_init_command(runtime, 'mkdir -p /swe_util/eval_data/instances')
    assert_and_raise(
        obs.exit_code == 0,
        f'Failed to create /swe_util/eval_data/instances: {str(obs)}',
    )

    swe_instance_json_name = 'swe-bench-instance.json'
    with tempfile.TemporaryDirectory() as temp_dir:
        # Construct the full path for the desired file name within the temporary directory
        temp_file_path = os.path.join(temp_dir, swe_instance_json_name)
        # Write to the file with the desired name within the temporary directory
        with open(temp_file_path, 'w') as f:
            if not isinstance(instance, dict):
                json.dump([instance.to_dict()], f)
            else:
                json.dump([instance], f)
        # Copy the file to the desired location (must happen before the
        # temporary directory is cleaned up)
        runtime.copy_to(temp_file_path, '/swe_util/eval_data/instances/')

    # inject the instance swe entry
    runtime.copy_to(
        str(os.path.join(script_dir, 'scripts/setup/instance_swe_entry.sh')),
        '/swe_util/',
    )

    obs = _run_init_command(runtime, 'cat ~/.bashrc')
    assert_and_raise(obs.exit_code == 0, f'Failed to cat ~/.bashrc: {str(obs)}')

    obs = _run_init_command(runtime, 'source ~/.bashrc')
    if isinstance(obs, ErrorObservation):
        logger.error(f'Failed to source ~/.bashrc: {str(obs)}')
    assert_and_raise(obs.exit_code == 0, f'Failed to source ~/.bashrc: {str(obs)}')

    obs = _run_init_command(runtime, 'source /swe_util/instance_swe_entry.sh')
    assert_and_raise(
        obs.exit_code == 0,
        f'Failed to source /swe_util/instance_swe_entry.sh: {str(obs)}',
    )

    obs = _run_init_command(runtime, f'cd /workspace/{workspace_dir_name}')
    assert_and_raise(
        obs.exit_code == 0,
        f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
    )

    obs = _run_init_command(runtime, 'git reset --hard')
    assert_and_raise(obs.exit_code == 0, f'Failed to git reset --hard: {str(obs)}')

    obs = _run_init_command(
        runtime,
        'for remote_name in $(git remote); do git remote remove "${remote_name}"; done',
    )
    assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}')

    # The active interpreter must resolve to a path containing "testbed"
    obs = _run_init_command(runtime, 'which python')
    assert_and_raise(
        obs.exit_code == 0 and 'testbed' in obs.content,
        f'Expected to find python interpreter from testbed, but got: {str(obs)}',
    )

    logger.info('-' * 30)
    logger.info('END Runtime Initialization Fn')
    logger.info('-' * 30)
def complete_runtime(
    runtime: Runtime,
    instance: pd.Series,  # used to derive the workspace_dir_name and to read base_commit
) -> dict[str, Any]:
    """Complete the runtime for the agent.

    This function is called after the agent has run (see `process_instance`),
    while the runtime is still open. It collects the agent's changes as a git
    patch against the instance's base commit.

    If you need to do something in the sandbox to get the correctness metric after
    the agent has run, modify this function.

    Returns:
        A dict with a single key, 'git_patch', holding the stripped output of
        `git diff --no-color --cached <base_commit>`.

    Raises:
        Whatever `assert_and_raise` raises when any step fails, or when no
        patch could be obtained after 5 attempts.
    """
    logger.info('-' * 30)
    logger.info('BEGIN Runtime Completion Fn')
    logger.info('-' * 30)
    obs: CmdOutputObservation
    workspace_dir_name = _get_swebench_workspace_dir_name(instance)

    action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
    action.set_hard_timeout(600)
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})

    if obs.exit_code == -1:
        # The previous command is still running
        # We need to kill previous command
        logger.info('The previous command is still running, trying to kill it...')
        # Note: this interrupt is sent without a hard timeout and without logging the action
        action = CmdRunAction(command='C-c')
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

        # Then run the command again
        action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
        action.set_hard_timeout(600)
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

    assert_and_raise(
        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
        f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
    )

    # Disable the git pager globally before running further git commands
    action = CmdRunAction(command='git config --global core.pager ""')
    action.set_hard_timeout(600)
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert_and_raise(
        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
        f'Failed to git config --global core.pager "": {str(obs)}',
    )

    # First check for any git repositories in subdirectories
    action = CmdRunAction(command='find . -type d -name .git -not -path "./.git"')
    action.set_hard_timeout(600)
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert_and_raise(
        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
        f'Failed to find git repositories: {str(obs)}',
    )

    # One path per output line; blank lines are dropped
    git_dirs = [p for p in obs.content.strip().split('\n') if p]
    if git_dirs:
        # Remove all .git directories in subdirectories
        for git_dir in git_dirs:
            action = CmdRunAction(command=f'rm -rf "{git_dir}"')
            action.set_hard_timeout(600)
            logger.info(action, extra={'msg_type': 'ACTION'})
            obs = runtime.run_action(action)
            logger.info(obs, extra={'msg_type': 'OBSERVATION'})
            assert_and_raise(
                isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
                f'Failed to remove git directory {git_dir}: {str(obs)}',
            )

    # add all files
    action = CmdRunAction(command='git add -A')
    action.set_hard_timeout(600)
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert_and_raise(
        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
        f'Failed to git add -A: {str(obs)}',
    )

    # Try up to 5 times to obtain the staged diff against the base commit
    n_retries = 0
    git_patch = None
    while n_retries < 5:
        action = CmdRunAction(
            command=f'git diff --no-color --cached {instance["base_commit"]}'
        )
        # max() keeps the timeout at 600s or more; only the fifth attempt
        # (n_retries == 4) exceeds that, at 700s
        action.set_hard_timeout(max(300 + 100 * n_retries, 600))
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        n_retries += 1
        if isinstance(obs, CmdOutputObservation):
            if obs.exit_code == 0:
                git_patch = obs.content.strip()
                break
            else:
                logger.info('Failed to get git diff, retrying...')
                sleep_if_should_continue(10)
        elif isinstance(obs, ErrorObservation):
            logger.error(f'Error occurred: {obs.content}. Retrying...')
            sleep_if_should_continue(10)
        else:
            assert_and_raise(False, f'Unexpected observation type: {str(obs)}')

    # git_patch is still None only if every attempt failed
    assert_and_raise(git_patch is not None, 'Failed to get git diff (None)')

    logger.info('-' * 30)
    logger.info('END Runtime Completion Fn')
    logger.info('-' * 30)
    return {'git_patch': git_patch}
def process_instance(
    instance: pd.Series,
    metadata: EvalMetadata,
    reset_logger: bool = True,
    runtime_failure_count: int = 0,
) -> EvalOutput:
    """Run the agent on one instance and return its evaluation record.

    Args:
        instance: The dataset row to evaluate.
        metadata: Evaluation metadata (agent class, LLM config, output dir, ...).
        reset_logger: When true, redirect logging to a per-instance file under
            ``<eval_output_dir>/infer_logs`` so instances can run in parallel.
        runtime_failure_count: Number of earlier runtime failures for this
            instance; each one doubles the remote runtime resource factor,
            capped at 8.

    Returns:
        An ``EvalOutput`` holding the instruction, the agent history, metrics
        and the git patch produced by the agent.

    Raises:
        ValueError: If the controller returns no state.
        EvalException: If the final state carries a fatal evaluation error.
    """
    config = get_config(instance, metadata)

    # Setup the logger properly, so you can run multi-processing to parallelize the evaluation
    if reset_logger:
        log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
        reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
    else:
        logger.info(f'Starting evaluation for instance {instance.instance_id}.')

    # Increase resource_factor with increasing attempt_id
    if runtime_failure_count > 0:
        config.sandbox.remote_runtime_resource_factor = min(
            config.sandbox.remote_runtime_resource_factor * (2**runtime_failure_count),
            8,
        )
        logger.warning(
            f'This is the {runtime_failure_count + 1}th attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
        )

    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)

    try:
        initialize_runtime(runtime, instance)
        instruction = get_instruction(instance, metadata)

        # Here's how you can run the agent (similar to the `main` function) and get the final task state
        state: State | None = asyncio.run(
            run_controller(
                config=config,
                initial_user_action=MessageAction(content=instruction),
                runtime=runtime,
                fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
                    metadata.agent_class
                ],
            )
        )

        # Validate the state before touching its attributes; otherwise a None
        # state surfaces as an AttributeError on `state.last_error` below.
        if state is None:
            raise ValueError('State should not be None.')

        # if fatal error, throw EvalError to trigger re-run
        if is_fatal_evaluation_error(state.last_error):
            raise EvalException('Fatal error detected: ' + state.last_error)

        # ======= THIS IS SWE-Bench specific =======
        # Get git patch
        return_val = complete_runtime(runtime, instance)
        git_patch = return_val['git_patch']
        logger.info(
            f'Got git diff for instance {instance.instance_id}:\n--------\n{git_patch}\n--------'
        )
    finally:
        # Always release the runtime, whether or not the run succeeded
        runtime.close()
    # ==========================================

    # ======= Attempt to evaluate the agent's edits =======
    # we use eval_infer.sh to evaluate the agent's edits, not here
    # because the agent may alter the environment / testcases
    test_result = {
        'git_patch': git_patch,
    }

    # If you are working on some simpler benchmark that only evaluates the final model output (e.g., in a MessageAction)
    # You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.

    # NOTE: this is NO LONGER the event stream, but an agent history that includes delegate agent's events
    histories = [event_to_dict(event) for event in state.history]
    metrics = get_metrics(state)

    # Save the output
    output = EvalOutput(
        instance_id=instance.instance_id,
        instruction=instruction,
        instance=instance.to_dict(),  # SWE Bench specific
        test_result=test_result,
        metadata=metadata,
        history=histories,
        metrics=metrics,
        error=state.last_error if state.last_error else None,
    )
    return output
def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
    """Restrict *dataset* according to ``config.toml`` and ``SKIP_IDS``.

    If a ``config.toml`` next to this file defines ``selected_ids``, only rows
    whose ``filter_column`` value is in that list are kept and ``SKIP_IDS`` is
    not consulted. Otherwise rows whose ``filter_column`` value appears in the
    comma-separated ``SKIP_IDS`` environment variable are dropped.

    Args:
        dataset: The frame to filter.
        filter_column: Name of the column holding the ids to match against.

    Returns:
        The filtered frame, or ``dataset`` itself when no filter applies.
    """
    file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.toml')
    if os.path.exists(file_path):
        with open(file_path, 'r') as file:
            data = toml.load(file)
            if 'selected_ids' in data:
                selected_ids = data['selected_ids']
                logger.info(
                    f'Filtering {len(selected_ids)} tasks from "selected_ids"...'
                )
                subset = dataset[dataset[filter_column].isin(selected_ids)]
                logger.info(f'Retained {subset.shape[0]} tasks after filtering')
                return subset

    # ''.split(',') yields [''], so empty entries must be dropped explicitly;
    # otherwise an unset SKIP_IDS is reported and applied as one (empty) id.
    raw_skip_ids = os.environ.get('SKIP_IDS', '').split(',')
    skip_ids = [skip_id.strip() for skip_id in raw_skip_ids if skip_id.strip()]
    if skip_ids:
        logger.info(f'Filtering {len(skip_ids)} tasks from "SKIP_IDS"...')
        return dataset[~dataset[filter_column].isin(skip_ids)]
    return dataset
# A list of instances that are known to be tricky to infer
# (will cause runtime failure even with resource factor = 8).
# They are removed from the run only when the dataset name contains 'SWE-Gym'
# (see the `__main__` block).
SWEGYM_EXCLUDE_IDS = [
    'dask__dask-10422',
    'pandas-dev__pandas-50548',
    'pandas-dev__pandas-53672',
    'pandas-dev__pandas-54174',
    'pandas-dev__pandas-55518',
    'pandas-dev__pandas-58383',
    'pydata__xarray-6721',
    'pytest-dev__pytest-10081',
    'pytest-dev__pytest-7236',
]
if __name__ == '__main__':
    # Entry point: parse CLI arguments, load and filter the dataset, build the
    # evaluation metadata and run inference over all remaining instances.
    parser = get_parser()
    parser.add_argument(
        '--dataset',
        type=str,
        default='princeton-nlp/SWE-bench',
        help='data set to evaluate on, either full-test or lite-test',
    )
    parser.add_argument(
        '--split',
        type=str,
        default='test',
        help='split to evaluate on',
    )
    args, _ = parser.parse_known_args()

    # NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
    # so we don't need to manage file uploading to OpenHands's repo
    dataset = load_dataset(args.dataset, split=args.split)
    swe_bench_tests = filter_dataset(dataset.to_pandas(), 'instance_id')
    logger.info(
        f'Loaded dataset {args.dataset} with split {args.split}: {len(swe_bench_tests)} tasks'
    )
    if 'SWE-Gym' in args.dataset:
        swe_bench_tests = swe_bench_tests[
            ~swe_bench_tests['instance_id'].isin(SWEGYM_EXCLUDE_IDS)
        ]
        logger.info(
            f'{len(swe_bench_tests)} tasks left after excluding SWE-Gym excluded tasks'
        )

    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
    # Check for a missing config BEFORE setting attributes on it, so that an
    # unknown --llm_config produces this error rather than an AttributeError.
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
    llm_config.log_completions = True
    # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
    llm_config.modify_params = False

    details = {}
    # Looked up for its side effect only; the result is not used below
    _agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)

    dataset_description = (
        args.dataset.replace('/', '__') + '-' + args.split.replace('/', '__')
    )
    metadata = make_metadata(
        llm_config,
        dataset_description,
        args.agent_cls,
        args.max_iterations,
        args.eval_note,
        args.eval_output_dir,
        details=details,
    )

    output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
    print(f'### OUTPUT FILE: {output_file} ###')
    instances = prepare_dataset(swe_bench_tests, output_file, args.eval_n_limit)

    # Normalize the test-list columns to strings when the first PASS_TO_PASS
    # value is not already a string
    if len(instances) > 0 and not isinstance(
        instances['PASS_TO_PASS'][instances['PASS_TO_PASS'].index[0]], str
    ):
        for col in ['PASS_TO_PASS', 'FAIL_TO_PASS']:
            instances[col] = instances[col].apply(lambda x: str(x))

    run_evaluation(
        instances,
        metadata,
        output_file,
        args.eval_num_workers,
        process_instance,
        timeout_seconds=8 * 60 * 60,  # 8 hour PER instance should be more than enough
        max_retries=5,
    )

View File

@@ -0,0 +1,157 @@
xingyaoww/sweb.eval.x86_64.astropy_s_astropy-11693:latest
xingyaoww/sweb.eval.x86_64.astropy_s_astropy-13838:latest
xingyaoww/sweb.eval.x86_64.astropy_s_astropy-14295:latest
xingyaoww/sweb.eval.x86_64.astropy_s_astropy-8292:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-13908:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-13980:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-13983:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-13984:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-14043:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-14623:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-19763:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-20470:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-20518:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-20584:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-20761:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-20826:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-21443:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-21490:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-21550:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-21568:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-21617:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-22865:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-22871:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-22931:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-23047:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-23111:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-23412:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24088:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24177:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24189:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24570:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24691:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24749:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24768:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24849:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24870:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24971:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25287:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25334:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25340:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25346:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25405:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25499:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25565:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25640:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25667:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25779:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-26078:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-26466:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-2576:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-2846:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-2979:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3180:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3187:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3202:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3216:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3217:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3276:latest
xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3394:latest
xingyaoww/sweb.eval.x86_64.pydata_s_xarray-4182:latest
xingyaoww/sweb.eval.x86_64.pydata_s_xarray-5682:latest
xingyaoww/sweb.eval.x86_64.pylint-dev_s_pylint-4551:latest
xingyaoww/sweb.eval.x86_64.scikit-learn_s_scikit-learn-13087:latest
xingyaoww/sweb.eval.x86_64.scikit-learn_s_scikit-learn-13618:latest
xingyaoww/sweb.eval.x86_64.scikit-learn_s_scikit-learn-14067:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-10048:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-10097:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-10191:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-10435:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-11266:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-11502:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-7615:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-7757:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8028:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8056:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8075:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8120:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8265:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8278:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8620:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8621:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8638:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8658:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9229:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9230:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9289:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9320:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9350:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9464:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9673:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9698:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9797:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9982:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9987:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9997:latest
xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9999:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-11787:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-11788:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-13264:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-13840:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-15151:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-15304:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-15625:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-15976:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-16003:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-17067:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-17115:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-18922:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-21769:latest
xingyaoww/sweb.eval.x86_64.sympy_s_sympy-24723:latest
luolin101/sweb.eval.x86_64.plotly_s_plotly.py-4083:latest
luolin101/sweb.eval.x86_64.plotly_s_plotly.py-2600:latest
luolin101/sweb.eval.x86_64.plotly_s_plotly.py-2591:latest
luolin101/sweb.eval.x86_64.plotly_s_plotly.py-1966:latest
luolin101/sweb.eval.x86_64.networkx_s_networkx-6503:latest
luolin101/sweb.eval.x86_64.networkx_s_networkx-6098:latest
luolin101/sweb.eval.x86_64.networkx_s_networkx-5616:latest
luolin101/sweb.eval.x86_64.networkx_s_networkx-5354:latest
luolin101/sweb.eval.x86_64.networkx_s_networkx-5058:latest
luolin101/sweb.eval.x86_64.networkx_s_networkx-4378:latest
luolin101/sweb.eval.x86_64.networkx_s_networkx-3764:latest
luolin101/sweb.eval.x86_64.vega_s_altair-2785:latest
luolin101/sweb.eval.x86_64.vega_s_altair-1092:latest
luolin101/sweb.eval.x86_64.vega_s_altair-974:latest
luolin101/sweb.eval.x86_64.vega_s_altair-830:latest
luolin101/sweb.eval.x86_64.matplotlib_s_matplotlib-27754:latest
luolin101/sweb.eval.x86_64.matplotlib_s_matplotlib-26926:latest
luolin101/sweb.eval.x86_64.matplotlib_s_matplotlib-26788:latest
luolin101/sweb.eval.x86_64.matplotlib_s_matplotlib-26586:latest
luolin101/sweb.eval.x86_64.sympy_s_sympy-26941:latest
luolin101/sweb.eval.x86_64.mwaskom_s_seaborn-3458:latest
luolin101/sweb.eval.x86_64.mwaskom_s_seaborn-3454:latest
xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25631:latest
xingyaoww/sweb.env.x86_64.428468730904ff6b4232aa:latest
xingyaoww/sweb.env.x86_64.89a9e6df7ab7bcb9e010c8:latest
xingyaoww/sweb.env.x86_64.15374367de368534f261e3:latest
xingyaoww/sweb.env.x86_64.6b007979cf533f0f3016e8:latest
xingyaoww/sweb.env.x86_64.b382c45e0a94d34ef0fc86:latest
xingyaoww/sweb.env.x86_64.7037e8c448a4b8ebfe9b13:latest
xingyaoww/sweb.env.x86_64.31244378a92e3bcce809ac:latest
xingyaoww/sweb.env.x86_64.efa6065ed5bf204410fd53:latest
xingyaoww/sweb.env.x86_64.a0efca7a0fe6719dbf65c2:latest
xingyaoww/sweb.env.x86_64.502d8fc6ebccd881244091:latest
luolin101/sweb.env.x86_64.eb002359cfcbe2edb56088:latest
xingyaoww/sweb.env.x86_64.d905bb51fb68acc5d4221b:latest
xingyaoww/sweb.env.x86_64.aa92880033da20ca313928:latest
luolin101/sweb.env.x86_64.c6d251a05e0af7688b64fd:latest
xingyaoww/sweb.env.x86_64.c795f4b88616b8462021ed:latest
luolin101/sweb.env.x86_64.1e5a06e76ee016d067d77e:latest
luolin101/sweb.env.x86_64.2e03d8e4d4bd373937a9ef:latest
luolin101/sweb.env.x86_64.4c16026920d27ea78f3b7a:latest
luolin101/sweb.env.x86_64.d15120dfdbda9831e9646b:latest
luolin101/sweb.env.x86_64.c581ba273c3275679773dd:latest
luolin101/sweb.env.x86_64.dc800a1bbe275c5de0c4aa:latest
luolin101/sweb.env.x86_64.59bd7d84a0939c7caba7e6:latest
xingyaoww/sweb.env.x86_64.0d80c7dec81ee2f2f513e2:latest
xingyaoww/sweb.base.x86_64:latest

View File

@@ -0,0 +1,62 @@
#!/bin/bash
# Pull the pre-built benchmark docker images listed in the image file and
# re-tag each one locally without its registry namespace and with "_s_"
# turned back into "__".
#
# Usage: <script> <cache_level> [set]
#   cache_level: base | env | instance
#   set:         lite | full (defaults to lite)
set -e

LEVEL=$1
# three levels:
# - base, keyword "sweb.base"
# - env, keyword "sweb.env"
# - instance, keyword "sweb.eval"
SET=$2

if [ -z "$LEVEL" ]; then
    echo "Usage: $0 <cache_level> <set>"
    echo "cache_level: base, env, or instance"
    echo "set: lite, full"
    exit 1
fi

if [ -z "$SET" ]; then
    echo "Usage: $0 <cache_level> <set>"
    echo "cache_level: base, env, or instance"
    echo "set: lite, full, default is lite"
    SET="lite"
fi

# NOTE(review): both branches resolve to the same image list; presumably there
# is no separate "lite" list for this benchmark - confirm before adding one.
if [ "$SET" == "full" ]; then
    IMAGE_FILE="$(dirname "$0")/all-visualswebench-full-instance-images.txt"
else
    IMAGE_FILE="$(dirname "$0")/all-visualswebench-full-instance-images.txt"
fi

# Define a pattern based on the level; each level also includes the ones below it
case "$LEVEL" in
    base)
        PATTERN="sweb.base"
        ;;
    env)
        PATTERN="sweb.base\|sweb.env"
        ;;
    instance)
        PATTERN="sweb.base\|sweb.env\|sweb.eval"
        ;;
    *)
        echo "Invalid cache level: $LEVEL"
        echo "Valid levels are: base, env, instance"
        exit 1
        ;;
esac

echo "Pulling docker images for [$LEVEL] level"
echo "Pattern: $PATTERN"
echo "Image file: $IMAGE_FILE"

# Read each line from the file, filter by pattern, and pull the docker image
grep "$PATTERN" "$IMAGE_FILE" | while IFS= read -r image; do
    # strip the registry namespace and replace _s_ with __ in the image name
    renamed_image=$(echo "$image" | sed 's|.*/||; s/_s_/__/g')
    echo "Pulling $image into $renamed_image"
    # Quote the expansions so the names are never word-split or globbed
    docker pull "$image"
    docker tag "$image" "$renamed_image"
done

View File

@@ -0,0 +1,141 @@
#!/bin/bash
# Evaluate an inference output file with the Visual SWE-bench harness.
#
# Usage: ./eval_infer.sh <output_file> [instance_id] [dataset_name] [split]
#   output_file:  JSONL file, either in OH format or in SWE-bench format
#   instance_id:  optional; when given, only that instance is evaluated
#   dataset_name: defaults to luolin101/Visual-SWE-bench
#   split:        defaults to test

PROCESS_FILEPATH=$1
if [ -z "$PROCESS_FILEPATH" ]; then
    echo "Error: PROCESS_FILEPATH is empty. Usage: ./eval_infer.sh <output_file> [instance_id] [dataset_name] [split]"
    exit 1
fi

# Quoted so that paths containing spaces are tested as a single word.
if [ ! -f "$PROCESS_FILEPATH" ]; then
    echo "Error: $PROCESS_FILEPATH is not a file"
    exit 1
fi

# If instance_id is empty, it means we want to eval on the whole $PROCESS_FILEPATH
# otherwise, we want to eval on the instance_id
INSTANCE_ID=$2
DATASET_NAME=${3:-"luolin101/Visual-SWE-bench"}
SPLIT=${4:-"test"}

echo "INSTANCE_ID: $INSTANCE_ID"
echo "DATASET_NAME: $DATASET_NAME"
echo "SPLIT: $SPLIT"

# Work with the absolute path from here on.
PROCESS_FILEPATH=$(realpath "$PROCESS_FILEPATH")
FILE_DIR=$(dirname "$PROCESS_FILEPATH")
FILE_NAME=$(basename "$PROCESS_FILEPATH")

echo "Evaluating $FILE_NAME @ $FILE_DIR"

# ================================================
# detect whether PROCESS_FILEPATH is in OH format or in SWE-bench format
echo "=============================================================="
echo "Detecting whether PROCESS_FILEPATH is in OH format or in SWE-bench format"
echo "=============================================================="
# SWE-bench format is a JSONL where every line has three fields: model_name_or_path, instance_id, and model_patch
#
# Usage: is_swebench_format [file]
#   file: path to check; falls back to $PROCESS_FILEPATH when omitted.
# Returns 0 when the first line of the file is a JSON object that has all
# three fields, 1 otherwise (including when jq cannot parse the line).
# Only the first line is inspected.
function is_swebench_format() {
    # Honor the path handed in by the caller; keep the old global as fallback
    # so calls without an argument behave as before.
    local file="${1:-$PROCESS_FILEPATH}"
    local first_line

    # Read the first line of the file
    read -r first_line < "$file"

    # Use jq to check if the first line has the required fields
    if echo "$first_line" | jq -e '. | has("model_name_or_path") and has("instance_id") and has("model_patch")' > /dev/null; then
        return 0 # Return 0 if the first line has the required fields
    fi
    return 1 # Return 1 if the first line does not have the required fields
}
# Call the function with the file path
is_swebench_format "$PROCESS_FILEPATH"
IS_SWEBENCH_FORMAT=$?

# Use the result in an if-else statement
if [ $IS_SWEBENCH_FORMAT -eq 0 ]; then
    echo "The file IS in SWE-bench format."
    SWEBENCH_FORMAT_JSONL=$PROCESS_FILEPATH
else
    echo "The file IS NOT in SWE-bench format."

    # ==== Convert OH format to SWE-bench format ====
    echo "Merged output file with fine-grained report will be saved to $FILE_DIR"
    # Path is quoted so a file name with spaces reaches the converter as one argument.
    poetry run python3 evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py "$PROCESS_FILEPATH"
    # replace .jsonl with .swebench.jsonl in filename
    # (this substitutes the first ".jsonl" occurrence in the whole path)
    SWEBENCH_FORMAT_JSONL=${PROCESS_FILEPATH/.jsonl/.swebench.jsonl}
    echo "SWEBENCH_FORMAT_JSONL: $SWEBENCH_FORMAT_JSONL"
    # assert that the file exists
    if [ ! -f "$SWEBENCH_FORMAT_JSONL" ]; then
        echo "Error: $SWEBENCH_FORMAT_JSONL does not exist. There is probably an error in the conversion process."
        exit 1
    fi
    SWEBENCH_FORMAT_JSONL=$(realpath "$SWEBENCH_FORMAT_JSONL")
fi
# ================================================
echo "=============================================================="
echo "Running SWE-bench evaluation"
echo "=============================================================="

# RUN_ID names the harness run; it is also used below to locate the logs
# and the report file written by the harness.
RUN_ID=$(date +"%Y%m%d_%H%M%S")
N_PROCESS=16

if [ -z "$INSTANCE_ID" ]; then
    echo "Running SWE-bench evaluation on the whole input file..."
    # Dataset and split come from $DATASET_NAME / $SPLIT
    # change `--dataset_name` and `--split` to alter dataset
    poetry run python -m visualswebench.harness.run_evaluation \
        --dataset_name "$DATASET_NAME" \
        --split "$SPLIT" \
        --predictions_path "$SWEBENCH_FORMAT_JSONL" \
        --timeout 1800 \
        --cache_level instance \
        --max_workers "$N_PROCESS" \
        --run_id "$RUN_ID"

    # get the "model_name_or_path" from the first line of the SWEBENCH_FORMAT_JSONL
    MODEL_NAME_OR_PATH=$(jq -r '.model_name_or_path' "$SWEBENCH_FORMAT_JSONL" | head -n 1)
    echo "MODEL_NAME_OR_PATH: $MODEL_NAME_OR_PATH"

    RESULT_OUTPUT_DIR=$(dirname "$SWEBENCH_FORMAT_JSONL")
    echo "RESULT_OUTPUT_DIR: $RESULT_OUTPUT_DIR"

    # move the eval results to the target directory
    mkdir -p "$RESULT_OUTPUT_DIR"
    # rm eval_outputs directory if it exists
    if [ -d "$RESULT_OUTPUT_DIR/eval_outputs" ]; then
        rm -rf "$RESULT_OUTPUT_DIR/eval_outputs"
    fi

    # Two-step move: bring the per-model log directory next to the predictions
    # file, then rename it to eval_outputs.
    mv "logs/run_evaluation/$RUN_ID/$MODEL_NAME_OR_PATH" "$RESULT_OUTPUT_DIR"
    mv "$RESULT_OUTPUT_DIR/$MODEL_NAME_OR_PATH" "$RESULT_OUTPUT_DIR/eval_outputs"
    echo "RUN_ID: $RUN_ID" > "$RESULT_OUTPUT_DIR/run_id.txt"

    # move report file
    REPORT_PATH=$MODEL_NAME_OR_PATH.$RUN_ID.json
    if [ -f "$REPORT_PATH" ]; then
        # check if $RESULT_OUTPUT_DIR/report.json exists
        if [ -f "$RESULT_OUTPUT_DIR/report.json" ]; then
            echo "Report file $RESULT_OUTPUT_DIR/report.json already exists. Overwriting..."
            # keep exactly one backup of the previous report
            if [ -f "$RESULT_OUTPUT_DIR/report.json.bak" ]; then
                rm "$RESULT_OUTPUT_DIR/report.json.bak"
            fi
            mv "$RESULT_OUTPUT_DIR/report.json" "$RESULT_OUTPUT_DIR/report.json.bak"
        fi

        mv "$REPORT_PATH" "$RESULT_OUTPUT_DIR/report.json"
    fi

    poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py "$PROCESS_FILEPATH"

else
    echo "Running SWE-bench evaluation on the instance_id: $INSTANCE_ID"
    # $INSTANCE_ID is intentionally left unquoted: presumably it may carry
    # several whitespace-separated ids for --instance_ids (TODO confirm).
    poetry run python -m visualswebench.harness.run_evaluation \
        --dataset_name "$DATASET_NAME" \
        --split "$SPLIT" \
        --predictions_path "$SWEBENCH_FORMAT_JSONL" \
        --timeout 1800 \
        --instance_ids $INSTANCE_ID \
        --cache_level instance \
        --max_workers "$N_PROCESS" \
        --run_id "$RUN_ID"
fi

View File

@@ -0,0 +1,117 @@
#!/bin/bash
# Run Visual SWE-bench inference.
#
# Positional arguments (all optional except the model config):
#   $1 MODEL_CONFIG  $2 COMMIT_HASH  $3 AGENT     $4 EVAL_LIMIT  $5 MAX_ITER
#   $6 NUM_WORKERS   $7 DATASET      $8 SPLIT     $9 N_RUNS
# Environment variables read: USE_INSTANCE_IMAGE, RUN_WITH_BROWSING,
# USE_HINT_TEXT, EXP_NAME.
set -eo pipefail

# Provides checkout_eval_branch, get_openhands_version and
# checkout_original_branch used by this script.
source "evaluation/utils/version_control.sh"

MODEL_CONFIG=$1
COMMIT_HASH=$2 # not referenced in this script itself; presumably read by the sourced helpers
AGENT=$3
EVAL_LIMIT=$4
MAX_ITER=$5
NUM_WORKERS=$6
DATASET=$7
SPLIT=$8
N_RUNS=$9

[[ -n "$NUM_WORKERS" ]] || {
    NUM_WORKERS=1
    echo "Number of workers not specified, use default $NUM_WORKERS"
}

checkout_eval_branch

[[ -n "$AGENT" ]] || {
    echo "Agent not specified, use default CodeActAgent"
    AGENT="CodeActAgent"
}

[[ -n "$MAX_ITER" ]] || {
    echo "MAX_ITER not specified, use default 100"
    MAX_ITER=100
}

[[ -n "$USE_INSTANCE_IMAGE" ]] || {
    echo "USE_INSTANCE_IMAGE not specified, use default true"
    USE_INSTANCE_IMAGE=true
}

[[ -n "$RUN_WITH_BROWSING" ]] || {
    echo "RUN_WITH_BROWSING not specified, use default false"
    RUN_WITH_BROWSING=false
}

[[ -n "$DATASET" ]] || {
    echo "DATASET not specified, use default luolin101/Visual-SWE-bench"
    DATASET="luolin101/Visual-SWE-bench"
}

[[ -n "$SPLIT" ]] || {
    echo "SPLIT not specified, use default test"
    SPLIT="test"
}

# Make the two toggles visible to the child processes.
export USE_INSTANCE_IMAGE
echo "USE_INSTANCE_IMAGE: $USE_INSTANCE_IMAGE"
export RUN_WITH_BROWSING
echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"

get_openhands_version

echo "AGENT: $AGENT"
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
echo "MODEL_CONFIG: $MODEL_CONFIG"
echo "DATASET: $DATASET"
echo "SPLIT: $SPLIT"

# Default to NOT use Hint
[[ -n "$USE_HINT_TEXT" ]] || export USE_HINT_TEXT=false
echo "USE_HINT_TEXT: $USE_HINT_TEXT"

# The eval note starts from the OpenHands version and gets one suffix per
# active option, in this order: no-hint, with-browsing, experiment name.
EVAL_NOTE="$OPENHANDS_VERSION"
[[ "$USE_HINT_TEXT" = false ]] && EVAL_NOTE+="-no-hint"
[[ "$RUN_WITH_BROWSING" = true ]] && EVAL_NOTE+="-with-browsing"
[[ -n "$EXP_NAME" ]] && EVAL_NOTE+="-$EXP_NAME"
# Run one inference pass of the Visual SWE-bench benchmark.
#
# Arguments:
#   $1 - eval note passed to --eval-note
# Reads the globals AGENT, MODEL_CONFIG, MAX_ITER, NUM_WORKERS, DATASET, SPLIT
# and, when non-empty, EVAL_LIMIT (forwarded as --eval-n-limit).
# Returns the exit status of the inference command.
function run_eval() {
    local eval_note=$1

    # Build the command as an argv array instead of a string passed to `eval`,
    # so each value is handed over verbatim as a single argument and is never
    # re-parsed by the shell (no word splitting, globbing or metacharacters).
    local -a cmd=(
        poetry run python evaluation/benchmarks/visual_swe_bench/run_infer.py
        --agent-cls "$AGENT"
        --llm-config "$MODEL_CONFIG"
        --max-iterations "$MAX_ITER"
        --eval-num-workers "$NUM_WORKERS"
        --eval-note "$eval_note"
        --dataset "$DATASET"
        --split "$SPLIT"
    )

    if [ -n "$EVAL_LIMIT" ]; then
        echo "EVAL_LIMIT: $EVAL_LIMIT"
        cmd+=(--eval-n-limit "$EVAL_LIMIT")
    fi

    # Run the command
    "${cmd[@]}"
}
# prevent the agent from using the github token to push
unset SANDBOX_ENV_GITHUB_TOKEN

[[ -n "$N_RUNS" ]] || {
    N_RUNS=1
    echo "N_RUNS not specified, use default $N_RUNS"
}

# One inference pass per run; each pass gets its own "-run_<k>" note suffix.
for run_idx in $(seq 1 $N_RUNS); do
    note_for_run="$EVAL_NOTE-run_$run_idx"
    echo "EVAL_NOTE: $note_for_run"
    run_eval $note_for_run
done

checkout_original_branch

View File

@@ -0,0 +1,40 @@
#!/bin/bash
# Prepare /workspace for the instance named by $SWE_INSTANCE_ID: look the
# instance up in the instance JSON, copy /testbed into
# /workspace/<repo>__<version>, and activate the "testbed" conda environment.

source ~/.bashrc
SWEUTIL_DIR=/swe_util

# FIXME: Cannot read SWE_INSTANCE_ID from the environment variable
# SWE_INSTANCE_ID=django__django-11099
if [ -z "$SWE_INSTANCE_ID" ]; then
    echo "Error: SWE_INSTANCE_ID is not set." >&2
    exit 1
fi

# Read the swe-bench-instance.json file and extract the required item based on instance_id
item=$(jq --arg INSTANCE_ID "$SWE_INSTANCE_ID" '.[] | select(.instance_id == $INSTANCE_ID)' "$SWEUTIL_DIR/eval_data/instances/swe-bench-instance.json")

if [[ -z "$item" ]]; then
    echo "No item found for the provided instance ID."
    exit 1
fi

# Workspace directory name is "<repo>__<version>" with every "/" replaced by "__".
WORKSPACE_NAME=$(echo "$item" | jq -r '(.repo | tostring) + "__" + (.version | tostring) | gsub("/"; "__")')

echo "WORKSPACE_NAME: $WORKSPACE_NAME"

# Clear the workspace
if [ -d /workspace ]; then
    rm -rf /workspace/*
else
    mkdir /workspace
fi
# Copy repo to workspace
# (the glob above does not match dot-entries, so the target is checked again)
if [ -d "/workspace/$WORKSPACE_NAME" ]; then
    rm -rf "/workspace/$WORKSPACE_NAME"
fi
mkdir -p /workspace
cp -r /testbed "/workspace/$WORKSPACE_NAME"

# Activate instance-specific environment
. /opt/miniconda3/etc/profile.d/conda.sh
conda activate testbed

View File

@@ -10,11 +10,7 @@ describe("ChatMessage", () => {
expect(screen.getByText("Hello, World!")).toBeInTheDocument();
});
it("should render an assistant message", () => {
render(<ChatMessage type="assistant" message="Hello, World!" />);
expect(screen.getByTestId("assistant-message")).toBeInTheDocument();
expect(screen.getByText("Hello, World!")).toBeInTheDocument();
});
it.todo("should render an assistant message");
it.skip("should support code syntax highlighting", () => {
const code = "```js\nconsole.log('Hello, World!')\n```";
@@ -66,10 +62,7 @@ describe("ChatMessage", () => {
it("should apply correct styles to inline code", () => {
render(
<ChatMessage
type="assistant"
message="Here is some `inline code` text"
/>,
<ChatMessage type="agent" message="Here is some `inline code` text" />,
);
const codeElement = screen.getByText("inline code");

View File

@@ -1,11 +1,9 @@
import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
import { act, screen, waitFor, within } from "@testing-library/react";
import { screen, waitFor, within } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { renderWithProviders } from "test-utils";
import type { Message } from "#/message";
import { addUserMessage } from "#/state/chat-slice";
import { SUGGESTIONS } from "#/utils/suggestions";
import * as ChatSlice from "#/state/chat-slice";
import { WsClientProviderStatus } from "#/context/ws-client-provider";
import { ChatInterface } from "#/components/features/chat/chat-interface";
@@ -42,51 +40,10 @@ describe("Empty state", () => {
vi.clearAllMocks();
});
it("should render suggestions if empty", () => {
const { store } = renderWithProviders(<ChatInterface />, {
preloadedState: {
chat: {
messages: [],
systemMessage: {
content: "",
tools: [],
openhands_version: null,
agent_class: null
}
},
},
});
expect(screen.getByTestId("suggestions")).toBeInTheDocument();
act(() => {
store.dispatch(
addUserMessage({
content: "Hello",
imageUrls: [],
timestamp: new Date().toISOString(),
pending: true,
}),
);
});
expect(screen.queryByTestId("suggestions")).not.toBeInTheDocument();
});
it.todo("should render suggestions if empty");
it("should render the default suggestions", () => {
renderWithProviders(<ChatInterface />, {
preloadedState: {
chat: {
messages: [],
systemMessage: {
content: "",
tools: [],
openhands_version: null,
agent_class: null
}
},
},
});
renderWithProviders(<ChatInterface />);
const suggestions = screen.getByTestId("suggestions");
const repoSuggestions = Object.keys(SUGGESTIONS.repo);
@@ -110,21 +67,8 @@ describe("Empty state", () => {
status: WsClientProviderStatus.CONNECTED,
isLoadingMessages: false,
}));
const addUserMessageSpy = vi.spyOn(ChatSlice, "addUserMessage");
const user = userEvent.setup();
const { store } = renderWithProviders(<ChatInterface />, {
preloadedState: {
chat: {
messages: [],
systemMessage: {
content: "",
tools: [],
openhands_version: null,
agent_class: null
}
},
},
});
renderWithProviders(<ChatInterface />);
const suggestions = screen.getByTestId("suggestions");
const displayedSuggestions = within(suggestions).getAllByRole("button");
@@ -133,9 +77,7 @@ describe("Empty state", () => {
await user.click(displayedSuggestions[0]);
// user message loaded to input
expect(addUserMessageSpy).not.toHaveBeenCalled();
expect(screen.queryByTestId("suggestions")).toBeInTheDocument();
expect(store.getState().chat.messages).toHaveLength(0);
expect(input).toHaveValue(displayedSuggestions[0].textContent);
},
);
@@ -149,19 +91,7 @@ describe("Empty state", () => {
isLoadingMessages: false,
}));
const user = userEvent.setup();
const { rerender } = renderWithProviders(<ChatInterface />, {
preloadedState: {
chat: {
messages: [],
systemMessage: {
content: "",
tools: [],
openhands_version: null,
agent_class: null
}
},
},
});
const { rerender } = renderWithProviders(<ChatInterface />);
const suggestions = screen.getByTestId("suggestions");
const displayedSuggestions = within(suggestions).getAllByRole("button");

View File

@@ -20,7 +20,6 @@ describe("AccountSettingsContextMenu", () => {
<AccountSettingsContextMenu
onLogout={onLogoutMock}
onClose={onCloseMock}
isLoggedIn
/>,
);
@@ -35,7 +34,6 @@ describe("AccountSettingsContextMenu", () => {
<AccountSettingsContextMenu
onLogout={onLogoutMock}
onClose={onCloseMock}
isLoggedIn
/>,
);
@@ -45,19 +43,18 @@ describe("AccountSettingsContextMenu", () => {
expect(onLogoutMock).toHaveBeenCalledOnce();
});
test("onLogout should be disabled if the user is not logged in", async () => {
test("logout button is always enabled", async () => {
render(
<AccountSettingsContextMenu
onLogout={onLogoutMock}
onClose={onCloseMock}
isLoggedIn={false}
/>,
);
const logoutOption = screen.getByText("ACCOUNT_SETTINGS$LOGOUT");
await user.click(logoutOption);
expect(onLogoutMock).not.toHaveBeenCalled();
expect(onLogoutMock).toHaveBeenCalledOnce();
});
it("should call onClose when clicking outside of the element", async () => {
@@ -65,7 +62,6 @@ describe("AccountSettingsContextMenu", () => {
<AccountSettingsContextMenu
onLogout={onLogoutMock}
onClose={onCloseMock}
isLoggedIn
/>,
);

View File

@@ -22,7 +22,7 @@ const renderRepoConnector = () => {
path: "/conversations/:conversationId",
},
{
Component: Outlet,
Component: () => <Outlet />,
path: "/settings",
children: [
{

View File

@@ -11,7 +11,7 @@ import { MOCK_TASKS } from "#/mocks/task-suggestions-handlers";
const renderTaskSuggestions = () => {
const RouterStub = createRoutesStub([
{
Component: TaskSuggestions,
Component: () => <TaskSuggestions />,
path: "/",
},
{

View File

@@ -1,92 +1,11 @@
import { render, screen } from "@testing-library/react";
import { describe, it, expect, vi } from "vitest";
import { Messages } from "#/components/features/chat/messages";
import type { Message } from "#/message";
import { renderWithProviders } from "test-utils";
// Mock the useParams hook to provide a conversationId
vi.mock("react-router", async () => {
const actual = await vi.importActual<typeof import("react-router")>("react-router");
return {
...actual,
useParams: () => ({ conversationId: "test-conversation-id" }),
};
});
import { describe, it } from "vitest";
describe("File Operations Messages", () => {
it("should show success indicator for successful file read operation", () => {
const messages: Message[] = [
{
type: "action",
translationID: "read_file_contents",
content: "Successfully read file contents",
success: true,
sender: "assistant",
timestamp: new Date().toISOString(),
},
];
it.todo("should show success indicator for successful file read operation");
renderWithProviders(<Messages messages={messages} isAwaitingUserConfirmation={false} />);
it.todo("should show failure indicator for failed file read operation");
const statusIcon = screen.getByTestId("status-icon");
expect(statusIcon).toBeInTheDocument();
expect(statusIcon.closest("svg")).toHaveClass("fill-success");
});
it.todo("should show success indicator for successful file edit operation");
it("should show failure indicator for failed file read operation", () => {
const messages: Message[] = [
{
type: "action",
translationID: "read_file_contents",
content: "Failed to read file contents",
success: false,
sender: "assistant",
timestamp: new Date().toISOString(),
},
];
renderWithProviders(<Messages messages={messages} isAwaitingUserConfirmation={false} />);
const statusIcon = screen.getByTestId("status-icon");
expect(statusIcon).toBeInTheDocument();
expect(statusIcon.closest("svg")).toHaveClass("fill-danger");
});
it("should show success indicator for successful file edit operation", () => {
const messages: Message[] = [
{
type: "action",
translationID: "edit_file_contents",
content: "Successfully edited file contents",
success: true,
sender: "assistant",
timestamp: new Date().toISOString(),
},
];
renderWithProviders(<Messages messages={messages} isAwaitingUserConfirmation={false} />);
const statusIcon = screen.getByTestId("status-icon");
expect(statusIcon).toBeInTheDocument();
expect(statusIcon.closest("svg")).toHaveClass("fill-success");
});
it("should show failure indicator for failed file edit operation", () => {
const messages: Message[] = [
{
type: "action",
translationID: "edit_file_contents",
content: "Failed to edit file contents",
success: false,
sender: "assistant",
timestamp: new Date().toISOString(),
},
];
renderWithProviders(<Messages messages={messages} isAwaitingUserConfirmation={false} />);
const statusIcon = screen.getByTestId("status-icon");
expect(statusIcon).toBeInTheDocument();
expect(statusIcon.closest("svg")).toHaveClass("fill-danger");
});
it.todo("should show failure indicator for failed file edit operation");
});

View File

@@ -57,7 +57,7 @@ describe("UserActions", () => {
).not.toBeInTheDocument();
});
test("onLogout should not be called when the user is not logged in", async () => {
test("logout button is always enabled", async () => {
render(<UserActions onLogout={onLogoutMock} />);
const userAvatar = screen.getByTestId("user-avatar");
@@ -66,6 +66,6 @@ describe("UserActions", () => {
const logoutOption = screen.getByText("ACCOUNT_SETTINGS$LOGOUT");
await user.click(logoutOption);
expect(onLogoutMock).not.toHaveBeenCalled();
expect(onLogoutMock).toHaveBeenCalledOnce();
});
});

View File

@@ -2,7 +2,6 @@ import { describe, it, expect, vi, beforeEach } from "vitest";
import { render, waitFor } from "@testing-library/react";
import React from "react";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import * as ChatSlice from "#/state/chat-slice";
import {
updateStatusWhenErrorMessagePresent,
WsClientProvider,
@@ -11,42 +10,15 @@ import {
describe("Propagate error message", () => {
it("should do nothing when no message was passed from server", () => {
const addErrorMessageSpy = vi.spyOn(ChatSlice, "addErrorMessage");
updateStatusWhenErrorMessagePresent(null);
updateStatusWhenErrorMessagePresent(undefined);
updateStatusWhenErrorMessagePresent({});
updateStatusWhenErrorMessagePresent({ message: null });
expect(addErrorMessageSpy).not.toHaveBeenCalled();
});
it("should display error to user when present", () => {
const message = "We have a problem!";
const addErrorMessageSpy = vi.spyOn(ChatSlice, "addErrorMessage");
updateStatusWhenErrorMessagePresent({ message });
it.todo("should display error to user when present");
expect(addErrorMessageSpy).toHaveBeenCalledWith({
message,
status_update: true,
type: "error",
});
});
it("should display error including translation id when present", () => {
const message = "We have a problem!";
const addErrorMessageSpy = vi.spyOn(ChatSlice, "addErrorMessage");
updateStatusWhenErrorMessagePresent({
message,
data: { msg_id: "..id.." },
});
expect(addErrorMessageSpy).toHaveBeenCalledWith({
message,
id: "..id..",
status_update: true,
type: "error",
});
});
it.todo("should display error including translation id when present");
});
// Create a mock for socket.io-client

View File

@@ -11,7 +11,6 @@ describe("Translations", () => {
<AccountSettingsContextMenu
onLogout={() => {}}
onClose={() => {}}
isLoggedIn
/>,
);
expect(

View File

@@ -22,7 +22,7 @@ const MOCK_GET_SECRETS_RESPONSE: GetSecretsResponse["custom_secrets"] = [
const RouterStub = createRoutesStub([
{
Component: Outlet,
Component: () => <Outlet />,
path: "/settings",
children: [
{

View File

@@ -1,146 +0,0 @@
import { describe, it, expect, vi, beforeEach } from "vitest";
import { handleStatusMessage, handleActionMessage } from "#/services/actions";
import store from "#/store";
import { trackError } from "#/utils/error-handler";
import ActionType from "#/types/action-type";
import { ActionMessage } from "#/types/message";
// Mock dependencies
vi.mock("#/utils/error-handler", () => ({
  trackError: vi.fn(),
}));

vi.mock("#/store", () => ({
  default: {
    dispatch: vi.fn(),
  },
}));

// Tests for the status and action message handlers; the store and the error
// tracker are mocked above so only the calls made to them are observed.
describe("Actions Service", () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  describe("handleStatusMessage", () => {
    it("should dispatch info messages to status state", () => {
      const infoStatus = {
        type: "info",
        message: "Runtime is not available",
        id: "runtime.unavailable",
        status_update: true as const,
      };

      handleStatusMessage(infoStatus);

      expect(store.dispatch).toHaveBeenCalledWith(
        expect.objectContaining({ payload: infoStatus }),
      );
    });

    it("should log error messages and display them in chat", () => {
      const errorStatus = {
        type: "error",
        message: "Runtime connection failed",
        id: "runtime.connection.failed",
        status_update: true as const,
      };

      handleStatusMessage(errorStatus);

      // The error is reported to the tracker ...
      expect(trackError).toHaveBeenCalledWith({
        message: "Runtime connection failed",
        source: "chat",
        metadata: { msgId: "runtime.connection.failed" },
      });

      // ... and also dispatched to the store.
      expect(store.dispatch).toHaveBeenCalledWith(
        expect.objectContaining({ payload: errorStatus }),
      );
    });
  });

  describe("handleActionMessage", () => {
    it("should use first-person perspective for task completion messages", () => {
      // One entry per completion state, checked in this order:
      // partial completion, not completed, completed successfully.
      const finishCases = [
        {
          id: 1,
          taskCompleted: "partial",
          needle: "believe that the task was **completed partially**",
          expected: "I believe that the task was **completed partially**",
        },
        {
          id: 2,
          taskCompleted: "false",
          needle: "believe that the task was **not completed**",
          expected: "I believe that the task was **not completed**",
        },
        {
          id: 3,
          taskCompleted: "true",
          needle: "believe that the task was **completed successfully**",
          expected: "I believe that the task was **completed successfully**",
        },
      ] as const;

      finishCases.forEach(({ id, taskCompleted, needle, expected }) => {
        const finishMessage: ActionMessage = {
          id,
          action: ActionType.FINISH,
          source: "agent",
          message: "",
          timestamp: new Date().toISOString(),
          args: {
            final_thought: "",
            task_completed: taskCompleted,
            outputs: "",
            thought: "",
          },
        };

        // Mock implementation to capture the message
        let captured = "";
        (store.dispatch as any).mockImplementation((action: any) => {
          const isMatchingAssistantMessage =
            action.type === "chat/addAssistantMessage" &&
            action.payload.includes(needle);
          if (isMatchingAssistantMessage) {
            captured = action.payload;
          }
        });

        handleActionMessage(finishMessage);
        expect(captured).toContain(expected);
      });
    });
  });
});

View File

@@ -1,51 +0,0 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import { handleObservationMessage } from "#/services/observations";
import store from "#/store";
import { ObservationMessage } from "#/types/message";
// Mock dependencies
vi.mock("#/store", () => ({
  default: {
    dispatch: vi.fn(),
  },
}));

// Tests for the observation handler; the store is mocked above so only the
// dispatch calls are observed.
describe("Observations Service", () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  describe("handleObservationMessage", () => {
    it("should dispatch error messages exactly once", () => {
      // An "error" observation with an empty error id and empty metadata.
      const errorObservation: ObservationMessage = {
        id: 14,
        timestamp: "2025-04-14T13:37:54.451843",
        message: "The action has not been executed.",
        cause: 12,
        observation: "error",
        content: "The action has not been executed.",
        extras: {
          error_id: "",
          metadata: {},
        },
      };

      handleObservationMessage(errorObservation);

      const expectedPayload = expect.objectContaining({
        observation: "error",
        content: "The action has not been executed.",
        source: "user",
        extras: {
          error_id: "",
        },
      });

      expect(store.dispatch).toHaveBeenCalledTimes(1);
      expect(store.dispatch).toHaveBeenCalledWith({
        type: "chat/addAssistantObservation",
        payload: expectedPayload,
      });
    });
  });
});

View File

@@ -1,8 +1,4 @@
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { handleObservationMessage } from "#/services/observations";
import { setScreenshotSrc, setUrl } from "#/state/browser-slice";
import ObservationType from "#/types/observation-type";
import store from "#/store";
import { describe, it, vi, beforeEach, afterEach } from "vitest";
// Mock the store module
vi.mock("#/store", () => ({
@@ -20,43 +16,9 @@ describe("handleObservationMessage", () => {
vi.resetAllMocks();
});
it("updates browser state when receiving a browse observation", () => {
const message = {
id: "test-id",
cause: "test-cause",
observation: ObservationType.BROWSE,
content: "test content",
message: "test message",
extras: {
url: "https://example.com",
screenshot: "base64-screenshot-data",
},
};
it.todo("updates browser state when receiving a browse observation");
handleObservationMessage(message);
// Check that setScreenshotSrc and setUrl were called with the correct values
expect(store.dispatch).toHaveBeenCalledWith(setScreenshotSrc("base64-screenshot-data"));
expect(store.dispatch).toHaveBeenCalledWith(setUrl("https://example.com"));
});
it("updates browser state when receiving a browse_interactive observation", () => {
const message = {
id: "test-id",
cause: "test-cause",
observation: ObservationType.BROWSE_INTERACTIVE,
content: "test content",
message: "test message",
extras: {
url: "https://example.com",
screenshot: "base64-screenshot-data",
},
};
handleObservationMessage(message);
// Check that setScreenshotSrc and setUrl were called with the correct values
expect(store.dispatch).toHaveBeenCalledWith(setScreenshotSrc("base64-screenshot-data"));
expect(store.dispatch).toHaveBeenCalledWith(setUrl("https://example.com"));
});
it.todo(
"updates browser state when receiving a browse_interactive observation",
);
});

File diff suppressed because it is too large Load Diff

View File

@@ -8,30 +8,30 @@
},
"dependencies": {
"@heroui/react": "2.7.8",
"@microlink/react-json-view": "^1.26.1",
"@microlink/react-json-view": "^1.26.2",
"@monaco-editor/react": "^4.7.0-rc.0",
"@react-router/node": "^7.5.3",
"@react-router/serve": "^7.5.3",
"@react-router/node": "^7.6.0",
"@react-router/serve": "^7.6.0",
"@react-types/shared": "^3.29.0",
"@reduxjs/toolkit": "^2.7.0",
"@reduxjs/toolkit": "^2.8.2",
"@stripe/react-stripe-js": "^3.7.0",
"@stripe/stripe-js": "^7.3.0",
"@tanstack/react-query": "^5.75.4",
"@tanstack/react-query": "^5.76.1",
"@vitejs/plugin-react": "^4.4.0",
"@xterm/addon-fit": "^0.10.0",
"@xterm/xterm": "^5.4.0",
"axios": "^1.9.0",
"clsx": "^2.1.1",
"eslint-config-airbnb-typescript": "^18.0.0",
"framer-motion": "^12.10.0",
"i18next": "^25.1.1",
"framer-motion": "^12.12.1",
"i18next": "^25.1.3",
"i18next-browser-languagedetector": "^8.1.0",
"i18next-http-backend": "^3.0.2",
"isbot": "^5.1.27",
"isbot": "^5.1.28",
"jose": "^6.0.11",
"lucide-react": "^0.507.0",
"lucide-react": "^0.511.0",
"monaco-editor": "^0.52.2",
"posthog-js": "^1.239.1",
"posthog-js": "^1.242.2",
"react": "^19.1.0",
"react-dom": "^19.1.0",
"react-highlight": "^0.15.0",
@@ -40,15 +40,15 @@
"react-icons": "^5.5.0",
"react-markdown": "^10.1.0",
"react-redux": "^9.2.0",
"react-router": "^7.5.3",
"react-router": "^7.6.0",
"react-syntax-highlighter": "^15.6.1",
"react-textarea-autosize": "^8.5.9",
"remark-gfm": "^4.0.1",
"sirv-cli": "^3.0.1",
"socket.io-client": "^4.8.1",
"tailwind-merge": "^3.2.0",
"tailwind-merge": "^3.3.0",
"vite": "^6.3.5",
"web-vitals": "^3.5.2",
"web-vitals": "^5.0.1",
"ws": "^8.18.2"
},
"scripts": {
@@ -83,16 +83,16 @@
"@babel/types": "^7.27.0",
"@mswjs/socket.io-binding": "^0.1.1",
"@playwright/test": "^1.52.0",
"@react-router/dev": "^7.5.3",
"@react-router/dev": "^7.6.0",
"@tailwindcss/typography": "^0.5.16",
"@tanstack/eslint-plugin-query": "^5.74.7",
"@testing-library/dom": "^10.4.0",
"@testing-library/jest-dom": "^6.6.1",
"@testing-library/react": "^16.3.0",
"@testing-library/user-event": "^14.6.1",
"@types/node": "^22.15.12",
"@types/react": "^19.1.3",
"@types/react-dom": "^19.1.3",
"@types/node": "^22.15.18",
"@types/react": "^19.1.4",
"@types/react-dom": "^19.1.5",
"@types/react-highlight": "^0.12.8",
"@types/react-syntax-highlighter": "^15.5.13",
"@types/ws": "^8.18.1",
@@ -104,7 +104,7 @@
"eslint": "^8.57.0",
"eslint-config-airbnb": "^19.0.4",
"eslint-config-airbnb-typescript": "^18.0.0",
"eslint-config-prettier": "^10.1.3",
"eslint-config-prettier": "^10.1.5",
"eslint-plugin-import": "^2.29.1",
"eslint-plugin-jsx-a11y": "^6.10.2",
"eslint-plugin-prettier": "^5.4.0",
@@ -113,7 +113,7 @@
"eslint-plugin-unused-imports": "^4.1.4",
"husky": "^9.1.7",
"jsdom": "^26.1.0",
"lint-staged": "^15.5.2",
"lint-staged": "^16.0.0",
"msw": "^2.6.6",
"postcss": "^8.5.2",
"prettier": "^3.5.3",

View File

@@ -76,9 +76,9 @@ class OpenHands {
): Promise<boolean> {
if (appMode === "oss") return true;
const response =
await openHands.post<AuthenticateResponse>("/api/authenticate");
return response.status === 200;
// Just make the request, if it succeeds (no exception thrown), return true
await openHands.post<AuthenticateResponse>("/api/authenticate");
return true;
}
/**

View File

@@ -1,4 +1,4 @@
import { useDispatch, useSelector } from "react-redux";
import { useSelector } from "react-redux";
import React from "react";
import posthog from "posthog-js";
import { useParams } from "react-router";
@@ -8,7 +8,6 @@ import { convertImageToBase64 } from "#/utils/convert-image-to-base-64";
import { TrajectoryActions } from "../trajectory/trajectory-actions";
import { createChatMessage } from "#/services/chat-service";
import { InteractiveChatBox } from "./interactive-chat-box";
import { addUserMessage } from "#/state/chat-slice";
import { RootState } from "#/store";
import { AgentState } from "#/types/agent-state";
import { generateAgentStateChangeEvent } from "#/services/agent-state-service";
@@ -25,6 +24,11 @@ import { LoadingSpinner } from "#/components/shared/loading-spinner";
import { useGetTrajectory } from "#/hooks/mutation/use-get-trajectory";
import { downloadTrajectory } from "#/utils/download-trajectory";
import { displayErrorToast } from "#/utils/custom-toast-handlers";
import { useOptimisticUserMessage } from "#/hooks/use-optimistic-user-message";
import { useWSErrorMessage } from "#/hooks/use-ws-error-message";
import i18n from "#/i18n";
import { ErrorMessageBanner } from "./error-message-banner";
import { shouldRenderEvent } from "./event-content-helpers/should-render-event";
function getEntryPoint(
hasRepository: boolean | null,
@@ -36,14 +40,15 @@ function getEntryPoint(
}
export function ChatInterface() {
const { send, isLoadingMessages } = useWsClient();
const dispatch = useDispatch();
const { getErrorMessage } = useWSErrorMessage();
const { send, isLoadingMessages, parsedEvents } = useWsClient();
const { setOptimisticUserMessage, getOptimisticUserMessage } =
useOptimisticUserMessage();
const { t } = useTranslation();
const scrollRef = React.useRef<HTMLDivElement>(null);
const { scrollDomToBottom, onChatBodyScroll, hitBottom } =
useScrollToBottom(scrollRef);
const { messages } = useSelector((state: RootState) => state.chat);
const { curAgentState } = useSelector((state: RootState) => state.agent);
const [feedbackPolarity, setFeedbackPolarity] = React.useState<
@@ -57,8 +62,13 @@ export function ChatInterface() {
const params = useParams();
const { mutate: getTrajectory } = useGetTrajectory();
const optimisticUserMessage = getOptimisticUserMessage();
const errorMessage = getErrorMessage();
const events = parsedEvents.filter(shouldRenderEvent);
const handleSendMessage = async (content: string, files: File[]) => {
if (messages.length === 0) {
if (events.length === 0) {
posthog.capture("initial_query_submitted", {
entry_point: getEntryPoint(
selectedRepository !== null,
@@ -69,7 +79,7 @@ export function ChatInterface() {
});
} else {
posthog.capture("user_message_sent", {
session_message_count: messages.length,
session_message_count: events.length,
current_message_length: content.length,
});
}
@@ -77,9 +87,8 @@ export function ChatInterface() {
const imageUrls = await Promise.all(promises);
const timestamp = new Date().toISOString();
const pending = true;
dispatch(addUserMessage({ content, imageUrls, timestamp, pending }));
send(createChatMessage(content, imageUrls, timestamp));
setOptimisticUserMessage(content);
setMessageToSend(null);
};
@@ -120,7 +129,7 @@ export function ChatInterface() {
return (
<div className="h-full flex flex-col justify-between">
{messages.length === 0 && (
{events.length === 0 && !optimisticUserMessage && (
<ChatSuggestions onSuggestionsClick={setMessageToSend} />
)}
@@ -137,7 +146,7 @@ export function ChatInterface() {
{!isLoadingMessages && (
<Messages
messages={messages}
messages={events}
isAwaitingUserConfirmation={
curAgentState === AgentState.AWAITING_USER_CONFIRMATION
}
@@ -170,6 +179,12 @@ export function ChatInterface() {
{!hitBottom && <ScrollToBottomButton onClick={scrollDomToBottom} />}
</div>
{errorMessage && (
<ErrorMessageBanner
message={i18n.exists(errorMessage) ? t(errorMessage) : errorMessage}
/>
)}
<InteractiveChatBox
onSubmit={handleSendMessage}
onStop={handleStop}

View File

@@ -6,10 +6,11 @@ import { cn } from "#/utils/utils";
import { ul, ol } from "../markdown/list";
import { CopyToClipboardButton } from "#/components/shared/buttons/copy-to-clipboard-button";
import { anchor } from "../markdown/anchor";
import { OpenHandsSourceType } from "#/types/core/base";
import { paragraph } from "../markdown/paragraph";
interface ChatMessageProps {
type: "user" | "assistant";
type: OpenHandsSourceType;
message: string;
}
@@ -49,7 +50,7 @@ export function ChatMessage({
"rounded-xl relative",
"flex flex-col gap-2",
type === "user" && " max-w-[305px] p-4 bg-tertiary self-end",
type === "assistant" && "mt-6 max-w-full bg-transparent",
type === "agent" && "mt-6 max-w-full bg-transparent",
)}
>
<CopyToClipboardButton

View File

@@ -0,0 +1,11 @@
/** Props for the ErrorMessageBanner component. */
interface ErrorMessageBannerProps {
// Text rendered inside the banner.
message: string;
}
/**
 * Full-width red banner that displays the given error text as-is.
 */
export function ErrorMessageBanner({ message }: ErrorMessageBannerProps) {
  const bannerClasses =
    "w-full rounded-lg p-2 text-black border border-red-800 bg-red-500";

  return <div className={bannerClasses}>{message}</div>;
}

View File

@@ -0,0 +1,56 @@
import React from "react";
import Markdown from "react-markdown";
import remarkGfm from "remark-gfm";
import { useTranslation } from "react-i18next";
import { code } from "../markdown/code";
import { ol, ul } from "../markdown/list";
import ArrowDown from "#/icons/angle-down-solid.svg?react";
import ArrowUp from "#/icons/angle-up-solid.svg?react";
import i18n from "#/i18n";
/** Props for the ErrorMessage component. */
interface ErrorMessageProps {
// Optional i18n key; used as the heading only when i18n reports that it exists.
errorId?: string;
// Markdown text shown in the expandable details section.
defaultMessage: string;
}
/**
 * Collapsible error entry for the chat.
 *
 * The heading is the translation of `errorId` when that key exists in i18n,
 * otherwise the generic agent-error key. `defaultMessage` is rendered as
 * markdown once the user expands the entry.
 */
export function ErrorMessage({ errorId, defaultMessage }: ErrorMessageProps) {
  const { t } = useTranslation();
  const [showDetails, setShowDetails] = React.useState(false);

  // Only trust the supplied id when i18n actually knows about it.
  const errorKey =
    !!errorId && i18n.exists(errorId)
      ? errorId
      : "CHAT_INTERFACE$AGENT_ERROR_MESSAGE";

  const toggleDetails = () => setShowDetails((visible) => !visible);
  const ToggleIcon = showDetails ? ArrowUp : ArrowDown;

  return (
    <div className="flex flex-col gap-2 border-l-2 pl-2 my-2 py-2 border-danger text-sm w-full">
      <div className="font-bold text-danger">
        {t(errorKey)}
        <button
          type="button"
          onClick={toggleDetails}
          className="cursor-pointer text-left"
        >
          <ToggleIcon className="h-4 w-4 ml-2 inline fill-danger" />
        </button>
      </div>

      {showDetails && (
        <Markdown components={{ code, ul, ol }} remarkPlugins={[remarkGfm]}>
          {defaultMessage}
        </Markdown>
      )}
    </div>
  );
}

View File

@@ -0,0 +1,125 @@
import { ActionSecurityRisk } from "#/state/security-analyzer-slice";
import {
FileWriteAction,
CommandAction,
IPythonAction,
BrowseAction,
BrowseInteractiveAction,
MCPAction,
ThinkAction,
OpenHandsAction,
FinishAction,
} from "#/types/core/actions";
import { getDefaultEventContent, MAX_CONTENT_LENGTH } from "./shared";
/**
 * Maps a security-risk level to its display label.
 * Anything other than LOW, MEDIUM or HIGH is reported as "Unknown Risk".
 */
const getRiskText = (risk: ActionSecurityRisk) => {
  if (risk === ActionSecurityRisk.LOW) return "Low Risk";
  if (risk === ActionSecurityRisk.MEDIUM) return "Medium Risk";
  if (risk === ActionSecurityRisk.HIGH) return "High Risk";
  // Covers ActionSecurityRisk.UNKNOWN and any unrecognised value.
  return "Unknown Risk";
};
/**
 * Formats a file-write action as the target path followed by the written
 * content, cut to MAX_CONTENT_LENGTH characters (with a trailing "...")
 * when it is longer than that.
 */
const getWriteActionContent = (event: FileWriteAction): string => {
  const { path, content: fullContent } = event.args;
  const shownContent =
    fullContent.length > MAX_CONTENT_LENGTH
      ? `${fullContent.slice(0, MAX_CONTENT_LENGTH)}...`
      : fullContent;

  return `${path}\n${shownContent}`;
};
/**
 * Formats a command action as inline code. While the action is awaiting
 * confirmation, the risk label is appended after a blank line.
 */
const getRunActionContent = (event: CommandAction): string => {
  const commandText = `Command:\n\`${event.args.command}\``;

  if (event.args.confirmation_state !== "awaiting_confirmation") {
    return commandText;
  }

  return `${commandText}\n\n${getRiskText(event.args.security_risk)}`;
};
/**
 * Formats an IPython action as a fenced code block. While the action is
 * awaiting confirmation, the risk label is appended after a blank line.
 */
const getIPythonActionContent = (event: IPythonAction): string => {
  const codeBlock = `\`\`\`\n${event.args.code}\n\`\`\``;

  if (event.args.confirmation_state !== "awaiting_confirmation") {
    return codeBlock;
  }

  return `${codeBlock}\n\n${getRiskText(event.args.security_risk)}`;
};
/** Describes a browse action by the URL being visited. */
const getBrowseActionContent = (event: BrowseAction): string => {
  const { url } = event.args;
  return `Browsing ${url}`;
};
/** Shows the browser actions of an interactive browse event in a python code fence. */
const getBrowseInteractiveActionContent = (event: BrowseInteractiveAction) => {
  const { browser_actions: browserActions } = event.args;
  return `**Action:**\n\n\`\`\`python\n${browserActions}\n\`\`\``;
};
/**
 * Formats an MCP tool call: tool name, the optional thought, then the
 * arguments pretty-printed as JSON. A missing name becomes "" and missing
 * arguments become {}.
 */
const getMcpActionContent = (event: MCPAction): string => {
  const toolName = event.args.name || "";
  const toolArgs = event.args.arguments || {};

  const sections = [`**MCP Tool Call:** ${toolName}\n\n`];

  // The thought section is only present when the event carries one.
  if (event.args.thought) {
    sections.push(`\n\n**Thought:**\n${event.args.thought}`);
  }

  sections.push(
    `\n\n**Arguments:**\n\`\`\`json\n${JSON.stringify(toolArgs, null, 2)}\n\`\`\``,
  );

  return sections.join("");
};
/** A think action's details are simply its thought text. */
const getThinkActionContent = (event: ThinkAction): string => {
  const { thought } = event.args;
  return thought;
};
/**
 * Formats a finish action: the final thought followed by a sentence that
 * states the outcome. Any `task_completed` value other than "success" or
 * "failure" (including "partial") is reported as partially completed.
 * The combined text is trimmed.
 */
const getFinishActionContent = (event: FinishAction): string => {
  const { final_thought: finalThought, task_completed: taskCompleted } =
    event.args;

  let verdict = "\n\n\nI believe that the task was **completed partially**.";
  if (taskCompleted === "success") {
    verdict = "\n\n\nI believe that the task was **completed successfully**.";
  } else if (taskCompleted === "failure") {
    verdict = "\n\n\nI believe that the task was **not completed**.";
  }

  return (finalThought + verdict).trim();
};
/** Details text for actions that have none: always the empty string. */
const getNoContentActionContent = (): string => {
  return "";
};
/**
 * Returns the "details" text for an action event by dispatching on
 * `event.action` to the matching formatter defined in this module.
 *
 * @param event the action to describe
 * @returns the formatted details string; empty for read and edit actions
 */
export const getActionContent = (event: OpenHandsAction): string => {
switch (event.action) {
case "read":
case "edit":
// Read and edit actions contribute no details text.
return getNoContentActionContent();
case "write":
return getWriteActionContent(event);
case "run":
return getRunActionContent(event);
case "run_ipython":
return getIPythonActionContent(event);
case "browse":
return getBrowseActionContent(event);
case "browse_interactive":
return getBrowseInteractiveActionContent(event);
case "call_tool_mcp":
return getMcpActionContent(event);
case "think":
return getThinkActionContent(event);
case "finish":
return getFinishActionContent(event);
default:
// Any action without a dedicated formatter uses the shared default.
return getDefaultEventContent(event);
}
};

View File

@@ -0,0 +1,70 @@
import { Trans } from "react-i18next";
import { OpenHandsAction } from "#/types/core/actions";
import { isOpenHandsAction, isOpenHandsObservation } from "#/types/core/guards";
import { OpenHandsObservation } from "#/types/core/observations";
import { MonoComponent } from "../mono-component";
import { PathComponent } from "../path-component";
import { getActionContent } from "./get-action-content";
import { getObservationContent } from "./get-observation-content";
/** Type guard: true when `obj.path` is a string. */
const hasPathProperty = (
  obj: Record<string, unknown>,
): obj is { path: string } => {
  const { path } = obj;
  return typeof path === "string";
};
/** Type guard: true when `obj.command` is a string. */
const hasCommandProperty = (
  obj: Record<string, unknown>,
): obj is { command: string } => {
  const { command } = obj;
  return typeof command === "string";
};
/**
 * Shortens `text` to at most `maxLength` characters, appending "..." when
 * something was cut. Falsy input yields the empty string.
 */
const trimText = (text: string, maxLength: number): string => {
  if (!text) {
    return "";
  }

  if (text.length > maxLength) {
    return `${text.substring(0, maxLength)}...`;
  }

  return text;
};
/**
 * Builds the title and details shown for an action or observation event.
 *
 * The title is a `Trans` element whose i18n key is derived from the
 * upper-cased action/observation name; `path` and `command` (trimmed to 80
 * characters) are supplied as interpolation values when present as strings.
 * The details come from getActionContent / getObservationContent.
 *
 * @param event the action or observation to describe
 * @returns an object with `title` (React node) and `details` (string)
 */
export const getEventContent = (
event: OpenHandsAction | OpenHandsObservation,
) => {
let title: React.ReactNode = "";
let details: string = "";
if (isOpenHandsAction(event)) {
title = (
<Trans
i18nKey={`ACTION_MESSAGE$${event.action.toUpperCase()}`}
values={{
// Each value is `false` when the property is absent or not a string.
path: hasPathProperty(event.args) && event.args.path,
command:
hasCommandProperty(event.args) && trimText(event.args.command, 80),
}}
components={{
path: <PathComponent />,
cmd: <MonoComponent />,
}}
/>
);
details = getActionContent(event);
}
// Deliberately a separate `if`, not `else if`: it runs regardless of the check above.
if (isOpenHandsObservation(event)) {
title = (
<Trans
i18nKey={`OBSERVATION_MESSAGE$${event.observation.toUpperCase()}`}
values={{
path: hasPathProperty(event.extras) && event.extras.path,
command:
hasCommandProperty(event.extras) &&
trimText(event.extras.command, 80),
}}
components={{
path: <PathComponent />,
cmd: <MonoComponent />,
}}
/>
);
details = getObservationContent(event);
}
return {
// `??` only replaces null/undefined; the initial "" values pass through unchanged.
title: title ?? "Unknown event",
details: details ?? "Unknown event",
};
};

View File

@@ -0,0 +1,133 @@
import {
ReadObservation,
CommandObservation,
IPythonObservation,
EditObservation,
BrowseObservation,
OpenHandsObservation,
RecallObservation,
} from "#/types/core/observations";
import { getObservationResult } from "./get-observation-result";
import { getDefaultEventContent, MAX_CONTENT_LENGTH } from "./shared";
/** Wraps the content of a read observation in a plain code fence. */
const getReadObservationContent = (event: ReadObservation): string => {
  const { content } = event;
  return `\`\`\`\n${content}\n\`\`\``;
};
/**
 * Formats command / IPython output in an `sh` code fence. Output longer than
 * MAX_CONTENT_LENGTH is cut and suffixed with "..."; output that is empty
 * after trimming is replaced by a placeholder sentence.
 */
const getCommandObservationContent = (
  event: CommandObservation | IPythonObservation,
): string => {
  const rawOutput = event.content;
  const clipped =
    rawOutput.length > MAX_CONTENT_LENGTH
      ? `${rawOutput.slice(0, MAX_CONTENT_LENGTH)}...`
      : rawOutput;
  const shown =
    clipped.trim() || "[Command finished execution with no output]";

  return `Output:\n\`\`\`sh\n${shown}\n\`\`\``;
};
/**
 * Formats an edit observation: the diff in a `diff` code fence when the edit
 * succeeded, otherwise the raw observation content.
 */
const getEditObservationContent = (
  event: EditObservation,
  successMessage: boolean,
): string =>
  successMessage
    ? `\`\`\`diff\n${event.extras.diff}\n\`\`\`` // Content is already truncated by the ACI
    : event.content;
/**
 * Formats a browse observation as URL, optional error, then output. The
 * whole text is capped at MAX_CONTENT_LENGTH characters, with
 * "...(truncated)" appended when it was cut.
 */
const getBrowseObservationContent = (event: BrowseObservation) => {
  const sections = [`**URL:** ${event.extras.url}\n`];

  if (event.extras.error) {
    sections.push(`\n\n**Error:**\n${event.extras.error}\n`);
  }
  sections.push(`\n\n**Output:**\n${event.content}`);

  const fullText = sections.join("");
  if (fullText.length > MAX_CONTENT_LENGTH) {
    return `${fullText.slice(0, MAX_CONTENT_LENGTH)}...(truncated)`;
  }
  return fullText;
};
/**
 * Formats MCP tool output in a plain code fence. Output longer than
 * MAX_CONTENT_LENGTH is cut and suffixed with "..."; output that is empty
 * after trimming is replaced by a placeholder sentence.
 */
const getMcpObservationContent = (event: OpenHandsObservation): string => {
  const rawOutput = event.content;
  const clipped =
    rawOutput.length > MAX_CONTENT_LENGTH
      ? `${rawOutput.slice(0, MAX_CONTENT_LENGTH)}...`
      : rawOutput;
  const shown =
    clipped.trim() || "[MCP Tool finished execution with no output]";

  return `**Output:**\n\`\`\`\n${shown}\n\`\`\``;
};
/**
 * Formats a recall observation as a sequence of markdown sections.
 *
 * Workspace-context recalls list repository, directory, date, runtime hosts
 * and instruction texts, each only when present. Triggered microagent
 * knowledge and custom secret descriptions are appended for any recall type
 * when non-empty.
 */
const getRecallObservationContent = (event: RecallObservation): string => {
  const { extras } = event;
  const sections: string[] = [];

  if (extras.recall_type === "workspace_context") {
    if (extras.repo_name) {
      sections.push(`\n\n**Repository:** ${extras.repo_name}`);
    }
    if (extras.repo_directory) {
      sections.push(`\n\n**Directory:** ${extras.repo_directory}`);
    }
    if (extras.date) {
      sections.push(`\n\n**Date:** ${extras.date}`);
    }

    const hosts = extras.runtime_hosts;
    if (hosts && Object.keys(hosts).length > 0) {
      sections.push(`\n\n**Available Hosts**`);
      for (const [host, port] of Object.entries(hosts)) {
        sections.push(`\n\n- ${host} (port ${port})`);
      }
    }

    if (extras.repo_instructions) {
      sections.push(
        `\n\n**Repository Instructions:**\n\n${extras.repo_instructions}`,
      );
    }
    if (extras.additional_agent_instructions) {
      sections.push(
        `\n\n**Additional Instructions:**\n\n${extras.additional_agent_instructions}`,
      );
    }
  }

  // Microagent knowledge triggered for this recall.
  const knowledgeItems = extras.microagent_knowledge;
  if (knowledgeItems && knowledgeItems.length > 0) {
    sections.push(`\n\n**Triggered Microagent Knowledge:**`);
    for (const item of knowledgeItems) {
      sections.push(
        `\n\n- **${item.name}** (triggered by keyword: ${item.trigger})\n\n\`\`\`\n${item.content}\n\`\`\``,
      );
    }
  }

  const secretDescriptions = extras.custom_secrets_descriptions;
  if (secretDescriptions && Object.keys(secretDescriptions).length > 0) {
    sections.push(`\n\n**Custom Secrets**`);
    for (const [name, description] of Object.entries(secretDescriptions)) {
      sections.push(`\n\n- $${name}: ${description}`);
    }
  }

  return sections.join("");
};
/**
 * Returns the "details" text for an observation event by dispatching on
 * `event.observation` to the matching formatter defined in this module.
 *
 * @param event the observation to describe
 * @returns the formatted details string
 */
export const getObservationContent = (event: OpenHandsObservation): string => {
switch (event.observation) {
case "read":
return getReadObservationContent(event);
case "edit":
// The diff is shown only when the edit is classified as successful.
return getEditObservationContent(
event,
getObservationResult(event) === "success",
);
case "run_ipython":
case "run":
return getCommandObservationContent(event);
case "browse":
return getBrowseObservationContent(event);
case "mcp":
return getMcpObservationContent(event);
case "recall":
return getRecallObservationContent(event);
default:
// Any observation without a dedicated formatter uses the shared default.
return getDefaultEventContent(event);
}
};

View File

@@ -0,0 +1,26 @@
import { OpenHandsObservation } from "#/types/core/observations";
/** Outcome classification of an observation, as produced by getObservationResult. */
export type ObservationResultStatus = "success" | "error" | "timeout";
/**
 * Classifies an observation as "success", "error" or "timeout".
 *
 * - "run": decided by exit code (-1 timeout, 0 success, anything else error).
 * - "run_ipython", "read", "edit", "mcp": error when the content is empty or
 *   contains "error:" (case-insensitive), otherwise success.
 * - every other observation: success.
 */
export const getObservationResult = (event: OpenHandsObservation) => {
  const hasContent = event.content.length > 0;
  const contentIncludesError = event.content.toLowerCase().includes("error:");

  if (event.observation === "run") {
    const exitCode = event.extras.metadata.exit_code;
    if (exitCode === -1) return "timeout"; // Command timed out
    return exitCode === 0 ? "success" : "error";
  }

  // These observation kinds are judged purely by their textual content.
  const isJudgedByContent =
    event.observation === "run_ipython" ||
    event.observation === "read" ||
    event.observation === "edit" ||
    event.observation === "mcp";

  if (isJudgedByContent && (!hasContent || contentIncludesError)) {
    return "error";
  }
  return "success";
};

View File

@@ -0,0 +1,8 @@
import { OpenHandsAction } from "#/types/core/actions";
import { OpenHandsObservation } from "#/types/core/observations";
// Maximum number of characters of event content kept before the content helpers truncate it.
export const MAX_CONTENT_LENGTH = 1000;
/**
 * Fallback formatter: the whole event pretty-printed as JSON (2-space
 * indent) inside a `json` code fence.
 */
export const getDefaultEventContent = (
  event: OpenHandsAction | OpenHandsObservation,
): string => {
  const serialized = JSON.stringify(event, null, 2);
  return `\`\`\`json\n${serialized}\n\`\`\``;
};

View File

@@ -0,0 +1,27 @@
import { OpenHandsAction } from "#/types/core/actions";
import { OpenHandsEventType } from "#/types/core/base";
import { isOpenHandsAction, isOpenHandsObservation } from "#/types/core/guards";
import { OpenHandsObservation } from "#/types/core/observations";
// Event types that are never rendered, whether they arrive as an action or an observation.
const COMMON_NO_RENDER_LIST: OpenHandsEventType[] = [
"system",
"agent_state_changed",
"change_agent_state",
];
// Event types that are hidden only when they arrive as an action.
const ACTION_NO_RENDER_LIST: OpenHandsEventType[] = ["recall"];
/**
 * Decides whether an event should appear in the chat.
 *
 * Actions are hidden when their type is in either no-render list;
 * observations only when their type is in the common list. Anything that is
 * neither an action nor an observation is rendered.
 */
export const shouldRenderEvent = (
  event: OpenHandsAction | OpenHandsObservation,
) => {
  if (isOpenHandsAction(event)) {
    const hiddenActionTypes = [
      ...COMMON_NO_RENDER_LIST,
      ...ACTION_NO_RENDER_LIST,
    ];
    return hiddenActionTypes.every((hidden) => hidden !== event.action);
  }

  if (isOpenHandsObservation(event)) {
    return COMMON_NO_RENDER_LIST.every(
      (hidden) => hidden !== event.observation,
    );
  }

  return true;
};

View File

@@ -0,0 +1,123 @@
import { ConfirmationButtons } from "#/components/shared/buttons/confirmation-buttons";
import { I18nKey } from "#/i18n/declaration";
import { OpenHandsAction } from "#/types/core/actions";
import {
isUserMessage,
isErrorObservation,
isAssistantMessage,
isOpenHandsAction,
isOpenHandsObservation,
isFinishAction,
isRejectObservation,
} from "#/types/core/guards";
import { OpenHandsObservation } from "#/types/core/observations";
import { ImageCarousel } from "../images/image-carousel";
import { ChatMessage } from "./chat-message";
import { ErrorMessage } from "./error-message";
import { getObservationResult } from "./event-content-helpers/get-observation-result";
import { getEventContent } from "./event-content-helpers/get-event-content";
import { ExpandableMessage } from "./expandable-message";
import { GenericEventMessage } from "./generic-event-message";
/**
 * Type guard: true when `obj.thought` is a non-empty string.
 *
 * The guard narrows to `{ thought: string }`, so the runtime check must
 * verify the type and not just truthiness. Otherwise a truthy non-string
 * value (number, object, ...) would be passed on as a chat message string.
 */
const hasThoughtProperty = (
  obj: Record<string, unknown>,
): obj is { thought: string } =>
  typeof obj.thought === "string" && obj.thought.length > 0;
/** Props for the EventMessage component. */
interface EventMessageProps {
// The action or observation to render.
event: OpenHandsAction | OpenHandsObservation;
// When true and the event is an action with a thought, only the thought is rendered.
hasObservationPair: boolean;
// When true and the event is a user message, it is rendered as expandable resolver instructions.
isFirstMessageWithResolverTrigger: boolean;
// Combined with isLastMessage and an agent source to decide whether confirmation buttons show.
isAwaitingUserConfirmation: boolean;
isLastMessage: boolean;
}
/**
 * Renders a single chat event, choosing the presentation by event kind.
 *
 * Branches are checked in this order, and the first match wins:
 * 1. first user message of a resolver-triggered conversation: expandable
 *    instructions plus any images;
 * 2. error observation: ErrorMessage;
 * 3. action with a thought that already has an observation pair: only the
 *    thought, as an agent chat message;
 * 4. finish action: its formatted details as an agent chat message;
 * 5. user / assistant message: chat bubble with images and, when applicable,
 *    confirmation buttons;
 * 6. reject observation: its content as an agent chat message;
 * 7. anything else: optional thought, then a generic collapsible entry.
 *
 * Confirmation buttons are shown only for the last message, from the agent,
 * while the agent awaits user confirmation.
 */
export function EventMessage({
  event,
  hasObservationPair,
  isFirstMessageWithResolverTrigger,
  isAwaitingUserConfirmation,
  isLastMessage,
}: EventMessageProps) {
  const shouldShowConfirmationButtons =
    isLastMessage && event.source === "agent" && isAwaitingUserConfirmation;

  const isFirstUserMessageWithResolverTrigger =
    isFirstMessageWithResolverTrigger && isUserMessage(event);

  // Special case: First user message with resolver trigger
  if (isFirstUserMessageWithResolverTrigger) {
    return (
      <div>
        <ExpandableMessage
          type="action"
          message={event.args.content}
          id={I18nKey.CHAT$RESOLVER_INSTRUCTIONS}
        />
        {event.args.image_urls && event.args.image_urls.length > 0 && (
          <ImageCarousel size="small" images={event.args.image_urls} />
        )}
      </div>
    );
  }

  if (isErrorObservation(event)) {
    return (
      <ErrorMessage
        errorId={event.extras.error_id}
        defaultMessage={event.message}
      />
    );
  }

  // The paired observation is rendered as its own entry, so the action
  // itself only contributes its thought here.
  if (
    hasObservationPair &&
    isOpenHandsAction(event) &&
    hasThoughtProperty(event.args)
  ) {
    return <ChatMessage type="agent" message={event.args.thought} />;
  }

  if (isFinishAction(event)) {
    return (
      <ChatMessage type="agent" message={getEventContent(event).details} />
    );
  }

  if (isUserMessage(event) || isAssistantMessage(event)) {
    return (
      <ChatMessage
        type={event.source}
        message={isUserMessage(event) ? event.args.content : event.message}
      >
        {event.args.image_urls && event.args.image_urls.length > 0 && (
          <ImageCarousel size="small" images={event.args.image_urls} />
        )}
        {shouldShowConfirmationButtons && <ConfirmationButtons />}
      </ChatMessage>
    );
  }

  if (isRejectObservation(event)) {
    return <ChatMessage type="agent" message={event.content} />;
  }

  // Build the title element and details string once instead of once per prop.
  const { title, details } = getEventContent(event);

  return (
    <div>
      {isOpenHandsAction(event) && hasThoughtProperty(event.args) && (
        <ChatMessage type="agent" message={event.args.thought} />
      )}

      <GenericEventMessage
        title={title}
        details={details}
        success={
          isOpenHandsObservation(event)
            ? getObservationResult(event)
            : undefined
        }
      />

      {shouldShowConfirmationButtons && <ConfirmationButtons />}
    </div>
  );
}

View File

@@ -0,0 +1,61 @@
import React from "react";
import Markdown from "react-markdown";
import remarkGfm from "remark-gfm";
import { code } from "../markdown/code";
import { ol, ul } from "../markdown/list";
import ArrowDown from "#/icons/angle-down-solid.svg?react";
import ArrowUp from "#/icons/angle-up-solid.svg?react";
import { SuccessIndicator } from "./success-indicator";
import { ObservationResultStatus } from "./event-content-helpers/get-observation-result";
/** Props for the GenericEventMessage component. */
interface GenericEventMessageProps {
// Heading content, always visible.
title: React.ReactNode;
// Markdown shown when expanded; the expand toggle is rendered only when this is non-empty.
details: string;
// Optional outcome; when set, a status indicator is rendered next to the title.
success?: ObservationResultStatus;
}
/**
 * Collapsible chat entry for a generic event.
 *
 * Shows the title, an expand/collapse toggle when there are details, and an
 * optional status indicator. The details are rendered as markdown while
 * expanded.
 */
export function GenericEventMessage({
  title,
  details,
  success,
}: GenericEventMessageProps) {
  const [showDetails, setShowDetails] = React.useState(false);

  const toggleDetails = () => setShowDetails((visible) => !visible);
  const ToggleIcon = showDetails ? ArrowUp : ArrowDown;

  return (
    <div className="flex flex-col gap-2 border-l-2 pl-2 my-2 py-2 border-neutral-300 text-sm w-full">
      <div className="flex items-center justify-between font-bold text-neutral-300">
        <div>
          {title}
          {details && (
            <button
              type="button"
              onClick={toggleDetails}
              className="cursor-pointer text-left"
            >
              <ToggleIcon className="h-4 w-4 ml-2 inline fill-neutral-300" />
            </button>
          )}
        </div>

        {success && <SuccessIndicator status={success} />}
      </div>

      {showDetails && (
        <Markdown components={{ code, ul, ol }} remarkPlugins={[remarkGfm]}>
          {details}
        </Markdown>
      )}
    </div>
  );
}

View File

@@ -1,80 +1,82 @@
import React from "react";
import type { Message } from "#/message";
import { ChatMessage } from "#/components/features/chat/chat-message";
import { ConfirmationButtons } from "#/components/shared/buttons/confirmation-buttons";
import { ImageCarousel } from "../images/image-carousel";
import { ExpandableMessage } from "./expandable-message";
import { useUserConversation } from "#/hooks/query/use-user-conversation";
import { useConversation } from "#/context/conversation-context";
import { I18nKey } from "#/i18n/declaration";
import { OpenHandsAction } from "#/types/core/actions";
import { OpenHandsObservation } from "#/types/core/observations";
import { isOpenHandsAction, isOpenHandsObservation } from "#/types/core/guards";
import { OpenHandsEventType } from "#/types/core/base";
import { EventMessage } from "./event-message";
import { ChatMessage } from "./chat-message";
import { useOptimisticUserMessage } from "#/hooks/use-optimistic-user-message";
const COMMON_NO_RENDER_LIST: OpenHandsEventType[] = [
"system",
"agent_state_changed",
"change_agent_state",
];
const ACTION_NO_RENDER_LIST: OpenHandsEventType[] = ["recall"];
const shouldRenderEvent = (event: OpenHandsAction | OpenHandsObservation) => {
if (isOpenHandsAction(event)) {
const noRenderList = COMMON_NO_RENDER_LIST.concat(ACTION_NO_RENDER_LIST);
return !noRenderList.includes(event.action);
}
if (isOpenHandsObservation(event)) {
return !COMMON_NO_RENDER_LIST.includes(event.observation);
}
return true;
};
interface MessagesProps {
messages: Message[];
messages: (OpenHandsAction | OpenHandsObservation)[];
isAwaitingUserConfirmation: boolean;
}
export const Messages: React.FC<MessagesProps> = React.memo(
({ messages, isAwaitingUserConfirmation }) => {
const { getOptimisticUserMessage } = useOptimisticUserMessage();
const { conversationId } = useConversation();
const { data: conversation } = useUserConversation(conversationId || null);
const optimisticUserMessage = getOptimisticUserMessage();
// Check if conversation metadata has trigger=resolver
const isResolverTrigger = conversation?.trigger === "resolver";
return messages.map((message, index) => {
const shouldShowConfirmationButtons =
messages.length - 1 === index &&
message.sender === "assistant" &&
isAwaitingUserConfirmation;
const actionHasObservationPair = React.useCallback(
(event: OpenHandsAction | OpenHandsObservation): boolean => {
if (isOpenHandsAction(event)) {
return !!messages.some(
(msg) => isOpenHandsObservation(msg) && msg.cause === event.id,
);
}
const isFirstUserMessageWithResolverTrigger =
index === 0 && message.sender === "user" && isResolverTrigger;
return false;
},
[messages],
);
// Special case: First user message with resolver trigger
if (isFirstUserMessageWithResolverTrigger) {
return (
<div key={index}>
<ExpandableMessage
type="action"
message={message.content}
id={I18nKey.CHAT$RESOLVER_INSTRUCTIONS}
/>
{message.imageUrls && message.imageUrls.length > 0 && (
<ImageCarousel size="small" images={message.imageUrls} />
)}
</div>
);
}
return (
<>
{messages.filter(shouldRenderEvent).map((message, index) => (
<EventMessage
key={index}
event={message}
hasObservationPair={actionHasObservationPair(message)}
isFirstMessageWithResolverTrigger={index === 0 && isResolverTrigger}
isAwaitingUserConfirmation={isAwaitingUserConfirmation}
isLastMessage={messages.length - 1 === index}
/>
))}
if (message.type === "error" || message.type === "action") {
return (
<div key={index}>
<ExpandableMessage
type={message.type}
id={message.translationID}
message={message.content}
success={message.success}
observation={message.observation}
action={message.action}
/>
{shouldShowConfirmationButtons && <ConfirmationButtons />}
</div>
);
}
return (
<ChatMessage
key={index}
type={message.sender}
message={message.content}
>
{message.imageUrls && message.imageUrls.length > 0 && (
<ImageCarousel size="small" images={message.imageUrls} />
)}
{shouldShowConfirmationButtons && <ConfirmationButtons />}
</ChatMessage>
);
});
{optimisticUserMessage && (
<ChatMessage type="user" message={optimisticUserMessage} />
)}
</>
);
},
);

View File

@@ -0,0 +1,35 @@
import { FaClock } from "react-icons/fa";
import CheckCircle from "#/icons/check-circle-solid.svg?react";
import XCircle from "#/icons/x-circle-solid.svg?react";
import { ObservationResultStatus } from "./event-content-helpers/get-observation-result";
/** Props for the SuccessIndicator component. */
interface SuccessIndicatorProps {
// Outcome to visualise; selects which icon is rendered.
status: ObservationResultStatus;
}
/**
 * Status icon for an observation result: a check for "success", a cross for
 * "error" and a clock for "timeout".
 */
export function SuccessIndicator({ status }: SuccessIndicatorProps) {
  const isSuccess = status === "success";
  const isError = status === "error";
  const isTimeout = status === "timeout";

  return (
    <span className="flex-shrink-0">
      {isSuccess && (
        <CheckCircle
          data-testid="status-icon"
          className="h-4 w-4 ml-2 inline fill-success"
        />
      )}

      {isError && (
        <XCircle
          data-testid="status-icon"
          className="h-4 w-4 ml-2 inline fill-danger"
        />
      )}

      {isTimeout && (
        <FaClock
          data-testid="status-icon"
          className="h-4 w-4 ml-2 inline fill-yellow-500"
        />
      )}
    </span>
  );
}

View File

@@ -7,13 +7,11 @@ import { I18nKey } from "#/i18n/declaration";
interface AccountSettingsContextMenuProps {
onLogout: () => void;
onClose: () => void;
isLoggedIn: boolean;
}
export function AccountSettingsContextMenu({
onLogout,
onClose,
isLoggedIn,
}: AccountSettingsContextMenuProps) {
const ref = useClickOutsideElement<HTMLUListElement>(onClose);
const { t } = useTranslation();
@@ -24,7 +22,7 @@ export function AccountSettingsContextMenu({
ref={ref}
className="absolute right-full md:left-full -top-1 z-10 w-fit"
>
<ContextMenuListItem onClick={onLogout} isDisabled={!isLoggedIn}>
<ContextMenuListItem onClick={onLogout}>
{t(I18nKey.ACCOUNT_SETTINGS$LOGOUT)}
</ContextMenuListItem>
</ContextMenu>

View File

@@ -15,8 +15,9 @@ import { cn } from "#/utils/utils";
import { BaseModal } from "../../shared/modals/base-modal/base-modal";
import { RootState } from "#/store";
import { I18nKey } from "#/i18n/declaration";
import { selectSystemMessage } from "#/state/chat-slice";
import { transformVSCodeUrl } from "#/utils/vscode-url-helper";
import { useWsClient } from "#/context/ws-client-provider";
import { isSystemMessage } from "#/types/core/guards";
interface ConversationCardProps {
onClick?: () => void;
@@ -52,15 +53,17 @@ export function ConversationCard({
conversationId,
}: ConversationCardProps) {
const { t } = useTranslation();
const { parsedEvents } = useWsClient();
const [contextMenuVisible, setContextMenuVisible] = React.useState(false);
const [titleMode, setTitleMode] = React.useState<"view" | "edit">("view");
const [metricsModalVisible, setMetricsModalVisible] = React.useState(false);
const [systemModalVisible, setSystemModalVisible] = React.useState(false);
const inputRef = React.useRef<HTMLInputElement>(null);
const systemMessage = parsedEvents.find(isSystemMessage);
// Subscribe to metrics data from Redux store
const metrics = useSelector((state: RootState) => state.metrics);
const systemMessage = useSelector(selectSystemMessage);
const handleBlur = () => {
if (inputRef.current?.value) {
@@ -365,7 +368,7 @@ export function ConversationCard({
<SystemMessageModal
isOpen={systemModalVisible}
onClose={() => setSystemModalVisible(false)}
systemMessage={systemMessage}
systemMessage={systemMessage ? systemMessage.args : null}
/>
</>
);

View File

@@ -6,6 +6,7 @@ import { cn } from "#/utils/utils";
import { useUserRepositories } from "#/hooks/query/use-user-repositories";
import { TaskIssueNumber } from "./task-issue-number";
import { Provider } from "#/types/settings";
import { useOptimisticUserMessage } from "#/hooks/use-optimistic-user-message";
const getTaskTypeMap = (
t: (key: string) => string,
@@ -21,6 +22,7 @@ interface TaskCardProps {
}
export function TaskCard({ task }: TaskCardProps) {
const { setOptimisticUserMessage } = useOptimisticUserMessage();
const { data: repositories } = useUserRepositories();
const { mutate: createConversation, isPending } = useCreateConversation();
const isCreatingConversation = useIsCreatingConversation();
@@ -38,6 +40,7 @@ export function TaskCard({ task }: TaskCardProps) {
const handleLaunchConversation = () => {
const repo = getRepo(task.repo, task.git_provider);
setOptimisticUserMessage("Addressing task...");
return createConversation({
selectedRepository: repo,

View File

@@ -24,6 +24,10 @@ export function JupyterCellOutput({ lines }: JupyterCellOutputProps) {
{/* display the lines as plaintext or image */}
{lines.map((line, index) => {
if (line.type === "image") {
// Use markdown to display the image
const imageMarkdown = line.url
? `![image](${line.url})`
: line.content;
return (
<div key={index}>
<Markdown
@@ -32,7 +36,7 @@ export function JupyterCellOutput({ lines }: JupyterCellOutputProps) {
}}
urlTransform={(value: string) => value}
>
{line.content}
{imageMarkdown}
</Markdown>
</div>
);

View File

@@ -12,8 +12,8 @@ export function JupyterCell({ cell }: JupyterCellProps) {
const [lines, setLines] = React.useState<JupyterLine[]>([]);
React.useEffect(() => {
setLines(parseCellContent(cell.content));
}, [cell.content]);
setLines(parseCellContent(cell.content, cell.imageUrls));
}, [cell.content, cell.imageUrls]);
if (cell.type === "input") {
return <JupytrerCellInput code={cell.content} />;

View File

@@ -35,7 +35,6 @@ export function UserActions({ onLogout, user, isLoading }: UserActionsProps) {
{accountContextMenuIsVisible && (
<AccountSettingsContextMenu
isLoggedIn={!!user}
onLogout={handleLogout}
onClose={closeAccountMenu}
/>

View File

@@ -3,7 +3,7 @@ import { io, Socket } from "socket.io-client";
import { useQueryClient } from "@tanstack/react-query";
import EventLogger from "#/utils/event-logger";
import { handleAssistantMessage } from "#/services/actions";
import { showChatError } from "#/utils/error-handler";
import { showChatError, trackError } from "#/utils/error-handler";
import { useRate } from "#/hooks/use-rate";
import { OpenHandsParsedEvent } from "#/types/core";
import {
@@ -11,10 +11,26 @@ import {
CommandAction,
FileEditAction,
FileWriteAction,
OpenHandsAction,
UserMessageAction,
} from "#/types/core/actions";
import { Conversation } from "#/api/open-hands.types";
import { useUserProviders } from "#/hooks/use-user-providers";
import { OpenHandsObservation } from "#/types/core/observations";
import {
isErrorObservation,
isOpenHandsAction,
isOpenHandsObservation,
isUserMessage,
} from "#/types/core/guards";
import { useOptimisticUserMessage } from "#/hooks/use-optimistic-user-message";
import { useWSErrorMessage } from "#/hooks/use-ws-error-message";
/** Type guard: true when `obj` is a non-null object whose `message` is a string. */
const hasValidMessageProperty = (obj: unknown): obj is { message: string } => {
  if (typeof obj !== "object" || obj === null) {
    return false;
  }
  if (!("message" in obj)) {
    return false;
  }
  return typeof obj.message === "string";
};
const isOpenHandsEvent = (event: unknown): event is OpenHandsParsedEvent =>
typeof event === "object" &&
@@ -35,14 +51,6 @@ const isFileEditAction = (
const isCommandAction = (event: OpenHandsParsedEvent): event is CommandAction =>
"action" in event && event.action === "run";
const isUserMessage = (
event: OpenHandsParsedEvent,
): event is UserMessageAction =>
"source" in event &&
"type" in event &&
event.source === "user" &&
event.type === "message";
const isAssistantMessage = (
event: OpenHandsParsedEvent,
): event is AssistantMessageAction =>
@@ -65,6 +73,7 @@ interface UseWsClient {
status: WsClientProviderStatus;
isLoadingMessages: boolean;
events: Record<string, unknown>[];
parsedEvents: (OpenHandsAction | OpenHandsObservation)[];
send: (event: Record<string, unknown>) => void;
}
@@ -72,6 +81,7 @@ const WsClientContext = React.createContext<UseWsClient>({
status: WsClientProviderStatus.DISCONNECTED,
isLoadingMessages: true,
events: [],
parsedEvents: [],
send: () => {
throw new Error("not connected");
},
@@ -121,12 +131,17 @@ export function WsClientProvider({
conversationId,
children,
}: React.PropsWithChildren<WsClientProviderProps>) {
const { removeOptimisticUserMessage } = useOptimisticUserMessage();
const { setErrorMessage, removeErrorMessage } = useWSErrorMessage();
const queryClient = useQueryClient();
const sioRef = React.useRef<Socket | null>(null);
const [status, setStatus] = React.useState(
WsClientProviderStatus.DISCONNECTED,
);
const [events, setEvents] = React.useState<Record<string, unknown>[]>([]);
const [parsedEvents, setParsedEvents] = React.useState<
(OpenHandsAction | OpenHandsObservation)[]
>([]);
const lastEventRef = React.useRef<Record<string, unknown> | null>(null);
const { providers } = useUserProviders();
@@ -146,6 +161,24 @@ export function WsClientProvider({
function handleMessage(event: Record<string, unknown>) {
if (isOpenHandsEvent(event)) {
if (isOpenHandsAction(event) || isOpenHandsObservation(event)) {
setParsedEvents((prevEvents) => [...prevEvents, event]);
}
if (isErrorObservation(event)) {
trackError({
message: event.message,
source: "chat",
metadata: { msgId: event.id },
});
} else {
removeErrorMessage();
}
if (isUserMessage(event)) {
removeOptimisticUserMessage();
}
if (isMessageAction(event)) {
messageRateHandler.record(new Date().getTime());
}
@@ -156,7 +189,7 @@ export function WsClientProvider({
isFileWriteAction(event) ||
isCommandAction(event)
) {
queryClient.invalidateQueries({
queryClient.removeQueries({
queryKey: ["file_changes", conversationId],
});
@@ -202,11 +235,23 @@ export function WsClientProvider({
sio.io.opts.query = sio.io.opts.query || {};
sio.io.opts.query.latest_event_id = lastEventRef.current?.id;
updateStatusWhenErrorMessagePresent(data);
setErrorMessage(
hasValidMessageProperty(data)
? data.message
: "The WebSocket connection was closed.",
);
}
// Error handler (presumably registered on the socket's error event - confirm
// at the registration site): marks the client as disconnected, lets
// updateStatusWhenErrorMessagePresent inspect the payload, and records an
// error message through setErrorMessage.
function handleError(data: unknown) {
setStatus(WsClientProviderStatus.DISCONNECTED);
updateStatusWhenErrorMessagePresent(data);
// Prefer the payload's own string `message`; otherwise fall back to a
// generic description.
setErrorMessage(
hasValidMessageProperty(data)
? data.message
: "An unknown error occurred on the WebSocket connection.",
);
}
React.useEffect(() => {
@@ -267,9 +312,10 @@ export function WsClientProvider({
status,
isLoadingMessages: messageRateHandler.isUnderThreshold,
events,
parsedEvents,
send,
}),
[status, messageRateHandler.isUnderThreshold, events],
[status, messageRateHandler.isUnderThreshold, events, parsedEvents],
);
return <WsClientContext value={value}>{children}</WsClientContext>;

View File

@@ -1,4 +1,5 @@
import { useQuery } from "@tanstack/react-query";
import axios, { AxiosError } from "axios";
import OpenHands from "#/api/open-hands";
import { useConfig } from "./use-config";
import { useIsOnTosPage } from "#/hooks/use-is-on-tos-page";
@@ -11,7 +12,23 @@ export const useIsAuthed = () => {
return useQuery({
queryKey: ["user", "authenticated", appMode],
queryFn: () => OpenHands.authenticate(appMode!),
queryFn: async () => {
try {
// If in OSS mode or authentication succeeds, return true
await OpenHands.authenticate(appMode!);
return true;
} catch (error) {
// If it's a 401 error, return false (not authenticated)
if (axios.isAxiosError(error)) {
const axiosError = error as AxiosError;
if (axiosError.response?.status === 401) {
return false;
}
}
// For any other error, throw it to put the query in error state
throw error;
}
},
enabled: !!appMode && !isOnTosPage,
staleTime: 1000 * 60 * 5, // 5 minutes
gcTime: 1000 * 60 * 15, // 15 minutes

View File

@@ -1,10 +1,7 @@
import React from "react";
import { useDispatch } from "react-redux";
import { useWsClient } from "#/context/ws-client-provider";
import { generateAgentStateChangeEvent } from "#/services/agent-state-service";
import { addErrorMessage } from "#/state/chat-slice";
import { AgentState } from "#/types/agent-state";
import { ErrorObservation } from "#/types/core/observations";
import { displayErrorToast } from "#/utils/custom-toast-handlers";
interface ServerError {
@@ -15,12 +12,8 @@ interface ServerError {
const isServerError = (data: object): data is ServerError => "error" in data;
const isErrorObservation = (data: object): data is ErrorObservation =>
"observation" in data && data.observation === "error";
export const useHandleWSEvents = () => {
const { events, send } = useWsClient();
const dispatch = useDispatch();
React.useEffect(() => {
if (!events.length) {
@@ -49,14 +42,5 @@ export const useHandleWSEvents = () => {
send(generateAgentStateChangeEvent(AgentState.PAUSED));
}
}
if (isErrorObservation(event)) {
dispatch(
addErrorMessage({
id: event.extras?.error_id,
message: event.message,
}),
);
}
}, [events.length]);
};

View File

@@ -0,0 +1,23 @@
import { useQueryClient } from "@tanstack/react-query";
/**
 * Accessors for the optimistic user message held in the React Query cache
 * under the ["optimistic_user_message"] key.
 *
 * @returns an object exposing a setter, a getter and a remover for that entry.
 */
export const useOptimisticUserMessage = () => {
  const client = useQueryClient();
  const cacheKey = ["optimistic_user_message"] as const;

  // Writes the message into the cache entry.
  function setOptimisticUserMessage(message: string) {
    client.setQueryData<string>(cacheKey, message);
  }

  // Reads whatever is currently stored in the cache entry.
  function getOptimisticUserMessage() {
    return client.getQueryData<string>(cacheKey);
  }

  // Drops the cache entry.
  function removeOptimisticUserMessage() {
    client.removeQueries({ queryKey: cacheKey });
  }

  return {
    setOptimisticUserMessage,
    getOptimisticUserMessage,
    removeOptimisticUserMessage,
  };
};

View File

@@ -0,0 +1,22 @@
import { useQueryClient } from "@tanstack/react-query";
/**
 * Accessors for the WebSocket error message held in the React Query cache
 * under the ["error_message"] key.
 *
 * @returns an object exposing a setter, a getter and a remover for that entry.
 */
export const useWSErrorMessage = () => {
  // Single definition of the cache key so that set/get/remove cannot drift
  // apart (mirrors the layout of useOptimisticUserMessage).
  const queryKey = ["error_message"] as const;
  const queryClient = useQueryClient();

  // Writes the message into the cache entry.
  const setErrorMessage = (message: string) => {
    queryClient.setQueryData<string>(queryKey, message);
  };

  // Reads whatever is currently stored in the cache entry.
  const getErrorMessage = () => queryClient.getQueryData<string>(queryKey);

  // Drops the cache entry.
  const removeErrorMessage = () => {
    queryClient.removeQueries({ queryKey });
  };

  return {
    setErrorMessage,
    getErrorMessage,
    removeErrorMessage,
  };
};

View File

@@ -12,7 +12,8 @@
"ar": "قيمة السر مطلوبة",
"fr": "La valeur du secret est requise",
"tr": "Gizli değer gereklidir",
"de": "Geheimer Wert ist erforderlich"
"de": "Geheimer Wert ist erforderlich",
"uk": "Значення секрету є обов'язковим"
},
"SECRETS$ADD_SECRET": {
"en": "Add secret",
@@ -27,7 +28,8 @@
"ar": "إضافة سر",
"fr": "Ajouter un secret",
"tr": "Gizli ekle",
"de": "Geheimnis hinzufügen"
"de": "Geheimnis hinzufügen",
"uk": "Додати секрет"
},
"SECRETS$EDIT_SECRET": {
"en": "Edit secret",
@@ -42,7 +44,8 @@
"ar": "تعديل السر",
"fr": "Modifier le secret",
"tr": "Gizliyi düzenle",
"de": "Geheimnis bearbeiten"
"de": "Geheimnis bearbeiten",
"uk": "Редагувати секрет"
},
"SECRETS$NO_SECRETS_FOUND": {
"en": "No secrets found",
@@ -57,7 +60,8 @@
"ar": "لم يتم العثور على أسرار",
"fr": "Aucun secret trouvé",
"tr": "Gizli bulunamadı",
"de": "Keine Geheimnisse gefunden"
"de": "Keine Geheimnisse gefunden",
"uk": "Секретів не знайдено"
},
"SECRETS$ADD_NEW_SECRET": {
"en": "Add a new secret",
@@ -72,7 +76,8 @@
"ar": "إضافة سر جديد",
"fr": "Ajouter un nouveau secret",
"tr": "Yeni bir gizli ekle",
"de": "Neues Geheimnis hinzufügen"
"de": "Neues Geheimnis hinzufügen",
"uk": "Додати новий секрет"
},
"SECRETS$CONFIRM_DELETE_KEY": {
"en": "Are you sure you want to delete this key?",
@@ -87,7 +92,8 @@
"ar": "هل أنت متأكد أنك تريد حذف هذا المفتاح؟",
"fr": "Êtes-vous sûr de vouloir supprimer cette clé ?",
"tr": "Bu anahtarı silmek istediğinizden emin misiniz?",
"de": "Sind Sie sicher, dass Sie diesen Schlüssel löschen möchten?"
"de": "Sind Sie sicher, dass Sie diesen Schlüssel löschen möchten?",
"uk": "Ви впевнені, що хочете видалити цей ключ?"
},
"SETTINGS$MCP_TITLE": {
"en": "Model Context Protocol (MCP)",
@@ -1062,7 +1068,8 @@
"ar": "أسرار",
"fr": "Secrets",
"tr": "Sırları",
"de": "Geheimnisse"
"de": "Geheimnisse",
"uk": "Секрети"
},
"SETTINGS$NAV_API_KEYS": {
"en": "API Keys",
@@ -6377,20 +6384,20 @@
"uk": "Завантажити файл"
},
"ACTION_MESSAGE$RUN": {
"en": "Running <cmd>{{action.payload.args.command}}</cmd>",
"zh-CN": "运行 <cmd>{{action.payload.args.command}}</cmd>",
"zh-TW": "執行 <cmd>{{action.payload.args.command}}</cmd>",
"ko-KR": "실행 <cmd>{{action.payload.args.command}}</cmd>",
"ja": "実行 <cmd>{{action.payload.args.command}}</cmd>",
"no": "Kjører <cmd>{{action.payload.args.command}}</cmd>",
"ar": "تشغيل <cmd>{{action.payload.args.command}}</cmd>",
"de": "Führt <cmd>{{action.payload.args.command}}</cmd> aus",
"fr": "Exécution de <cmd>{{action.payload.args.command}}</cmd>",
"it": "Esecuzione di <cmd>{{action.payload.args.command}}</cmd>",
"pt": "Executando <cmd>{{action.payload.args.command}}</cmd>",
"es": "Ejecutando <cmd>{{action.payload.args.command}}</cmd>",
"tr": "<cmd>{{action.payload.args.command}}</cmd> çalıştırılıyor",
"uk": "Виконую <cmd>{{action.payload.args.command}}</cmd>"
"en": "Running <cmd>{{command}}</cmd>",
"zh-CN": "运行 <cmd>{{command}}</cmd>",
"zh-TW": "執行 <cmd>{{command}}</cmd>",
"ko-KR": "실행 <cmd>{{command}}</cmd>",
"ja": "実行 <cmd>{{command}}</cmd>",
"no": "Kjører <cmd>{{command}}</cmd>",
"ar": "تشغيل <cmd>{{command}}</cmd>",
"de": "Führt <cmd>{{command}}</cmd> aus",
"fr": "Exécution de <cmd>{{command}}</cmd>",
"it": "Esecuzione di <cmd>{{command}}</cmd>",
"pt": "Executando <cmd>{{command}}</cmd>",
"es": "Ejecutando <cmd>{{command}}</cmd>",
"tr": "<cmd>{{command}}</cmd> çalıştırılıyor",
"uk": "Виконую <cmd>{{command}}</cmd>"
},
"ACTION_MESSAGE$RUN_IPYTHON": {
"en": "Running a Python command",
@@ -6425,52 +6432,52 @@
"uk": "Викликаю інструмент MCP: {{action.payload.args.name}}"
},
"ACTION_MESSAGE$READ": {
"en": "Reading <path>{{action.payload.args.path}}</path>",
"zh-CN": "读取 <path>{{action.payload.args.path}}</path>",
"zh-TW": "讀取 <path>{{action.payload.args.path}}</path>",
"ko-KR": "읽기 <path>{{action.payload.args.path}}</path>",
"ja": "読み取り <path>{{action.payload.args.path}}</path>",
"no": "Leser <path>{{action.payload.args.path}}</path>",
"ar": "قراءة <path>{{action.payload.args.path}}</path>",
"de": "Liest <path>{{action.payload.args.path}}</path>",
"fr": "Lecture de <path>{{action.payload.args.path}}</path>",
"it": "Lettura di <path>{{action.payload.args.path}}</path>",
"pt": "Lendo <path>{{action.payload.args.path}}</path>",
"es": "Leyendo <path>{{action.payload.args.path}}</path>",
"tr": "<path>{{action.payload.args.path}}</path> okunuyor",
"uk": "Читаю <path>{{action.payload.args.path}}</path>"
"en": "Reading <path>{{path}}</path>",
"zh-CN": "读取 <path>{{path}}</path>",
"zh-TW": "讀取 <path>{{path}}</path>",
"ko-KR": "읽기 <path>{{path}}</path>",
"ja": "読み取り <path>{{path}}</path>",
"no": "Leser <path>{{path}}</path>",
"ar": "قراءة <path>{{path}}</path>",
"de": "Liest <path>{{path}}</path>",
"fr": "Lecture de <path>{{path}}</path>",
"it": "Lettura di <path>{{path}}</path>",
"pt": "Lendo <path>{{path}}</path>",
"es": "Leyendo <path>{{path}}</path>",
"tr": "<path>{{path}}</path> okunuyor",
"uk": "Читаю <path>{{path}}</path>"
},
"ACTION_MESSAGE$EDIT": {
"en": "Editing <path>{{action.payload.args.path}}</path>",
"zh-CN": "编辑 <path>{{action.payload.args.path}}</path>",
"zh-TW": "編輯 <path>{{action.payload.args.path}}</path>",
"ko-KR": "편집 <path>{{action.payload.args.path}}</path>",
"ja": "編集 <path>{{action.payload.args.path}}</path>",
"no": "Redigerer <path>{{action.payload.args.path}}</path>",
"ar": "تحرير <path>{{action.payload.args.path}}</path>",
"de": "Bearbeitet <path>{{action.payload.args.path}}</path>",
"fr": "Modification de <path>{{action.payload.args.path}}</path>",
"it": "Modifica di <path>{{action.payload.args.path}}</path>",
"pt": "Editando <path>{{action.payload.args.path}}</path>",
"es": "Editando <path>{{action.payload.args.path}}</path>",
"tr": "<path>{{action.payload.args.path}}</path> düzenleniyor",
"uk": "Редагую <path>{{action.payload.args.path}}</path>"
"en": "Editing <path>{{path}}</path>",
"zh-CN": "编辑 <path>{{path}}</path>",
"zh-TW": "編輯 <path>{{path}}</path>",
"ko-KR": "편집 <path>{{path}}</path>",
"ja": "編集 <path>{{path}}</path>",
"no": "Redigerer <path>{{path}}</path>",
"ar": "تحرير <path>{{path}}</path>",
"de": "Bearbeitet <path>{{path}}</path>",
"fr": "Modification de <path>{{path}}</path>",
"it": "Modifica di <path>{{path}}</path>",
"pt": "Editando <path>{{path}}</path>",
"es": "Editando <path>{{path}}</path>",
"tr": "<path>{{path}}</path> düzenleniyor",
"uk": "Редагую <path>{{path}}</path>"
},
"ACTION_MESSAGE$WRITE": {
"en": "Writing to <path>{{action.payload.args.path}}</path>",
"zh-CN": "写入 <path>{{action.payload.args.path}}</path>",
"zh-TW": "寫入 <path>{{action.payload.args.path}}</path>",
"ko-KR": "쓰기 <path>{{action.payload.args.path}}</path>",
"ja": "書き込み <path>{{action.payload.args.path}}</path>",
"no": "Skriver til <path>{{action.payload.args.path}}</path>",
"ar": "الكتابة إلى <path>{{action.payload.args.path}}</path>",
"de": "Schreibt in <path>{{action.payload.args.path}}</path>",
"fr": "Écriture dans <path>{{action.payload.args.path}}</path>",
"it": "Scrittura su <path>{{action.payload.args.path}}</path>",
"pt": "Escrevendo em <path>{{action.payload.args.path}}</path>",
"es": "Escribiendo en <path>{{action.payload.args.path}}</path>",
"tr": "<path>{{action.payload.args.path}}</path> dosyasına yazılıyor",
"uk": "Записую в <path>{{action.payload.args.path}}</path>"
"en": "Writing to <path>{{path}}</path>",
"zh-CN": "写入 <path>{{path}}</path>",
"zh-TW": "寫入 <path>{{path}}</path>",
"ko-KR": "쓰기 <path>{{path}}</path>",
"ja": "書き込み <path>{{path}}</path>",
"no": "Skriver til <path>{{path}}</path>",
"ar": "الكتابة إلى <path>{{path}}</path>",
"de": "Schreibt in <path>{{path}}</path>",
"fr": "Écriture dans <path>{{path}}</path>",
"it": "Scrittura su <path>{{path}}</path>",
"pt": "Escrevendo em <path>{{path}}</path>",
"es": "Escribiendo en <path>{{path}}</path>",
"tr": "<path>{{path}}</path> dosyasına yazılıyor",
"uk": "Записую в <path>{{path}}</path>"
},
"ACTION_MESSAGE$BROWSE": {
"en": "Browsing the web",
@@ -6537,20 +6544,20 @@
"uk": "Системне повідомлення"
},
"OBSERVATION_MESSAGE$RUN": {
"en": "Ran <cmd>{{observation.payload.extras.command}}</cmd>",
"zh-CN": "运行 <cmd>{{observation.payload.extras.command}}</cmd>",
"zh-TW": "執行 <cmd>{{observation.payload.extras.command}}</cmd>",
"ko-KR": "실행 <cmd>{{observation.payload.extras.command}}</cmd>",
"ja": "実行 <cmd>{{observation.payload.extras.command}}</cmd>",
"no": "Kjørte <cmd>{{observation.payload.extras.command}}</cmd>",
"ar": "تم تشغيل <cmd>{{observation.payload.extras.command}}</cmd>",
"de": "Führte <cmd>{{observation.payload.extras.command}}</cmd> aus",
"fr": "A exécuté <cmd>{{observation.payload.extras.command}}</cmd>",
"it": "Ha eseguito <cmd>{{observation.payload.extras.command}}</cmd>",
"pt": "Executou <cmd>{{observation.payload.extras.command}}</cmd>",
"es": "Ejecutó <cmd>{{observation.payload.extras.command}}</cmd>",
"tr": "<cmd>{{observation.payload.extras.command}}</cmd> çalıştırıldı",
"uk": "Запустив <cmd>{{observation.payload.extras.command}}</cmd>"
"en": "Ran <cmd>{{command}}</cmd>",
"zh-CN": "运行 <cmd>{{command}}</cmd>",
"zh-TW": "執行 <cmd>{{command}}</cmd>",
"ko-KR": "실행 <cmd>{{command}}</cmd>",
"ja": "実行 <cmd>{{command}}</cmd>",
"no": "Kjørte <cmd>{{command}}</cmd>",
"ar": "تم تشغيل <cmd>{{command}}</cmd>",
"de": "Führte <cmd>{{command}}</cmd> aus",
"fr": "A exécuté <cmd>{{command}}</cmd>",
"it": "Ha eseguito <cmd>{{command}}</cmd>",
"pt": "Executou <cmd>{{command}}</cmd>",
"es": "Ejecutó <cmd>{{command}}</cmd>",
"tr": "<cmd>{{command}}</cmd> çalıştırıldı",
"uk": "Запустив <cmd>{{command}}</cmd>"
},
"OBSERVATION_MESSAGE$RUN_IPYTHON": {
"en": "Ran a Python command",
@@ -6569,52 +6576,52 @@
"uk": "Виконав команду Python"
},
"OBSERVATION_MESSAGE$READ": {
"en": "Read <path>{{observation.payload.extras.path}}</path>",
"zh-CN": "读取 <path>{{observation.payload.extras.path}}</path>",
"zh-TW": "讀取 <path>{{observation.payload.extras.path}}</path>",
"ko-KR": "읽기 <path>{{observation.payload.extras.path}}</path>",
"ja": "読み取り <path>{{observation.payload.extras.path}}</path>",
"no": "Leste <path>{{observation.payload.extras.path}}</path>",
"ar": "تمت قراءة <path>{{observation.payload.extras.path}}</path>",
"de": "Las <path>{{observation.payload.extras.path}}</path>",
"fr": "A lu <path>{{observation.payload.extras.path}}</path>",
"it": "Ha letto <path>{{observation.payload.extras.path}}</path>",
"pt": "Leu <path>{{observation.payload.extras.path}}</path>",
"es": "Leyó <path>{{observation.payload.extras.path}}</path>",
"tr": "<path>{{observation.payload.extras.path}}</path> okundu",
"uk": "Прочитав <path>{{observation.payload.extras.path}}</path>"
"en": "Read <path>{{path}}</path>",
"zh-CN": "读取 <path>{{path}}</path>",
"zh-TW": "讀取 <path>{{path}}</path>",
"ko-KR": "읽기 <path>{{path}}</path>",
"ja": "読み取り <path>{{path}}</path>",
"no": "Leste <path>{{path}}</path>",
"ar": "تمت قراءة <path>{{path}}</path>",
"de": "Las <path>{{path}}</path>",
"fr": "A lu <path>{{path}}</path>",
"it": "Ha letto <path>{{path}}</path>",
"pt": "Leu <path>{{path}}</path>",
"es": "Leyó <path>{{path}}</path>",
"tr": "<path>{{path}}</path> okundu",
"uk": "Прочитав <path>{{path}}</path>"
},
"OBSERVATION_MESSAGE$EDIT": {
"en": "Edited <path>{{observation.payload.extras.path}}</path>",
"zh-CN": "编辑 <path>{{observation.payload.extras.path}}</path>",
"zh-TW": "編輯 <path>{{observation.payload.extras.path}}</path>",
"ko-KR": "편집 <path>{{observation.payload.extras.path}}</path>",
"ja": "編集 <path>{{observation.payload.extras.path}}</path>",
"no": "Redigerte <path>{{observation.payload.extras.path}}</path>",
"ar": "تم تحرير <path>{{observation.payload.extras.path}}</path>",
"de": "Hat <path>{{observation.payload.extras.path}}</path> bearbeitet",
"fr": "A modifié <path>{{observation.payload.extras.path}}</path>",
"it": "Ha modificato <path>{{observation.payload.extras.path}}</path>",
"pt": "Editou <path>{{observation.payload.extras.path}}</path>",
"es": "Editó <path>{{observation.payload.extras.path}}</path>",
"tr": "<path>{{observation.payload.extras.path}}</path> düzenlendi",
"uk": "Відредагував <path>{{observation.payload.extras.path}}</path>"
"en": "Edited <path>{{path}}</path>",
"zh-CN": "编辑 <path>{{path}}</path>",
"zh-TW": "編輯 <path>{{path}}</path>",
"ko-KR": "편집 <path>{{path}}</path>",
"ja": "編集 <path>{{path}}</path>",
"no": "Redigerte <path>{{path}}</path>",
"ar": "تم تحرير <path>{{path}}</path>",
"de": "Hat <path>{{path}}</path> bearbeitet",
"fr": "A modifié <path>{{path}}</path>",
"it": "Ha modificato <path>{{path}}</path>",
"pt": "Editou <path>{{path}}</path>",
"es": "Editó <path>{{path}}</path>",
"tr": "<path>{{path}}</path> düzenlendi",
"uk": "Відредагував <path>{{path}}</path>"
},
"OBSERVATION_MESSAGE$WRITE": {
"en": "Wrote to <path>{{observation.payload.extras.path}}</path>",
"zh-CN": "写入 <path>{{observation.payload.extras.path}}</path>",
"zh-TW": "寫入 <path>{{observation.payload.extras.path}}</path>",
"ko-KR": "쓰기 <path>{{observation.payload.extras.path}}</path>",
"ja": "書き込み <path>{{observation.payload.extras.path}}</path>",
"no": "Skrev til <path>{{observation.payload.extras.path}}</path>",
"ar": "تمت الكتابة إلى <path>{{observation.payload.extras.path}}</path>",
"de": "Hat in <path>{{observation.payload.extras.path}}</path> geschrieben",
"fr": "A écrit dans <path>{{observation.payload.extras.path}}</path>",
"it": "Ha scritto su <path>{{observation.payload.extras.path}}</path>",
"pt": "Escreveu em <path>{{observation.payload.extras.path}}</path>",
"es": "Escribió en <path>{{observation.payload.extras.path}}</path>",
"tr": "<path>{{observation.payload.extras.path}}</path> dosyasına yazıldı",
"uk": "Записав на <path>{{observation.payload.extras.path}}</path>"
"en": "Wrote to <path>{{path}}</path>",
"zh-CN": "写入 <path>{{path}}</path>",
"zh-TW": "寫入 <path>{{path}}</path>",
"ko-KR": "쓰기 <path>{{path}}</path>",
"ja": "書き込み <path>{{path}}</path>",
"no": "Skrev til <path>{{path}}</path>",
"ar": "تمت الكتابة إلى <path>{{path}}</path>",
"de": "Hat in <path>{{path}}</path> geschrieben",
"fr": "A écrit dans <path>{{path}}</path>",
"it": "Ha scritto su <path>{{path}}</path>",
"pt": "Escreveu em <path>{{path}}</path>",
"es": "Escribió en <path>{{path}}</path>",
"tr": "<path>{{path}}</path> dosyasına yazıldı",
"uk": "Записав на <path>{{path}}</path>"
},
"OBSERVATION_MESSAGE$BROWSE": {
"en": "Browsing completed",
@@ -6649,20 +6656,20 @@
"uk": "Результат інструменту MCP: {{action.payload.args.name}}"
},
"OBSERVATION_MESSAGE$RECALL": {
"en": "Microagent Activated",
"ja": "マイクロエージェントが有効化されました",
"zh-CN": "微代理已激活",
"zh-TW": "微代理已啟動",
"ko-KR": "마이크로에이전트 활성화됨",
"no": "MikroAgent aktivert",
"it": "Microagent attivato",
"pt": "Microagent ativado",
"es": "Microagent activado",
"ar": "تم تنشيط الوكيل المصغر",
"fr": "Microagent activé",
"tr": "MikroAjan Etkinleştirildi",
"de": "Microagent aktiviert",
"uk": "Мікроагент активований"
"en": "Microagent ready",
"ja": "マイクロエージェントの準備完了",
"zh-CN": "微代理已就绪",
"zh-TW": "微代理已就緒",
"ko-KR": "마이크로에이전트 준비됨",
"no": "MikroAgent klar",
"it": "Microagent pronto",
"pt": "Microagent pronto",
"es": "Microagent listo",
"ar": "الوكيل المصغر جاهز",
"fr": "Microagent prêt",
"tr": "MikroAjan hazır",
"de": "Microagent bereit",
"uk": "Мікроагент готовий"
},
"EXPANDABLE_MESSAGE$SHOW_DETAILS": {
"en": "Show details",

View File

@@ -13,7 +13,6 @@ import {
useConversation,
} from "#/context/conversation-context";
import { Controls } from "#/components/features/controls/controls";
import { clearMessages, addUserMessage } from "#/state/chat-slice";
import { clearTerminal } from "#/state/command-slice";
import { useEffectOnce } from "#/hooks/use-effect-once";
import GlobeIcon from "#/icons/globe.svg?react";
@@ -34,7 +33,6 @@ import Security from "#/components/shared/modals/security/security";
import { useUserConversation } from "#/hooks/query/use-user-conversation";
import { ServedAppLabel } from "#/components/layout/served-app-label";
import { useSettings } from "#/hooks/query/use-settings";
import { clearFiles, clearInitialPrompt } from "#/state/initial-query-slice";
import { RootState } from "#/store";
import { displayErrorToast } from "#/utils/custom-toast-handlers";
import { useDocumentTitleFromState } from "#/hooks/use-document-title-from-state";
@@ -49,9 +47,7 @@ function AppContent() {
const { data: conversation, isFetched } = useUserConversation(
conversationId || null,
);
const { initialPrompt, files } = useSelector(
(state: RootState) => state.initialQuery,
);
const { curAgentState } = useSelector((state: RootState) => state.agent);
const dispatch = useDispatch();
const navigate = useNavigate();
@@ -71,25 +67,11 @@ function AppContent() {
}, [conversation, isFetched]);
React.useEffect(() => {
dispatch(clearMessages());
dispatch(clearTerminal());
dispatch(clearJupyter());
if (conversationId && (initialPrompt || files.length > 0)) {
dispatch(
addUserMessage({
content: initialPrompt || "",
imageUrls: files || [],
timestamp: new Date().toISOString(),
pending: true,
}),
);
dispatch(clearInitialPrompt());
dispatch(clearFiles());
}
}, [conversationId]);
useEffectOnce(() => {
dispatch(clearMessages());
dispatch(clearTerminal());
dispatch(clearJupyter());
});

View File

@@ -58,7 +58,7 @@ export function ErrorBoundary() {
export default function MainApp() {
const navigate = useNavigate();
const { pathname } = useLocation();
const tosPageStatus = useIsOnTosPage();
const isOnTosPage = useIsOnTosPage();
const { data: settings } = useSettings();
const { error } = useBalance();
const { migrateUserConsent } = useMigrateUserConsent();
@@ -68,7 +68,7 @@ export default function MainApp() {
const {
data: isAuthed,
isFetching: isFetchingAuth,
isError: authError,
isError: isAuthError,
} = useIsAuthed();
// Always call the hook, but we'll only use the result when not on TOS page
@@ -78,30 +78,30 @@ export default function MainApp() {
});
// When on TOS page, we don't use the GitHub auth URL
const effectiveGitHubAuthUrl = tosPageStatus ? null : gitHubAuthUrl;
const effectiveGitHubAuthUrl = isOnTosPage ? null : gitHubAuthUrl;
const [consentFormIsOpen, setConsentFormIsOpen] = React.useState(false);
React.useEffect(() => {
// Don't change language when on TOS page
if (!tosPageStatus && settings?.LANGUAGE) {
if (!isOnTosPage && settings?.LANGUAGE) {
i18n.changeLanguage(settings.LANGUAGE);
}
}, [settings?.LANGUAGE, tosPageStatus]);
}, [settings?.LANGUAGE, isOnTosPage]);
React.useEffect(() => {
// Don't show consent form when on TOS page
if (!tosPageStatus) {
if (!isOnTosPage) {
const consentFormModalIsOpen =
settings?.USER_CONSENTS_TO_ANALYTICS === null;
setConsentFormIsOpen(consentFormModalIsOpen);
}
}, [settings, tosPageStatus]);
}, [settings, isOnTosPage]);
React.useEffect(() => {
// Don't migrate user consent when on TOS page
if (!tosPageStatus) {
if (!isOnTosPage) {
// Migrate user consent to the server if it was previously stored in localStorage
migrateUserConsent({
handleAnalyticsWasPresentInLocalStorage: () => {
@@ -109,7 +109,7 @@ export default function MainApp() {
},
});
}
}, [tosPageStatus]);
}, [isOnTosPage]);
React.useEffect(() => {
if (settings?.IS_NEW_USER && config.data?.APP_MODE === "saas") {
@@ -120,22 +120,16 @@ export default function MainApp() {
React.useEffect(() => {
// Don't do any redirects when on TOS page
// Don't allow users to use the app if it 402s
if (!tosPageStatus && error?.status === 402 && pathname !== "/") {
if (!isOnTosPage && error?.status === 402 && pathname !== "/") {
navigate("/");
}
}, [error?.status, pathname, tosPageStatus]);
}, [error?.status, pathname, isOnTosPage]);
// When on TOS page, we don't make any API calls, so we need to handle this case
const userIsAuthed = tosPageStatus ? false : !!isAuthed && !authError;
// Only show the auth modal if:
// 1. User is not authenticated
// 2. We're not currently on the TOS page
// 3. We're in SaaS mode
const renderAuthModal =
!isAuthed &&
!isAuthError &&
!isFetchingAuth &&
!userIsAuthed &&
!tosPageStatus &&
!isOnTosPage &&
config.data?.APP_MODE === "saas";
return (

View File

@@ -4,7 +4,6 @@ import { StatusMessage } from "#/types/message";
import { queryClient } from "#/query-client-config";
import store from "#/store";
import { setCurStatusMessage } from "#/state/status-slice";
import { addErrorMessage } from "#/state/chat-slice";
import { trackError } from "#/utils/error-handler";
// Mock dependencies
@@ -101,9 +100,6 @@ describe("handleStatusMessage", () => {
metadata: { msgId: "ERROR_ID" },
});
// Verify that store.dispatch was called with addErrorMessage
expect(store.dispatch).toHaveBeenCalledWith(addErrorMessage(statusMessage));
// Verify that queryClient.invalidateQueries was not called
expect(queryClient.invalidateQueries).not.toHaveBeenCalled();
});

View File

@@ -1,13 +1,5 @@
import {
addAssistantMessage,
addAssistantAction,
addUserMessage,
addErrorMessage,
} from "#/state/chat-slice";
import { trackError } from "#/utils/error-handler";
import { appendSecurityAnalyzerInput } from "#/state/security-analyzer-slice";
import { setCode, setActiveFilepath } from "#/state/code-slice";
import { appendJupyterInput } from "#/state/jupyter-slice";
import { setCurStatusMessage } from "#/state/status-slice";
import { setMetrics } from "#/state/metrics-slice";
import store from "#/store";
@@ -21,67 +13,6 @@ import { handleObservationMessage } from "./observations";
import { appendInput } from "#/state/command-slice";
import { queryClient } from "#/query-client-config";
// Dispatch table keyed by ActionType: each handler receives the incoming
// ActionMessage and dispatches the matching updates to the Redux store.
const messageActions = {
// Browse actions: surface the action's message as an assistant message, but
// only when the action carries no thought.
[ActionType.BROWSE]: (message: ActionMessage) => {
if (!message.args.thought && message.message) {
store.dispatch(addAssistantMessage(message.message));
}
},
// Same rule as BROWSE, for interactive browsing.
[ActionType.BROWSE_INTERACTIVE]: (message: ActionMessage) => {
if (!message.args.thought && message.message) {
store.dispatch(addAssistantMessage(message.message));
}
},
// File write: make the written file the active one and store its content.
[ActionType.WRITE]: (message: ActionMessage) => {
const { path, content } = message.args;
store.dispatch(setActiveFilepath(path));
store.dispatch(setCode(content));
},
// Chat message: user-sourced messages become (non-pending) user messages,
// everything else becomes an assistant message.
[ActionType.MESSAGE]: (message: ActionMessage) => {
if (message.source === "user") {
store.dispatch(
addUserMessage({
content: message.args.content,
// image_urls may arrive as a single string; normalise it to an array.
imageUrls:
typeof message.args.image_urls === "string"
? [message.args.image_urls]
: message.args.image_urls,
timestamp: message.timestamp,
pending: false,
}),
);
} else {
store.dispatch(addAssistantMessage(message.args.content));
}
},
// IPython run: record the code as Jupyter input unless it was rejected.
[ActionType.RUN_IPYTHON]: (message: ActionMessage) => {
if (message.args.confirmation_state !== "rejected") {
store.dispatch(appendJupyterInput(message.args.code));
}
},
// Finish: emit the final thought, then an optional sentence derived from
// the string-valued `task_completed` flag ("partial" / "false" / "true").
[ActionType.FINISH]: (message: ActionMessage) => {
store.dispatch(addAssistantMessage(message.args.final_thought));
let successPrediction = "";
if (message.args.task_completed === "partial") {
successPrediction =
"I believe that the task was **completed partially**.";
} else if (message.args.task_completed === "false") {
successPrediction = "I believe that the task was **not completed**.";
} else if (message.args.task_completed === "true") {
successPrediction =
"I believe that the task was **completed successfully**.";
}
// Any other task_completed value leaves successPrediction empty, so no
// extra message is dispatched.
if (successPrediction) {
// if final_thought is not empty, add a new line before the success prediction
if (message.args.final_thought) {
store.dispatch(addAssistantMessage(`\n${successPrediction}`));
} else {
store.dispatch(addAssistantMessage(successPrediction));
}
}
},
};
export function handleActionMessage(message: ActionMessage) {
if (message.args?.hidden) {
return;
@@ -103,26 +34,6 @@ export function handleActionMessage(message: ActionMessage) {
if ("args" in message && "security_risk" in message.args) {
store.dispatch(appendSecurityAnalyzerInput(message));
}
if (message.source === "agent") {
// Only add thought as a message if it's not a "think" action
if (
message.args &&
message.args.thought &&
message.action !== ActionType.THINK
) {
store.dispatch(addAssistantMessage(message.args.thought));
}
// Need to convert ActionMessage to RejectAction
// @ts-expect-error TODO: fix
store.dispatch(addAssistantAction(message));
}
if (message.action in messageActions) {
const actionFn =
messageActions[message.action as keyof typeof messageActions];
actionFn(message);
}
}
export function handleStatusMessage(message: StatusMessage) {
@@ -146,11 +57,6 @@ export function handleStatusMessage(message: StatusMessage) {
source: "chat",
metadata: { msgId: message.id },
});
store.dispatch(
addErrorMessage({
...message,
}),
);
}
}
@@ -161,33 +67,5 @@ export function handleAssistantMessage(message: Record<string, unknown>) {
handleObservationMessage(message as unknown as ObservationMessage);
} else if (message.status_update) {
handleStatusMessage(message as unknown as StatusMessage);
} else if (message.error) {
// Handle error messages from the server
const errorMessage =
typeof message.message === "string"
? message.message
: String(message.message || "Unknown error");
trackError({
message: errorMessage,
source: "websocket",
metadata: { raw_message: message },
});
store.dispatch(
addErrorMessage({
message: errorMessage,
}),
);
} else {
const errorMsg = "Unknown message type received";
trackError({
message: errorMsg,
source: "chat",
metadata: { raw_message: message },
});
store.dispatch(
addErrorMessage({
message: errorMsg,
}),
);
}
}

View File

@@ -2,14 +2,9 @@ import { setCurrentAgentState } from "#/state/agent-slice";
import { setUrl, setScreenshotSrc } from "#/state/browser-slice";
import store from "#/store";
import { ObservationMessage } from "#/types/message";
import { AgentState } from "#/types/agent-state";
import { appendOutput } from "#/state/command-slice";
import { appendJupyterOutput } from "#/state/jupyter-slice";
import ObservationType from "#/types/observation-type";
import {
addAssistantMessage,
addAssistantObservation,
} from "#/state/chat-slice";
export function handleObservationMessage(message: ObservationMessage) {
switch (message.observation) {
@@ -26,8 +21,14 @@ export function handleObservationMessage(message: ObservationMessage) {
break;
}
case ObservationType.RUN_IPYTHON:
// FIXME: render this as markdown
store.dispatch(appendJupyterOutput(message.content));
store.dispatch(
appendJupyterOutput({
content: message.content,
imageUrls: Array.isArray(message.extras?.image_urls)
? message.extras.image_urls
: undefined,
}),
);
break;
case ObservationType.BROWSE:
case ObservationType.BROWSE_INTERACTIVE:
@@ -42,11 +43,6 @@ export function handleObservationMessage(message: ObservationMessage) {
store.dispatch(setCurrentAgentState(message.extras.agent_state));
break;
case ObservationType.DELEGATE:
// TODO: better UI for delegation result (#2309)
if (message.content) {
store.dispatch(addAssistantMessage(message.content));
}
break;
case ObservationType.READ:
case ObservationType.EDIT:
case ObservationType.THINK:
@@ -56,107 +52,13 @@ export function handleObservationMessage(message: ObservationMessage) {
case ObservationType.MCP:
break; // We don't display the default message for these observations
default:
store.dispatch(addAssistantMessage(message.message));
break;
}
if (!message.extras?.hidden) {
// Convert the message to the appropriate observation type
const { observation } = message;
const baseObservation = {
...message,
source: "agent" as const,
};
switch (observation) {
case "agent_state_changed":
store.dispatch(
addAssistantObservation({
...baseObservation,
observation: "agent_state_changed" as const,
extras: {
agent_state: (message.extras.agent_state as AgentState) || "idle",
},
}),
);
break;
case "recall":
store.dispatch(
addAssistantObservation({
...baseObservation,
observation: "recall" as const,
extras: {
...(message.extras || {}),
recall_type:
(message.extras?.recall_type as
| "workspace_context"
| "knowledge") || "knowledge",
},
}),
);
break;
case "run":
store.dispatch(
addAssistantObservation({
...baseObservation,
observation: "run" as const,
extras: {
command: String(message.extras.command || ""),
metadata: message.extras.metadata,
hidden: Boolean(message.extras.hidden),
},
}),
);
break;
case "read":
store.dispatch(
addAssistantObservation({
...baseObservation,
observation,
extras: {
path: String(message.extras.path || ""),
impl_source: String(message.extras.impl_source || ""),
},
}),
);
break;
case "edit":
store.dispatch(
addAssistantObservation({
...baseObservation,
observation,
extras: {
path: String(message.extras.path || ""),
diff: String(message.extras.diff || ""),
impl_source: String(message.extras.impl_source || ""),
},
}),
);
break;
case "run_ipython":
store.dispatch(
addAssistantObservation({
...baseObservation,
observation: "run_ipython" as const,
extras: {
code: String(message.extras.code || ""),
},
}),
);
break;
case "delegate":
store.dispatch(
addAssistantObservation({
...baseObservation,
observation: "delegate" as const,
extras: {
outputs:
typeof message.extras.outputs === "object"
? (message.extras.outputs as Record<string, unknown>)
: {},
},
}),
);
break;
case "browse":
if (message.extras?.screenshot) {
store.dispatch(setScreenshotSrc(message.extras.screenshot));
@@ -164,45 +66,6 @@ export function handleObservationMessage(message: ObservationMessage) {
if (message.extras?.url) {
store.dispatch(setUrl(message.extras.url));
}
store.dispatch(
addAssistantObservation({
...baseObservation,
observation: "browse" as const,
extras: {
url: String(message.extras.url || ""),
screenshot: String(message.extras.screenshot || ""),
error: Boolean(message.extras.error),
open_page_urls: Array.isArray(message.extras.open_page_urls)
? message.extras.open_page_urls
: [],
active_page_index: Number(message.extras.active_page_index || 0),
dom_object:
typeof message.extras.dom_object === "object"
? (message.extras.dom_object as Record<string, unknown>)
: {},
axtree_object:
typeof message.extras.axtree_object === "object"
? (message.extras.axtree_object as Record<string, unknown>)
: {},
extra_element_properties:
typeof message.extras.extra_element_properties === "object"
? (message.extras.extra_element_properties as Record<
string,
unknown
>)
: {},
last_browser_action: String(
message.extras.last_browser_action || "",
),
last_browser_action_error:
message.extras.last_browser_action_error,
focused_element_bid: String(
message.extras.focused_element_bid || "",
),
},
}),
);
break;
case "browse_interactive":
if (message.extras?.screenshot) {
@@ -211,65 +74,6 @@ export function handleObservationMessage(message: ObservationMessage) {
if (message.extras?.url) {
store.dispatch(setUrl(message.extras.url));
}
store.dispatch(
addAssistantObservation({
...baseObservation,
observation: "browse_interactive" as const,
extras: {
url: String(message.extras.url || ""),
screenshot: String(message.extras.screenshot || ""),
error: Boolean(message.extras.error),
open_page_urls: Array.isArray(message.extras.open_page_urls)
? message.extras.open_page_urls
: [],
active_page_index: Number(message.extras.active_page_index || 0),
dom_object:
typeof message.extras.dom_object === "object"
? (message.extras.dom_object as Record<string, unknown>)
: {},
axtree_object:
typeof message.extras.axtree_object === "object"
? (message.extras.axtree_object as Record<string, unknown>)
: {},
extra_element_properties:
typeof message.extras.extra_element_properties === "object"
? (message.extras.extra_element_properties as Record<
string,
unknown
>)
: {},
last_browser_action: String(
message.extras.last_browser_action || "",
),
last_browser_action_error:
message.extras.last_browser_action_error,
focused_element_bid: String(
message.extras.focused_element_bid || "",
),
},
}),
);
break;
case "error":
store.dispatch(
addAssistantObservation({
...baseObservation,
observation: "error" as const,
source: "user" as const,
extras: {
error_id: message.extras.error_id,
},
}),
);
break;
case "mcp":
store.dispatch(
addAssistantObservation({
...baseObservation,
observation: "mcp" as const,
}),
);
break;
default:
// For any unhandled observation types, just ignore them

View File

@@ -1,369 +0,0 @@
import { createSlice, PayloadAction } from "@reduxjs/toolkit";
import type { Message } from "#/message";
import { ActionSecurityRisk } from "#/state/security-analyzer-slice";
import { OpenHandsAction } from "#/types/core/actions";
import { OpenHandsEventType } from "#/types/core/base";
import {
CommandObservation,
IPythonObservation,
OpenHandsObservation,
RecallObservation,
} from "#/types/core/observations";
// Shape of the `chat` slice of the Redux store.
type SliceState = {
// Chat entries in the order the reducers pushed them.
messages: Message[];
// Fields copied from the most recent "system" action; null initially and
// again after clearMessages.
systemMessage: {
content: string;
tools: Array<Record<string, unknown>> | null;
openhands_version: string | null;
agent_class: string | null;
} | null;
};
// Character limit applied to write/run/run_ipython/browse/mcp content before
// it is embedded in a chat message; longer content is sliced and suffixed.
const MAX_CONTENT_LENGTH = 1000;
// Event types the chat reducers act on. Both addAssistantAction and
// addAssistantObservation return early for any type not listed here.
const HANDLED_ACTIONS: OpenHandsEventType[] = [
"run",
"run_ipython",
"write",
"read",
"browse",
"browse_interactive",
"edit",
"recall",
"think",
"system",
"call_tool_mcp",
"mcp",
];
/**
 * Translate a security-risk level into its display label.
 *
 * LOW, MEDIUM and HIGH each map to their own label; every other value
 * (including UNKNOWN) maps to "Unknown Risk".
 */
function getRiskText(risk: ActionSecurityRisk) {
  if (risk === ActionSecurityRisk.LOW) {
    return "Low Risk";
  }
  if (risk === ActionSecurityRisk.MEDIUM) {
    return "Medium Risk";
  }
  if (risk === ActionSecurityRisk.HIGH) {
    return "High Risk";
  }
  // ActionSecurityRisk.UNKNOWN and anything unexpected end up here.
  return "Unknown Risk";
}
// Starting state of the chat slice: no messages and no system message.
const initialState: SliceState = {
messages: [],
systemMessage: null,
};
export const chatSlice = createSlice({
name: "chat",
initialState,
reducers: {
/**
 * Append a message sent by the user.
 *
 * Every message currently flagged as pending is removed first, then the new
 * message is pushed. When the payload has no timestamp the current time is
 * used instead.
 */
addUserMessage(
  state,
  action: PayloadAction<{
    content: string;
    imageUrls: string[];
    timestamp: string;
    pending?: boolean;
  }>,
) {
  const { content, imageUrls, timestamp, pending } = action.payload;
  const userMessage: Message = {
    type: "thought",
    sender: "user",
    content,
    imageUrls,
    timestamp: timestamp || new Date().toISOString(),
    pending: !!pending,
  };

  // Walk backwards so that splicing never shifts an index still to be visited.
  for (let idx = state.messages.length - 1; idx >= 0; idx -= 1) {
    const candidate = state.messages[idx] as Message;
    if (candidate.pending) {
      state.messages.splice(idx, 1);
    }
  }

  state.messages.push(userMessage);
},
/**
 * Append a plain-text assistant message, stamped with the current time and
 * carrying no images.
 */
addAssistantMessage(state: SliceState, action: PayloadAction<string>) {
  const { payload: content } = action;
  state.messages.push({
    type: "thought",
    sender: "assistant",
    content,
    imageUrls: [],
    timestamp: new Date().toISOString(),
    pending: false,
  });
},
/**
 * Turn an agent action into a chat message.
 *
 * - Actions whose type is not in HANDLED_ACTIONS are ignored.
 * - "system" actions are stored in `state.systemMessage` and produce no message.
 * - "recall" actions produce no message.
 * - Every other handled action pushes an "action" message whose content is
 *   built per action type below; types without a branch (e.g. "read", "edit",
 *   "mcp") get empty content.
 */
addAssistantAction(
state: SliceState,
action: PayloadAction<OpenHandsAction>,
) {
const actionID = action.payload.action;
if (!HANDLED_ACTIONS.includes(actionID)) {
return;
}
// i18n key derived from the action type, e.g. ACTION_MESSAGE$RUN.
const translationID = `ACTION_MESSAGE$${actionID.toUpperCase()}`;
let text = "";
if (actionID === "system") {
// Store the system message in the state
state.systemMessage = {
content: action.payload.args.content,
tools: action.payload.args.tools,
openhands_version: action.payload.args.openhands_version,
agent_class: action.payload.args.agent_class,
};
// Don't add a message for system actions
return;
}
if (actionID === "run") {
text = `Command:\n\`${action.payload.args.command}\``;
} else if (actionID === "run_ipython") {
text = `\`\`\`\n${action.payload.args.code}\n\`\`\``;
} else if (actionID === "write") {
// Written file content is capped at MAX_CONTENT_LENGTH characters.
let { content } = action.payload.args;
if (content.length > MAX_CONTENT_LENGTH) {
content = `${content.slice(0, MAX_CONTENT_LENGTH)}...`;
}
text = `${action.payload.args.path}\n${content}`;
} else if (actionID === "browse") {
text = `Browsing ${action.payload.args.url}`;
} else if (actionID === "browse_interactive") {
// Include the browser_actions in the content
text = `**Action:**\n\n\`\`\`python\n${action.payload.args.browser_actions}\n\`\`\``;
} else if (actionID === "recall") {
// skip recall actions
return;
} else if (actionID === "call_tool_mcp") {
// Format MCP action with name and arguments
const name = action.payload.args.name || "";
const args = action.payload.args.arguments || {};
text = `**MCP Tool Call:** ${name}\n\n`;
// Include thought if available
if (action.payload.args.thought) {
text += `\n\n**Thought:**\n${action.payload.args.thought}`;
}
text += `\n\n**Arguments:**\n\`\`\`json\n${JSON.stringify(args, null, 2)}\n\`\`\``;
}
// Second pass: append the risk label to commands awaiting confirmation, or
// replace the text entirely for "think" actions.
if (actionID === "run" || actionID === "run_ipython") {
if (
action.payload.args.confirmation_state === "awaiting_confirmation"
) {
text += `\n\n${getRiskText(action.payload.args.security_risk as unknown as ActionSecurityRisk)}`;
}
} else if (actionID === "think") {
text = action.payload.args.thought;
}
// eventID lets addAssistantObservation find this message again through the
// observation's `cause`. Note that `action` here is the whole PayloadAction
// wrapper, not just its payload.
const message: Message = {
type: "action",
sender: "assistant",
translationID,
eventID: action.payload.id,
content: text,
imageUrls: [],
timestamp: new Date().toISOString(),
action,
};
state.messages.push(message);
},
/**
 * Fold an observation into the chat.
 *
 * - Observations whose type is not in HANDLED_ACTIONS are ignored (the same
 *   list is used for actions and observations).
 * - "recall" observations push a brand-new message built from their extras.
 * - Every other observation updates, in place, the existing message whose
 *   `eventID` equals the observation's `cause`; if no such message exists the
 *   observation is dropped.
 *
 * For the in-place update, `success` is set for run, run_ipython, read, edit
 * and mcp, and `content` is rewritten for run, run_ipython, read, edit, browse
 * and mcp. Other handled types (e.g. "browse_interactive") only get their
 * translationID and observation replaced.
 */
addAssistantObservation(
state: SliceState,
observation: PayloadAction<OpenHandsObservation>,
) {
const observationID = observation.payload.observation;
if (!HANDLED_ACTIONS.includes(observationID)) {
return;
}
// Special handling for RecallObservation - create a new message instead of updating an existing one
if (observationID === "recall") {
const recallObs = observation.payload as RecallObservation;
let content = ``;
// Handle workspace context
if (recallObs.extras.recall_type === "workspace_context") {
if (recallObs.extras.repo_name) {
content += `\n\n**Repository:** ${recallObs.extras.repo_name}`;
}
if (recallObs.extras.repo_directory) {
content += `\n\n**Directory:** ${recallObs.extras.repo_directory}`;
}
if (recallObs.extras.date) {
content += `\n\n**Date:** ${recallObs.extras.date}`;
}
if (
recallObs.extras.runtime_hosts &&
Object.keys(recallObs.extras.runtime_hosts).length > 0
) {
content += `\n\n**Available Hosts**`;
for (const [host, port] of Object.entries(
recallObs.extras.runtime_hosts,
)) {
content += `\n\n- ${host} (port ${port})`;
}
}
if (recallObs.extras.repo_instructions) {
content += `\n\n**Repository Instructions:**\n\n${recallObs.extras.repo_instructions}`;
}
if (recallObs.extras.additional_agent_instructions) {
content += `\n\n**Additional Instructions:**\n\n${recallObs.extras.additional_agent_instructions}`;
}
}
// Create a new message for the observation
// Use the correct translation ID format that matches what's in the i18n file
const translationID = `OBSERVATION_MESSAGE$${observationID.toUpperCase()}`;
// Handle microagent knowledge
// (appended regardless of recall_type, since this check sits outside the
// workspace_context branch above)
if (
recallObs.extras.microagent_knowledge &&
recallObs.extras.microagent_knowledge.length > 0
) {
content += `\n\n**Triggered Microagent Knowledge:**`;
for (const knowledge of recallObs.extras.microagent_knowledge) {
content += `\n\n- **${knowledge.name}** (triggered by keyword: ${knowledge.trigger})\n\n\`\`\`\n${knowledge.content}\n\`\`\``;
}
}
const message: Message = {
type: "action",
sender: "assistant",
translationID,
eventID: observation.payload.id,
content,
imageUrls: [],
timestamp: new Date().toISOString(),
success: true,
};
state.messages.push(message);
return; // Skip the normal observation handling below
}
// Normal handling for other observation types
const translationID = `OBSERVATION_MESSAGE$${observationID.toUpperCase()}`;
// Locate the action message that triggered this observation.
const causeID = observation.payload.cause;
const causeMessage = state.messages.find(
(message) => message.eventID === causeID,
);
if (!causeMessage) {
return;
}
causeMessage.translationID = translationID;
// Stores the whole PayloadAction wrapper, not just its payload.
causeMessage.observation = observation;
// Set success property based on observation type
if (observationID === "run") {
const commandObs = observation.payload as CommandObservation;
// If exit_code is -1, it means the command timed out, so we set success to undefined
// to not show any status indicator
if (commandObs.extras.metadata.exit_code === -1) {
causeMessage.success = undefined;
} else {
causeMessage.success = commandObs.extras.metadata.exit_code === 0;
}
} else if (observationID === "run_ipython") {
// For IPython, we consider it successful if there's no error message
const ipythonObs = observation.payload as IPythonObservation;
causeMessage.success = !ipythonObs.content
.toLowerCase()
.includes("error:");
} else if (observationID === "read" || observationID === "edit") {
// For read/edit operations, we consider it successful if there's content and no error
// The "oh_aci" implementation is checked for an exact "ERROR:\n" prefix;
// any other implementation is checked for "error:" anywhere, case-insensitively.
if (observation.payload.extras.impl_source === "oh_aci") {
causeMessage.success =
observation.payload.content.length > 0 &&
!observation.payload.content.startsWith("ERROR:\n");
} else {
causeMessage.success =
observation.payload.content.length > 0 &&
!observation.payload.content.toLowerCase().includes("error:");
}
}
// Rewrite the message content according to the observation type.
if (observationID === "run" || observationID === "run_ipython") {
let { content } = observation.payload;
if (content.length > MAX_CONTENT_LENGTH) {
content = `${content.slice(0, MAX_CONTENT_LENGTH)}...`;
}
content = `${causeMessage.content}\n\nOutput:\n\`\`\`\n${content.trim() || "[Command finished execution with no output]"}\n\`\`\``;
causeMessage.content = content; // Observation content includes the action
} else if (observationID === "read") {
causeMessage.content = `\`\`\`\n${observation.payload.content}\n\`\`\``; // Content is already truncated by the ACI
} else if (observationID === "edit") {
// Relies on the success flag computed above: show the diff on success,
// otherwise the raw observation content.
if (causeMessage.success) {
causeMessage.content = `\`\`\`diff\n${observation.payload.extras.diff}\n\`\`\``; // Content is already truncated by the ACI
} else {
causeMessage.content = observation.payload.content;
}
} else if (observationID === "browse") {
let content = `**URL:** ${observation.payload.extras.url}\n`;
if (observation.payload.extras.error) {
content += `\n\n**Error:**\n${observation.payload.extras.error}\n`;
}
content += `\n\n**Output:**\n${observation.payload.content}`;
// The limit applies to the whole assembled string, URL and error included.
if (content.length > MAX_CONTENT_LENGTH) {
content = `${content.slice(0, MAX_CONTENT_LENGTH)}...(truncated)`;
}
causeMessage.content = content;
} else if (observationID === "mcp") {
// For MCP observations, we want to show the content as formatted output
// similar to how run/run_ipython actions are handled
let { content } = observation.payload;
if (content.length > MAX_CONTENT_LENGTH) {
content = `${content.slice(0, MAX_CONTENT_LENGTH)}...`;
}
content = `${causeMessage.content}\n\n**Output:**\n\`\`\`\n${content.trim() || "[MCP Tool finished execution with no output]"}\n\`\`\``;
causeMessage.content = content; // Observation content includes the action
// Set success based on whether there's an error message
// (checked against the full, untruncated observation content)
causeMessage.success = !observation.payload.content
.toLowerCase()
.includes("error:");
}
},
/**
 * Append an error entry to the chat.
 *
 * The optional `id` from the payload becomes the entry's translationID and
 * `message` becomes its content; the entry is attributed to the assistant and
 * stamped with the current time.
 */
addErrorMessage(
  state: SliceState,
  action: PayloadAction<{ id?: string; message: string }>,
) {
  const errorEntry: Message = {
    translationID: action.payload.id,
    content: action.payload.message,
    type: "error",
    sender: "assistant",
    timestamp: new Date().toISOString(),
  };
  state.messages.push(errorEntry);
},
/** Reset the slice: drop all messages and forget the stored system message. */
clearMessages(state: SliceState) {
state.messages = [];
state.systemMessage = null;
},
},
});
export const {
addUserMessage,
addAssistantMessage,
addAssistantAction,
addAssistantObservation,
addErrorMessage,
clearMessages,
} = chatSlice.actions;
// Selectors
/** Selector returning the stored system message, or null when none is set. */
export const selectSystemMessage = (state: { chat: SliceState }) =>
state.chat.systemMessage;
export default chatSlice.reducer;

View File

@@ -3,6 +3,7 @@ import { createSlice } from "@reduxjs/toolkit";
export type Cell = {
content: string;
type: "input" | "output";
imageUrls?: string[];
};
const initialCells: Cell[] = [];
@@ -17,7 +18,11 @@ export const jupyterSlice = createSlice({
state.cells.push({ content: action.payload, type: "input" });
},
appendJupyterOutput: (state, action) => {
state.cells.push({ content: action.payload, type: "output" });
state.cells.push({
content: action.payload.content,
type: "output",
imageUrls: action.payload.imageUrls,
});
},
clearJupyter: (state) => {
state.cells = [];

View File

@@ -1,7 +1,6 @@
import { combineReducers, configureStore } from "@reduxjs/toolkit";
import agentReducer from "./state/agent-slice";
import browserReducer from "./state/browser-slice";
import chatReducer from "./state/chat-slice";
import codeReducer from "./state/code-slice";
import fileStateReducer from "./state/file-state-slice";
import initialQueryReducer from "./state/initial-query-slice";
@@ -15,7 +14,6 @@ export const rootReducer = combineReducers({
fileState: fileStateReducer,
initialQuery: initialQueryReducer,
browser: browserReducer,
chat: chatReducer,
code: codeReducer,
cmd: commandReducer,
agent: agentReducer,

View File

@@ -2,6 +2,7 @@ export type OpenHandsEventType =
| "message"
| "system"
| "agent_state_changed"
| "change_agent_state"
| "run"
| "read"
| "write"
@@ -16,11 +17,14 @@ export type OpenHandsEventType =
| "error"
| "recall"
| "mcp"
| "call_tool_mcp";
| "call_tool_mcp"
| "user_rejected";
export type OpenHandsSourceType = "agent" | "user" | "environment";
interface OpenHandsBaseEvent {
id: number;
source: "agent" | "user";
source: OpenHandsSourceType;
message: string;
timestamp: string; // ISO 8601
}

View File

@@ -0,0 +1,59 @@
import { OpenHandsParsedEvent } from ".";
import {
UserMessageAction,
AssistantMessageAction,
OpenHandsAction,
SystemMessageAction,
} from "./actions";
import {
CommandObservation,
ErrorObservation,
OpenHandsObservation,
} from "./observations";
/** Type guard: the event is an action when it has an `action` property. */
export const isOpenHandsAction = (
  event: OpenHandsParsedEvent,
): event is OpenHandsAction => {
  const hasActionKey = "action" in event;
  return hasActionKey;
};
/** Type guard: the event is an observation when it has an `observation` property. */
export const isOpenHandsObservation = (
  event: OpenHandsParsedEvent,
): event is OpenHandsObservation => {
  const hasObservationKey = "observation" in event;
  return hasObservationKey;
};
/** Type guard: a "message" action whose source is the user. */
export const isUserMessage = (
  event: OpenHandsParsedEvent,
): event is UserMessageAction => {
  if (!isOpenHandsAction(event)) {
    return false;
  }
  return event.source === "user" && event.action === "message";
};
/** Type guard: a "message" or "finish" action whose source is the agent. */
export const isAssistantMessage = (
  event: OpenHandsParsedEvent,
): event is AssistantMessageAction => {
  if (!isOpenHandsAction(event)) {
    return false;
  }
  if (event.source !== "agent") {
    return false;
  }
  return event.action === "message" || event.action === "finish";
};
/** Type guard: an observation of type "error". */
export const isErrorObservation = (
  event: OpenHandsParsedEvent,
): event is ErrorObservation => {
  if (!isOpenHandsObservation(event)) {
    return false;
  }
  return event.observation === "error";
};
/** Type guard: an observation of type "run". */
export const isCommandObservation = (
  event: OpenHandsParsedEvent,
): event is CommandObservation => {
  if (!isOpenHandsObservation(event)) {
    return false;
  }
  return event.observation === "run";
};
/** Type guard: an action of type "finish", regardless of its source. */
export const isFinishAction = (
  event: OpenHandsParsedEvent,
): event is AssistantMessageAction => {
  if (!isOpenHandsAction(event)) {
    return false;
  }
  return event.action === "finish";
};
/** Type guard: an action of type "system". */
export const isSystemMessage = (
  event: OpenHandsParsedEvent,
): event is SystemMessageAction => {
  if (!isOpenHandsAction(event)) {
    return false;
  }
  return event.action === "system";
};
/** Type guard: an observation of type "user_rejected". */
export const isRejectObservation = (
  event: OpenHandsParsedEvent,
): event is OpenHandsObservation => {
  if (!isOpenHandsObservation(event)) {
    return false;
  }
  return event.observation === "user_rejected";
};

View File

@@ -23,6 +23,7 @@ export interface IPythonObservation
source: "agent";
extras: {
code: string;
image_urls?: string[];
};
}
@@ -123,6 +124,7 @@ export interface RecallObservation extends OpenHandsObservationEvent<"recall"> {
repo_directory?: string;
repo_instructions?: string;
runtime_hosts?: Record<string, number>;
custom_secrets_descriptions?: Record<string, string>;
additional_agent_instructions?: string;
date?: string;
microagent_knowledge?: MicroagentKnowledge[];
@@ -136,6 +138,14 @@ export interface MCPObservation extends OpenHandsObservationEvent<"mcp"> {
};
}
export interface UserRejectedObservation
extends OpenHandsObservationEvent<"user_rejected"> {
source: "agent";
extras: {
// Add any specific fields for user-rejected observations
};
}
export type OpenHandsObservation =
| AgentStateChangeObservation
| AgentThinkObservation
@@ -149,4 +159,5 @@ export type OpenHandsObservation =
| EditObservation
| ErrorObservation
| RecallObservation
| MCPObservation;
| MCPObservation
| UserRejectedObservation;

View File

@@ -1,26 +1,32 @@
export type JupyterLine = { type: "plaintext" | "image"; content: string };
export type JupyterLine = {
type: "plaintext" | "image";
content: string;
url?: string;
};
const IMAGE_PREFIX = "![image](data:image/png;base64,";
export const parseCellContent = (content: string) => {
export const parseCellContent = (content: string, imageUrls?: string[]) => {
const lines: JupyterLine[] = [];
let currentText = "";
// First, process the text content
for (const line of content.split("\n")) {
if (line.startsWith(IMAGE_PREFIX)) {
if (currentText) {
lines.push({ type: "plaintext", content: currentText });
currentText = ""; // Reset after pushing plaintext
}
lines.push({ type: "image", content: line });
} else {
currentText += `${line}\n`;
}
currentText += `${line}\n`;
}
if (currentText) {
lines.push({ type: "plaintext", content: currentText });
}
// Then, add image lines if we have image URLs
if (imageUrls && imageUrls.length > 0) {
imageUrls.forEach((url) => {
lines.push({
type: "image",
content: `![image](${url})`,
url,
});
});
}
return lines;
};

View File

@@ -0,0 +1,15 @@
---
# This is a repo microagent that is always activated
# to include necessary default tools implemented with MCP
name: default-tools
type: repo
version: 1.0.0
agent: CodeActAgent
mcp_tools:
stdio_servers:
- name: "fetch"
command: "uvx"
args: ["mcp-server-fetch"]
# We leave the body empty because MCP tools will automatically add the
# tool description for LLMs in tool calls, so there's no need to add extra descriptions.
---

137
microagents/ssh.md Normal file
View File

@@ -0,0 +1,137 @@
---
name: SSH Microagent
type: knowledge
version: 1.0.0
agent: CodeActAgent
triggers:
- ssh
- remote server
- remote machine
- remote host
- remote connection
- secure shell
- ssh keys
---
# SSH Microagent
This microagent provides capabilities for establishing and managing SSH connections to remote machines.
## Capabilities
- Establish SSH connections using password or key-based authentication
- Generate and manage SSH key pairs
- Configure SSH for easier connections
- Execute commands on remote machines
- Transfer files between local and remote machines
- Manage SSH configurations and known hosts
## Authentication Methods
### Password Authentication
```bash
ssh username@hostname
```
When prompted, you should ask the user for their password or a private key.
### Key-Based Authentication
Generate a new SSH key pair:
```bash
ssh-keygen -t ed25519 -f ~/.ssh/key_name -C "comment" -N ""
```
Copy the public key to the remote server:
```bash
ssh-copy-id -i ~/.ssh/key_name.pub username@hostname
```
Connect using the private key:
```bash
ssh -i ~/.ssh/key_name username@hostname
```
## SSH Configuration
Create or edit the SSH config file for easier connections:
```bash
mkdir -p ~/.ssh
cat > ~/.ssh/config << 'EOF'
Host alias
HostName hostname_or_ip
User username
IdentityFile ~/.ssh/key_name
Port 22
ServerAliveInterval 60
EOF
chmod 600 ~/.ssh/config
```
Then connect using the alias:
```bash
ssh alias
```
## Common SSH Options
- `-p PORT`: Connect to a specific port
- `-X`: Enable X11 forwarding
- `-L local_port:remote_host:remote_port`: Set up local port forwarding
- `-R remote_port:local_host:local_port`: Set up remote port forwarding
- `-N`: Do not execute a remote command (useful for port forwarding)
- `-f`: Run in background
- `-v`: Verbose mode (add more v's for increased verbosity)
## File Transfer with SCP
Copy a file to the remote server:
```bash
scp /path/to/local/file username@hostname:/path/to/remote/directory/
```
Copy a file from the remote server:
```bash
scp username@hostname:/path/to/remote/file /path/to/local/directory/
```
Copy a directory recursively:
```bash
scp -r /path/to/local/directory username@hostname:/path/to/remote/directory/
```
## SSH Agent
Start the SSH agent:
```bash
eval "$(ssh-agent -s)"
```
Add a key to the agent:
```bash
ssh-add ~/.ssh/key_name
```
## Troubleshooting
- Check SSH service status on remote: `systemctl status sshd`
- Verify SSH port is open: `nc -zv hostname 22`
- Debug connection issues: `ssh -vvv username@hostname`
- Check permissions: SSH private keys should have 600 permissions (`chmod 600 ~/.ssh/key_name`)
- Verify known_hosts: If host key changed, remove the old entry with `ssh-keygen -R hostname`
## Secure SSH Key Management
### Local Storage with Proper Permissions
The most basic approach is to ensure proper file permissions:
```bash
# Set correct permissions for private keys
chmod 600 ~/.ssh/id_ed25519
# Set correct permissions for public keys
chmod 644 ~/.ssh/id_ed25519.pub
# Set correct permissions for SSH directory
chmod 700 ~/.ssh
```

View File

@@ -22,7 +22,7 @@ def get_version():
pass
try:
from pkg_resources import DistributionNotFound, get_distribution
from pkg_resources import DistributionNotFound, get_distribution # type: ignore
return get_distribution(__package_name__).version
except (ImportError, DistributionNotFound):

View File

@@ -10,7 +10,7 @@ The CodeAct agent operates through a function calling interface. At each turn, t
2. **CodeAct**: Execute actions through a set of well-defined tools:
- Execute Linux `bash` commands with `execute_bash`
- Run Python code in an [IPython](https://ipython.org/) environment with `execute_ipython_cell`
- Interact with web browsers using `browser` and `web_read`
- Interact with web browsers using `browser` and `fetch`
- Edit files using `str_replace_editor` or `edit_file`
![image](https://github.com/All-Hands-AI/OpenHands/assets/38853559/92b622e3-72ad-4a61-8f41-8c040b6d5fb3)

View File

@@ -20,7 +20,6 @@ from openhands.agenthub.codeact_agent.tools.str_replace_editor import (
create_str_replace_editor_tool,
)
from openhands.agenthub.codeact_agent.tools.think import ThinkTool
from openhands.agenthub.codeact_agent.tools.web_read import WebReadTool
from openhands.controller.agent import Agent
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
@@ -123,7 +122,6 @@ class CodeActAgent(Agent):
if sys.platform == 'win32':
logger.warning('Windows runtime does not support browsing yet')
else:
tools.append(WebReadTool)
tools.append(BrowserTool)
if self.config.enable_jupyter:
tools.append(IPythonTool)

View File

@@ -15,7 +15,6 @@ from openhands.agenthub.codeact_agent.tools import (
IPythonTool,
LLMBasedFileEditTool,
ThinkTool,
WebReadTool,
create_cmd_run_tool,
create_str_replace_editor_tool,
)
@@ -212,16 +211,6 @@ def response_to_actions(
)
action = BrowseInteractiveAction(browser_actions=arguments['code'])
# ================================================
# WebReadTool (simplified browsing)
# ================================================
elif tool_call.function.name == WebReadTool['function']['name']:
if 'url' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "url" in tool call {tool_call.function.name}'
)
action = BrowseURLAction(url=arguments['url'])
# ================================================
# MCPAction (MCP)
# ================================================

View File

@@ -8,7 +8,7 @@ At the user's request, repository {{ repository_info.repo_name }} has been clone
{{ repository_instructions }}
</REPOSITORY_INSTRUCTIONS>
{% endif %}
{% if runtime_info and (runtime_info.available_hosts or runtime_info.additional_agent_instructions) -%}
{% if runtime_info -%}
<RUNTIME_INFORMATION>
{% if runtime_info.available_hosts %}
The user has access to the following hosts for accessing a web application,
@@ -24,8 +24,21 @@ For example, if you are using vite.config.js, you should set server.host and ser
{% if runtime_info.additional_agent_instructions %}
{{ runtime_info.additional_agent_instructions }}
{% endif %}
{% if runtime_info.custom_secrets_descriptions %}
<CUSTOM_SECRETS>
You have access to the following environment variables:
{% for secret_name, secret_description in runtime_info.custom_secrets_descriptions.items() %}
* $**{{ secret_name }}**: {{ secret_description }}
{% endfor %}
</CUSTOM_SECRETS>
{% endif %}
{% if runtime_info.date %}
Today's date is {{ runtime_info.date }} (UTC).
{% endif %}
</RUNTIME_INFORMATION>
{% endif %}
{% if runtime_info and runtime_info.context_message -%}
<CONTEXT_MESSAGE>
{{ runtime_info.context_message }}
</CONTEXT_MESSAGE>
{% endif %}

View File

@@ -5,7 +5,6 @@ from .ipython import IPythonTool
from .llm_based_edit import LLMBasedFileEditTool
from .str_replace_editor import create_str_replace_editor_tool
from .think import ThinkTool
from .web_read import WebReadTool
__all__ = [
'BrowserTool',
@@ -14,6 +13,5 @@ __all__ = [
'IPythonTool',
'LLMBasedFileEditTool',
'create_str_replace_editor_tool',
'WebReadTool',
'ThinkTool',
]

View File

@@ -2,6 +2,8 @@ import sys
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
from openhands.llm.tool_names import EXECUTE_BASH_TOOL_NAME
_DETAILED_BASH_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session.
### Command Execution
@@ -45,7 +47,7 @@ def create_cmd_run_tool(
return ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name=refine_prompt('execute_bash'),
name=EXECUTE_BASH_TOOL_NAME,
description=refine_prompt(description),
parameters={
'type': 'object',

View File

@@ -1,6 +1,8 @@
from browsergym.core.action.highlevel import HighLevelActionSet
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
from openhands.llm.tool_names import BROWSER_TOOL_NAME
# from browsergym/core/action/highlevel.py
_browser_action_space = HighLevelActionSet(
subsets=['bid', 'nav'],
@@ -141,7 +143,7 @@ for _, action in _browser_action_space.action_set.items():
BrowserTool = ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name='browser',
name=BROWSER_TOOL_NAME,
description=_BROWSER_DESCRIPTION,
parameters={
'type': 'object',

View File

@@ -1,5 +1,7 @@
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
from openhands.llm.tool_names import FINISH_TOOL_NAME
_FINISH_DESCRIPTION = """Signals the completion of the current task or conversation.
Use this tool when:
@@ -18,7 +20,7 @@ The task_completed field should be set to True if you believed you have complete
FinishTool = ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name='finish',
name=FINISH_TOOL_NAME,
description=_FINISH_DESCRIPTION,
parameters={
'type': 'object',

View File

@@ -1,5 +1,7 @@
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
from openhands.llm.tool_names import STR_REPLACE_EDITOR_TOOL_NAME
_DETAILED_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format
* State is persistent across command calls and discussions with the user
* If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep
@@ -55,7 +57,7 @@ def create_str_replace_editor_tool(
return ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name='str_replace_editor',
name=STR_REPLACE_EDITOR_TOOL_NAME,
description=description,
parameters={
'type': 'object',

View File

@@ -1,26 +0,0 @@
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
# Description text attached to the `web_read` tool definition below (passed as
# `description=`). It is runtime prompt text, so edits change what is sent with the tool.
_WEB_DESCRIPTION = """Read (convert to markdown) content from a webpage. You should prefer using the `web_read` tool over the `browser` tool, but do use the `browser` tool if you need to interact with a webpage (e.g., click a button, fill out a form, etc.) OR read a webpage that contains images.
You may use the `web_read` tool to read text content from a webpage, and even search the webpage content using a Google search query (e.g., url=`https://www.google.com/search?q=YOUR_QUERY`).
Only the most recently read webpage will be available to read. This means you should not follow a link to a new page until you are done with the information on the current page.
"""
# Function-calling schema for `web_read`: a single required string parameter, `url`.
WebReadTool = ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name='web_read',
description=_WEB_DESCRIPTION,
parameters={
'type': 'object',
'properties': {
'url': {
'type': 'string',
'description': 'The URL of the webpage to read. You can also use a Google search query here (e.g., `https://www.google.com/search?q=YOUR_QUERY`).',
}
},
'required': ['url'],
},
),
)

View File

@@ -17,7 +17,6 @@ from openhands.agenthub.codeact_agent.function_calling import (
from openhands.agenthub.codeact_agent.tools import (
FinishTool,
ThinkTool,
WebReadTool,
)
from openhands.agenthub.readonly_agent.tools import (
GlobTool,
@@ -191,16 +190,6 @@ def response_to_actions(
glob_cmd = glob_to_cmdrun(pattern, path)
action = CmdRunAction(command=glob_cmd, is_input=False)
# ================================================
# WebReadTool (simplified browsing)
# ================================================
elif tool_call.function.name == WebReadTool['function']['name']:
if 'url' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "url" in tool call {tool_call.function.name}'
)
action = BrowseURLAction(url=arguments['url'])
# ================================================
# MCPAction (MCP)
# ================================================
@@ -249,7 +238,6 @@ def get_tools() -> list[ChatCompletionToolParam]:
return [
ThinkTool,
FinishTool,
WebReadTool,
GrepTool,
GlobTool,
ViewTool,

View File

@@ -1,7 +1,6 @@
import asyncio
import logging
import sys
from uuid import uuid4
from prompt_toolkit.shortcuts import clear
@@ -42,6 +41,7 @@ from openhands.core.setup import (
create_controller,
create_memory,
create_runtime,
generate_sid,
initialize_repository_for_runtime,
)
from openhands.events import EventSource, EventStreamSubscriber
@@ -81,6 +81,16 @@ async def cleanup_session(
if pending:
await asyncio.wait(pending, timeout=5.0)
event_stream = runtime.event_stream
# Save the final state
end_state = controller.get_state()
end_state.save_to_session(
event_stream.sid,
event_stream.file_store,
event_stream.user_id,
)
# Reset agent, close runtime and controller
agent.reset()
runtime.close()
@@ -94,12 +104,13 @@ async def run_session(
config: AppConfig,
settings_store: FileSettingsStore,
current_dir: str,
initial_user_action: str | None = None,
task_content: str | None = None,
session_name: str | None = None,
) -> bool:
reload_microagents = False
new_session_requested = False
sid = str(uuid4())
sid = generate_sid(config, session_name)
is_loaded = asyncio.Event()
is_paused = asyncio.Event() # Event to track agent pause requests
always_confirm_mode = False # Flag to enable always confirm mode
@@ -120,7 +131,7 @@ async def run_session(
agent=agent,
)
controller, _ = create_controller(agent, runtime, config)
controller, initial_state = create_controller(agent, runtime, config)
event_stream = runtime.event_stream
@@ -218,10 +229,9 @@ async def run_session(
def on_event(event: Event) -> None:
loop.create_task(on_event_async(event))
event_stream.subscribe(EventStreamSubscriber.MAIN, on_event, str(uuid4()))
event_stream.subscribe(EventStreamSubscriber.MAIN, on_event, sid)
await runtime.connect()
await add_mcp_tools_to_agent(agent, runtime, config.mcp)
# Initialize repository if needed
repo_directory = None
@@ -240,6 +250,10 @@ async def run_session(
repo_directory=repo_directory,
)
# Add MCP tools to the agent
if agent.config.enable_mcp:
await add_mcp_tools_to_agent(agent, runtime, memory, config.mcp)
# Clear loading animation
is_loaded.set()
@@ -249,17 +263,38 @@ async def run_session(
# Show OpenHands banner and session ID
display_banner(session_id=sid)
# Show OpenHands welcome
display_welcome_message()
welcome_message = 'What do you want to build?' # from the application
initial_message = '' # from the user
if initial_user_action:
# If there's an initial user action, enqueue it and do not prompt again
display_initial_user_prompt(initial_user_action)
event_stream.add_event(
MessageAction(content=initial_user_action), EventSource.USER
)
if task_content:
initial_message = task_content
# If we loaded a state, we are resuming a previous session
if initial_state is not None:
logger.info(f'Resuming session: {sid}')
if initial_state.last_error:
# If the last session ended in an error, provide a message.
initial_message = (
'NOTE: the last session ended with an error.'
"Let's get back on track. Do NOT resume your task. Ask me about it."
)
else:
# If we are resuming, we already have a task
initial_message = ''
welcome_message += '\nLoading previous conversation.'
# Show OpenHands welcome
display_welcome_message(welcome_message)
# The prompt_for_next_task will be triggered if the agent enters AWAITING_USER_INPUT.
# If the restored state is already AWAITING_USER_INPUT, on_event_async will handle it.
if initial_message:
display_initial_user_prompt(initial_message)
event_stream.add_event(MessageAction(content=initial_message), EventSource.USER)
else:
# Otherwise prompt for the user's first message right away
# No session restored, no initial action: prompt for the user's first message
asyncio.create_task(prompt_for_next_task(''))
await run_agent_until_done(
@@ -334,7 +369,12 @@ async def main(loop: asyncio.AbstractEventLoop) -> None:
# Run the first session
new_session_requested = await run_session(
loop, config, settings_store, current_dir, task_str
loop,
config,
settings_store,
current_dir,
task_str,
session_name=args.name,
)
# If a new session was requested, run it

View File

@@ -145,14 +145,20 @@ def display_banner(session_id: str) -> None:
print_formatted_text('')
def display_welcome_message() -> None:
def display_welcome_message(message: str = '') -> None:
print_formatted_text(
HTML("<gold>Let's start building!</gold>\n"), style=DEFAULT_STYLE
)
print_formatted_text(
HTML('What do you want to build? <grey>Type /help for help</grey>'),
style=DEFAULT_STYLE,
)
if message:
print_formatted_text(
HTML(f'{message} <grey>Type /help for help</grey>'),
style=DEFAULT_STYLE,
)
else:
print_formatted_text(
HTML('What do you want to build? <grey>Type /help for help</grey>'),
style=DEFAULT_STYLE,
)
def display_initial_user_prompt(prompt: str) -> None:

View File

@@ -8,7 +8,6 @@ from enum import Enum
from typing import Any
import openhands
from openhands.controller.state.task import RootTask
from openhands.core.logger import openhands_logger as logger
from openhands.core.schema import AgentState
from openhands.events.action import (
@@ -75,7 +74,6 @@ class State:
- additional task-specific data
"""
root_task: RootTask = field(default_factory=RootTask)
session_id: str = ''
# global iteration for the current task
iteration: int = 0

View File

@@ -1,234 +0,0 @@
from __future__ import annotations
from openhands.core.exceptions import (
LLMMalformedActionError,
TaskInvalidStateError,
)
from openhands.core.logger import openhands_logger as logger
OPEN_STATE = 'open'
COMPLETED_STATE = 'completed'
ABANDONED_STATE = 'abandoned'
IN_PROGRESS_STATE = 'in_progress'
VERIFIED_STATE = 'verified'
STATES = [
OPEN_STATE,
COMPLETED_STATE,
ABANDONED_STATE,
IN_PROGRESS_STATE,
VERIFIED_STATE,
]
class Task:
    """A node in a tree of tasks: a goal, a lifecycle state and child subtasks.

    Attributes:
        id: Dot-separated position in the tree (e.g. ``'0.2'``), derived from the
            parent's id and the number of subtasks the parent holds when this
            task is constructed.
        goal: What the task is meant to achieve.
        parent: The owning task (``None`` only for the root).
        subtasks: Child tasks, in insertion order.
        state: One of ``STATES``.
    """

    # Whole-string annotations so they stay valid whether or not postponed
    # evaluation of annotations is active.
    id: str
    goal: str
    parent: 'Task | None'
    subtasks: 'list[Task]'
    state: str

    def __init__(
        self,
        parent: 'Task',
        goal: str,
        state: str = OPEN_STATE,
        subtasks: 'list[dict | Task] | None' = None,
    ) -> None:
        """Initializes a new instance of the Task class.

        The constructor does NOT append the new task to ``parent.subtasks``; the
        caller is responsible for that (nested subtasks built from dicts are
        appended to this task automatically).

        Args:
            parent: The parent task.
            goal: The goal of the task.
            state: The initial state of the task. A value outside ``STATES`` is
                logged and replaced by ``OPEN_STATE``.
            subtasks: Subtasks of this task, either ``Task`` instances (used
                as-is) or dicts with optional ``goal``, ``state`` and
                ``subtasks`` keys (converted recursively).
        """
        if parent.id:
            self.id = parent.id + '.' + str(len(parent.subtasks))
        else:
            self.id = str(len(parent.subtasks))
        self.parent = parent
        self.goal = goal
        logger.debug(f'Creating task {self.id} with parent={parent.id}, goal={goal}')

        self.subtasks = []
        for subtask in subtasks or []:
            if isinstance(subtask, Task):
                self.subtasks.append(subtask)
                continue
            # Separate names: the constructor arguments must not be clobbered,
            # because `state` is still needed after the loop.
            child_goal = str(subtask.get('goal', ''))
            child_state = str(subtask.get('state', OPEN_STATE))
            child_subtasks = subtask.get('subtasks')
            logger.debug(f'Reading: {child_goal}, {child_state}, {child_subtasks}')
            # The child's id is computed from len(self.subtasks), so it has to be
            # constructed before it is appended.
            self.subtasks.append(Task(self, child_goal, child_state, child_subtasks))

        # Honor the requested initial state (it used to be silently discarded).
        # Unknown values fall back to OPEN_STATE rather than raising, so input
        # that was accepted before is still accepted.
        if state in STATES:
            self.state = state
        else:
            logger.warning(
                'Invalid initial state %r for task %s; defaulting to %s',
                state,
                self.id,
                OPEN_STATE,
            )
            self.state = OPEN_STATE

    def to_string(self, indent: str = '') -> str:
        """Returns a string representation of the task and its subtasks.

        Args:
            indent: The indentation string prefixed to this task's line.

        Returns:
            One line per task (marker, id, goal), subtasks following their parent.
        """
        # NOTE(review): the markers for the verified and abandoned states are
        # empty strings here -- confirm no glyph was lost from the source.
        markers = {
            VERIFIED_STATE: '',
            COMPLETED_STATE: '🟢',
            ABANDONED_STATE: '',
            IN_PROGRESS_STATE: '💪',
            OPEN_STATE: '🔵',
        }
        emoji = markers.get(self.state, '')
        lines = [indent + emoji + ' ' + self.id + ' ' + self.goal + '\n']
        lines.extend(subtask.to_string(indent + ' ') for subtask in self.subtasks)
        return ''.join(lines)

    def to_dict(self) -> dict:
        """Returns a dictionary representation of the task.

        Returns:
            A dict with the keys ``id``, ``goal``, ``state`` and ``subtasks``
            (the latter a list of nested dicts of the same shape).
        """
        return {
            'id': self.id,
            'goal': self.goal,
            'state': self.state,
            'subtasks': [t.to_dict() for t in self.subtasks],
        }

    def set_state(self, state: str) -> None:
        """Sets the state of the task and propagates it through the tree.

        Completed, abandoned and verified states are pushed down to every
        subtask that is not already abandoned; the in-progress state is pushed
        up to the parent chain.

        Args:
            state: The new state of the task.

        Raises:
            TaskInvalidStateError: If the provided state is invalid.
        """
        if state not in STATES:
            logger.error('Invalid state: %s', state)
            raise TaskInvalidStateError(state)
        self.state = state
        if state in (COMPLETED_STATE, ABANDONED_STATE, VERIFIED_STATE):
            for subtask in self.subtasks:
                if subtask.state != ABANDONED_STATE:
                    subtask.set_state(state)
        elif state == IN_PROGRESS_STATE:
            if self.parent is not None:
                self.parent.set_state(state)

    def get_current_task(self) -> 'Task | None':
        """Retrieves the current task in progress.

        The first in-progress subtask is followed recursively; if no subtask is
        in progress, this task itself is returned when it is in progress.

        Returns:
            The current task in progress, or None if no task is in progress.
        """
        for subtask in self.subtasks:
            if subtask.state == IN_PROGRESS_STATE:
                return subtask.get_current_task()
        if self.state == IN_PROGRESS_STATE:
            return self
        return None
class RootTask(Task):
    """Serves as the root node in a tree of tasks.

    Because we want the top-level of the root_task to be a list of tasks (1, 2, 3, etc.),
    the "root node" of the data structure is kind of invisible--it just
    holds references to the top-level tasks.

    Attributes:
        id: Kept blank for root_task
        goal: Kept blank for root_task
        parent: None for root_task
        subtasks: The top-level list of tasks associated with the root_task.
        state: The state of the root_task.
    """

    id: str = ''
    goal: str = ''
    parent: None = None

    def __init__(self) -> None:
        # Task.__init__ is intentionally not called: it requires a parent, and
        # the root has none.
        self.subtasks = []
        self.state = OPEN_STATE

    def __str__(self) -> str:
        """Returns a string representation of the root_task.

        Returns:
            A string representation of the root_task.
        """
        return self.to_string()

    def get_task_by_id(self, id: str) -> Task:
        """Retrieves a task by its ID.

        Args:
            id: The ID of the task: dot-separated, zero-based indexes into each
                level's subtasks (e.g. ``'1.0'``). The empty string is the root.

        Returns:
            The task with the specified ID.

        Raises:
            LLMMalformedActionError: If the provided task ID is invalid or does not exist.
        """
        if id == '':
            return self
        if len(self.subtasks) == 0:
            raise LLMMalformedActionError('Task does not exist:' + id)
        try:
            parts = [int(p) for p in id.split('.')]
        except ValueError as err:
            raise LLMMalformedActionError('Invalid task id:' + id) from err
        task: Task = self
        for part in parts:
            # Reject negative components explicitly: list indexing would
            # otherwise count from the end and resolve to the wrong task.
            if part < 0 or part >= len(task.subtasks):
                raise LLMMalformedActionError('Task does not exist:' + id)
            task = task.subtasks[part]
        return task

    def add_subtask(
        self,
        parent_id: str,
        goal: str,
        subtasks: list[dict | Task] | None = None,
    ) -> None:
        """Adds a subtask to a parent task.

        Args:
            parent_id: The ID of the parent task.
            goal: The goal of the subtask.
            subtasks: A list of subtasks associated with the new subtask.

        Raises:
            LLMMalformedActionError: If ``parent_id`` is invalid or does not exist.
        """
        subtasks = subtasks or []
        parent = self.get_task_by_id(parent_id)
        # Task() derives the child's id from the parent's current subtask count,
        # so the child is constructed first and appended afterwards.
        child = Task(parent=parent, goal=goal, subtasks=subtasks)
        parent.subtasks.append(child)

    def set_subtask_state(self, id: str, state: str) -> None:
        """Sets the state of a subtask.

        Once every top-level task is completed, verified or abandoned, the root
        itself is marked completed.

        Args:
            id: The ID of the subtask.
            state: The new state of the subtask.

        Raises:
            LLMMalformedActionError: If ``id`` is invalid or does not exist.
            TaskInvalidStateError: If ``state`` is not a valid state.
        """
        task = self.get_task_by_id(id)
        logger.debug(f'Setting task {task.id} from state {task.state} to {state}')
        task.set_state(state)
        finished_states = (COMPLETED_STATE, VERIFIED_STATE, ABANDONED_STATE)
        if all(t.state in finished_states for t in self.subtasks):
            self.set_state(COMPLETED_STATE)

View File

@@ -28,6 +28,8 @@ class AgentConfig(BaseModel):
"""Whether to enable finish tool"""
enable_prompt_extensions: bool = Field(default=True)
"""Whether to enable prompt extensions"""
enable_mcp: bool = Field(default=True)
"""Whether to enable MCP tools"""
disabled_microagents: list[str] = Field(default_factory=list)
"""A list of microagents to disable (by name, without .py extension, e.g. ["github", "lint"]). Default is None."""
enable_history_truncation: bool = Field(default=True)

View File

@@ -116,8 +116,6 @@ async def run_controller(
selected_repository=config.sandbox.selected_repo,
)
await add_mcp_tools_to_agent(agent, runtime, config.mcp)
event_stream = runtime.event_stream
# when memory is created, it will load the microagents from the selected repository
@@ -130,6 +128,10 @@ async def run_controller(
repo_directory=repo_directory,
)
# Add MCP tools to the agent
if agent.config.enable_mcp:
await add_mcp_tools_to_agent(agent, runtime, memory, config.mcp)
replay_events: list[Event] | None = None
if config.replay_trajectory_path:
logger.info('Trajectory replay is enabled')

View File

@@ -154,7 +154,7 @@ def create_memory(
if runtime:
# sets available hosts
memory.set_runtime_info(runtime)
memory.set_contextual_info(runtime, {})
# loads microagents from repo/.openhands/microagents
microagents: list[BaseMicroagent] = runtime.get_microagents_from_selected_repo(

View File

@@ -78,6 +78,7 @@ class EventStore(EventStoreABC):
end_id: int | None = None,
reverse: bool = False,
filter: EventFilter | None = None,
limit: int | None = None,
) -> Iterable[Event]:
"""
Retrieve events from the event stream, optionally filtering out events of a given type
@@ -107,6 +108,7 @@ class EventStore(EventStoreABC):
step = 1
cache_page = _DUMMY_PAGE
num_results = 0
for index in range(start_id, end_id, step):
if not should_continue():
return
@@ -121,6 +123,9 @@ class EventStore(EventStoreABC):
if event:
if not filter or filter.include(event):
yield event
num_results += 1
if limit and limit <= num_results:
return
def get_event(self, id: int) -> Event:
filename = self._get_filename_for_id(id, self.user_id)

View File

@@ -23,6 +23,7 @@ class EventStoreABC:
end_id: int | None = None,
reverse: bool = False,
filter: EventFilter | None = None,
limit: int | None = None,
) -> Iterable[Event]:
"""
Retrieve events from the event stream, optionally excluding events using a filter

View File

@@ -74,6 +74,8 @@ class RecallObservation(Observation):
runtime_hosts: dict[str, int] = field(default_factory=dict)
additional_agent_instructions: str = ''
date: str = ''
custom_secrets_descriptions: dict[str, str] = field(default_factory=dict)
context_message: str | None = None
# knowledge
microagent_knowledge: list[MicroagentKnowledge] = field(default_factory=list)
@@ -114,7 +116,8 @@ class RecallObservation(Observation):
f'repo_instructions={self.repo_instructions[:20]}...',
f'runtime_hosts={self.runtime_hosts}',
f'additional_agent_instructions={self.additional_agent_instructions[:20]}...',
f'date={self.date}',
f'date={self.date}'
f'custom_secrets_descriptions={self.custom_secrets_descriptions}',
]
)
else:

View File

@@ -170,6 +170,7 @@ class IPythonRunCellObservation(Observation):
code: str
observation: str = ObservationType.RUN_IPYTHON
image_urls: list[str] | None = None
@property
def error(self) -> bool:
@@ -184,4 +185,7 @@ class IPythonRunCellObservation(Observation):
return True # IPython cells are always considered successful
def __str__(self) -> str:
return f'**IPythonRunCellObservation**\n{self.content}'
result = f'**IPythonRunCellObservation**\n{self.content}'
if self.image_urls:
result += f'\nImages: {len(self.image_urls)}'
return result

View File

@@ -18,6 +18,12 @@ from openhands.core.exceptions import (
FunctionCallConversionError,
FunctionCallValidationError,
)
from openhands.llm.tool_names import (
BROWSER_TOOL_NAME,
EXECUTE_BASH_TOOL_NAME,
FINISH_TOOL_NAME,
STR_REPLACE_EDITOR_TOOL_NAME,
)
# Inspired by: https://docs.together.ai/docs/llama-3-function-calling#function-calling-w-llama-31-70b
SYSTEM_PROMPT_SUFFIX_TEMPLATE = """
@@ -55,14 +61,12 @@ def refine_prompt(prompt: str) -> str:
return prompt
# NOTE: we need to make sure this example is always in-sync with the tool interface designed in openhands/agenthub/codeact_agent/function_calling.py
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX = refine_prompt("""
Here's a running example of how to perform a task with the provided tools.
--------------------- START OF EXAMPLE ---------------------
USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.
# NOTE: we need to make sure these examples are always in-sync with the tool interface designed in openhands/agenthub/codeact_agent/function_calling.py
# Example snippets for each tool
TOOL_EXAMPLES = {
'execute_bash': {
'check_dir': """
ASSISTANT: Sure! Let me first check the current directory:
<function=execute_bash>
<parameter=command>
@@ -73,29 +77,10 @@ pwd && ls
USER: EXECUTION RESULT of [execute_bash]:
/workspace
openhands@runtime:~/workspace$
ASSISTANT: There is no `app.py` file in the current directory. Let me create a Python file `app.py`:
<function=str_replace_editor>
<parameter=command>create</parameter>
<parameter=path>/workspace/app.py</parameter>
<parameter=file_text>
from flask import Flask
app = Flask(__name__)
@app.route('/')
def index() -> str:
numbers = list(range(1, 11))
return str(numbers)
if __name__ == '__main__':
app.run(port=5000)
</parameter>
</function>
USER: EXECUTION RESULT of [str_replace_editor]:
File created successfully at: /workspace/app.py
ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you:
""",
'run_server': """
ASSISTANT:
Let me run the Python file for you:
<function=execute_bash>
<parameter=command>
python3 app.py > server.log 2>&1 &
@@ -106,7 +91,8 @@ USER: EXECUTION RESULT of [execute_bash]:
[1] 121
[1]+ Exit 1 python3 app.py > server.log 2>&1
ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log:
ASSISTANT:
Looks like the server was running with PID 121 then crashed. Let me check the server log:
<function=execute_bash>
<parameter=command>
cat server.log
@@ -119,7 +105,8 @@ Traceback (most recent call last):
from flask import Flask
ModuleNotFoundError: No module named 'flask'
ASSISTANT: Looks like the server crashed because the `flask` module is not installed. Let me install the `flask` module for you:
ASSISTANT:
Looks like the server crashed because the `flask` module is not installed. Let me install the `flask` module for you:
<function=execute_bash>
<parameter=command>
pip3 install flask
@@ -143,7 +130,8 @@ Requirement already satisfied: MarkupSafe>=2.0 in /home/openhands/.local/lib/pyt
Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask
Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0
ASSISTANT: The `flask` module is installed successfully. Let me run the Python file again:
ASSISTANT:
The `flask` module is installed successfully. Let me run the Python file again:
<function=execute_bash>
<parameter=command>
python3 app.py > server.log 2>&1 &
@@ -170,7 +158,8 @@ Press CTRL+C to quit
ASSISTANT:
The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
""",
'kill_server': """
USER: Now kill the server, make it display the numbers in a table format.
ASSISTANT:
@@ -183,7 +172,48 @@ kill 124
USER: EXECUTION RESULT of [execute_bash]:
[1]+ Terminated python3 app.py > server.log 2>&1
""",
'run_server_again': """
ASSISTANT:
Running the updated file:
<function=execute_bash>
<parameter=command>
python3 app.py > server.log 2>&1 &
</parameter>
</function>
USER: EXECUTION RESULT of [execute_bash]:
[1] 126
ASSISTANT:
The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000.
""",
},
'str_replace_editor': {
'create_file': """
ASSISTANT:
There is no `app.py` file in the current directory. Let me create a Python file `app.py`:
<function=str_replace_editor>
<parameter=command>create</parameter>
<parameter=path>/workspace/app.py</parameter>
<parameter=file_text>
from flask import Flask
app = Flask(__name__)
@app.route('/')
def index():
numbers = list(range(1, 11))
return str(numbers)
if __name__ == '__main__':
app.run(port=5000)
</parameter>
</function>
USER: EXECUTION RESULT of [str_replace_editor]:
File created successfully at: /workspace/app.py
""",
'edit_file': """
ASSISTANT:
Now let me display the numbers in a table format:
<function=str_replace_editor>
@@ -205,28 +235,100 @@ The file /workspace/app.py has been edited. Here's the result of running `cat -n
10 app.run(port=5000)
Review the changes and make sure they are as expected. Edit the file again if necessary.
""",
},
'browser': {
'view_page': """
ASSISTANT:
Running the updated file:
<function=execute_bash>
<parameter=command>
python3 app.py > server.log 2>&1 &
Let me check how the page looks in the browser:
<function=browser>
<parameter=code>
goto('http://127.0.0.1:5000')
noop(1000) # Wait for page to load
</parameter>
</function>
USER: EXECUTION RESULT of [execute_bash]:
[1] 126
USER: EXECUTION RESULT of [browser]:
[Browser shows the numbers in a table format]
"""
},
'finish': {
'task_completed': """
ASSISTANT:
The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Let me know if you have any further requests!
<function=finish>
<parameter=message>The task has been completed. The web server is running and displaying numbers 1-10 in a table format at http://127.0.0.1:5000.</parameter>
<parameter=task_completed>true</parameter>
</function>
"""
},
}
def get_example_for_tools(tools: list[dict]) -> str:
"""Generate an in-context learning example based on available tools."""
available_tools = set()
for tool in tools:
if tool['type'] == 'function':
name = tool['function']['name']
if name == EXECUTE_BASH_TOOL_NAME:
available_tools.add('execute_bash')
elif name == STR_REPLACE_EDITOR_TOOL_NAME:
available_tools.add('str_replace_editor')
elif name == BROWSER_TOOL_NAME:
available_tools.add('browser')
elif name == FINISH_TOOL_NAME:
available_tools.add('finish')
if not available_tools:
return ''
example = """Here's a running example of how to perform a task with the provided tools.
--------------------- START OF EXAMPLE ---------------------
USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.
"""
# Build example based on available tools
if 'execute_bash' in available_tools:
example += TOOL_EXAMPLES['execute_bash']['check_dir']
if 'str_replace_editor' in available_tools:
example += TOOL_EXAMPLES['str_replace_editor']['create_file']
if 'execute_bash' in available_tools:
example += TOOL_EXAMPLES['execute_bash']['run_server']
if 'browser' in available_tools:
example += TOOL_EXAMPLES['browser']['view_page']
if 'execute_bash' in available_tools:
example += TOOL_EXAMPLES['execute_bash']['kill_server']
if 'str_replace_editor' in available_tools:
example += TOOL_EXAMPLES['str_replace_editor']['edit_file']
if 'execute_bash' in available_tools:
example += TOOL_EXAMPLES['execute_bash']['run_server_again']
if 'finish' in available_tools:
example += TOOL_EXAMPLES['finish']['task_completed']
example += """
--------------------- END OF EXAMPLE ---------------------
Do NOT assume the environment is the same as in the example above.
--------------------- NEW TASK DESCRIPTION ---------------------
""").lstrip()
"""
example = example.lstrip()
return example
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX = get_example_for_tools
IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX = """
--------------------- END OF NEW TASK DESCRIPTION ---------------------
@@ -352,74 +454,42 @@ def convert_fncall_messages_to_non_fncall_messages(
# Add in-context learning example for the first user message
if not first_user_message_encountered and add_in_context_learning_example:
first_user_message_encountered = True
# Check tools
if not (
tools
and len(tools) > 0
and any(
(
tool['type'] == 'function'
and tool['function']['name']
== refine_prompt('execute_bash')
and 'command'
in tool['function']['parameters']['properties']
)
for tool in tools
)
and any(
(
tool['type'] == 'function'
and tool['function']['name'] == 'str_replace_editor'
and 'path' in tool['function']['parameters']['properties']
and 'file_text'
in tool['function']['parameters']['properties']
and 'old_str'
in tool['function']['parameters']['properties']
and 'new_str'
in tool['function']['parameters']['properties']
)
for tool in tools
)
):
raise FunctionCallConversionError(
'The currently provided tool set are NOT compatible with the in-context learning example for FnCall to Non-FnCall conversion. '
'Please update your tool set OR the in-context learning example in openhands/llm/fn_call_converter.py'
)
# add in-context learning example
if isinstance(content, str):
content = (
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX
+ content
+ IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX
)
elif isinstance(content, list):
if content and content[0]['type'] == 'text':
content[0]['text'] = (
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX
+ content[0]['text']
+ IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX
)
# Generate example based on available tools
example = IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(tools)
# Add example if we have any tools
if example:
# add in-context learning example
if isinstance(content, str):
content = example + content + IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX
elif isinstance(content, list):
if content and content[0]['type'] == 'text':
content[0]['text'] = (
example
+ content[0]['text']
+ IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX
)
else:
content = (
[
{
'type': 'text',
'text': example,
}
]
+ content
+ [
{
'type': 'text',
'text': IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX,
}
]
)
else:
content = (
[
{
'type': 'text',
'text': IN_CONTEXT_LEARNING_EXAMPLE_PREFIX,
}
]
+ content
+ [
{
'type': 'text',
'text': IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX,
}
]
raise FunctionCallConversionError(
f'Unexpected content type {type(content)}. Expected str or list. Content: {content}'
)
else:
raise FunctionCallConversionError(
f'Unexpected content type {type(content)}. Expected str or list. Content: {content}'
)
converted_messages.append(
{
'role': 'user',
@@ -604,17 +674,28 @@ def convert_non_fncall_messages_to_fncall_messages(
if not first_user_message_encountered:
first_user_message_encountered = True
if isinstance(content, str):
content = content.replace(IN_CONTEXT_LEARNING_EXAMPLE_PREFIX, '')
content = content.replace(IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, '')
# Remove any existing example
if content.startswith(IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(tools)):
content = content.replace(
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(tools), '', 1
)
if content.endswith(IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX):
content = content.replace(
IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, '', 1
)
elif isinstance(content, list):
for item in content:
if item['type'] == 'text':
item['text'] = item['text'].replace(
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX, ''
)
item['text'] = item['text'].replace(
IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, ''
)
# Remove any existing example
example = IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(tools)
if item['text'].startswith(example):
item['text'] = item['text'].replace(example, '', 1)
if item['text'].endswith(
IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX
):
item['text'] = item['text'].replace(
IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, '', 1
)
else:
raise FunctionCallConversionError(
f'Unexpected content type {type(content)}. Expected str or list. Content: {content}'

View File

@@ -0,0 +1,6 @@
"""Constants for tool names used in function calling."""
# Canonical function names for the agent's function-calling tools. Each value is
# the literal string that appears as the tool's `name`.
# Shell command execution tool.
EXECUTE_BASH_TOOL_NAME = 'execute_bash'
# File view/create/edit tool.
STR_REPLACE_EDITOR_TOOL_NAME = 'str_replace_editor'
# Browser interaction tool.
BROWSER_TOOL_NAME = 'browser'
# Tool that signals the task or conversation is complete.
FINISH_TOOL_NAME = 'finish'

View File

@@ -10,6 +10,7 @@ from openhands.events.action.mcp import MCPAction
from openhands.events.observation.mcp import MCPObservation
from openhands.events.observation.observation import Observation
from openhands.mcp.client import MCPClient
from openhands.memory.memory import Memory
from openhands.runtime.base import Runtime
@@ -149,7 +150,7 @@ async def call_tool_mcp(mcp_clients: list[MCPClient], action: MCPAction) -> Obse
async def add_mcp_tools_to_agent(
agent: 'Agent', runtime: Runtime, mcp_config: MCPConfig
agent: 'Agent', runtime: Runtime, memory: 'Memory', mcp_config: MCPConfig
):
"""
Add MCP tools to an agent.
@@ -165,8 +166,25 @@ async def add_mcp_tools_to_agent(
'Runtime must be initialized before adding MCP tools'
)
# Add microagent MCP tools if available
microagent_mcp_configs = memory.get_microagent_mcp_tools()
extra_stdio_servers = []
for mcp_config in microagent_mcp_configs:
if mcp_config.sse_servers:
logger.warning(
'Microagent MCP config contains SSE servers, it is not yet supported.'
)
if mcp_config.stdio_servers:
for stdio_server in mcp_config.stdio_servers:
# Check if this stdio server is already in the config
if stdio_server not in extra_stdio_servers:
extra_stdio_servers.append(stdio_server)
logger.info(f'Added microagent stdio server: {stdio_server.name}')
# Add the runtime as another MCP server
updated_mcp_config = runtime.get_updated_mcp_config()
updated_mcp_config = runtime.get_updated_mcp_config(extra_stdio_servers)
# Fetch the MCP tools
mcp_tools = await fetch_mcp_tools_from_config(updated_mcp_config)

Some files were not shown because too many files have changed in this diff Show More