mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
98 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6ba79c454b | |||
| fbc06f42aa | |||
| f35ed5e277 | |||
| 6787a3adf7 | |||
| fa50e0c9b9 | |||
| f4c5bbda19 | |||
| 6562297615 | |||
| 0217a7cfbd | |||
| aa15c9d385 | |||
| 8ad89e368a | |||
| 29ba94fc0f | |||
| 8956f92f6a | |||
| 753e3c4205 | |||
| ecd573febc | |||
| 325a558fbc | |||
| 666c186826 | |||
| 2d2dbf1561 | |||
| abac25cc4c | |||
| 70b21d16bd | |||
| bf82f75ae4 | |||
| a8bce3724f | |||
| e109f7e58e | |||
| a20f299579 | |||
| bf77da7849 | |||
| 869ea59ecd | |||
| f093c14ad3 | |||
| 9d3a0a02b8 | |||
| 35bab5070d | |||
| d03b9775b5 | |||
| fab4532f6b | |||
| d33913e036 | |||
| e52aee168e | |||
| c27b191358 | |||
| 22c5ad85d9 | |||
| 0180ce77b1 | |||
| 2f14e53746 | |||
| 52723061b1 | |||
| 42f1fc92fa | |||
| 3f8bc8a7ea | |||
| f869ad995c | |||
| 74c942c911 | |||
| eed7e2dd6e | |||
| 663e36109c | |||
| e92e4a1cbc | |||
| 61ce673400 | |||
| b95840db0c | |||
| 003ebc0ded | |||
| df8bbc2b67 | |||
| eb5be2ab63 | |||
| 81f2b08a89 | |||
| cb72a06ca3 | |||
| 340c2310d1 | |||
| f4e5fb2873 | |||
| 1a7003a705 | |||
| 8d097efb4f | |||
| 2e98fc8fb3 | |||
| e3e00ed70a | |||
| 96d1992823 | |||
| 7a3a0d8c0c | |||
| fdffca18e0 | |||
| b10416e0a3 | |||
| 1f462d2417 | |||
| 0a6ff463db | |||
| 9ff15bf94f | |||
| 6c48013601 | |||
| 07fcb786af | |||
| ce42e22105 | |||
| 14ee6d7afe | |||
| 57391d6e66 | |||
| a7bb73ded2 | |||
| f4b123f73b | |||
| ae31a24c29 | |||
| 3a478c2303 | |||
| 82b5325792 | |||
| 265e8ae5f4 | |||
| 0cbf50576d | |||
| 745038b394 | |||
| b018567d53 | |||
| 30e39e85d0 | |||
| 4443417c75 | |||
| efbff2e655 | |||
| 63565982aa | |||
| b07fddcb71 | |||
| 99b50d038e | |||
| 1ddfa99c57 | |||
| 0c03e257b7 | |||
| 85e3a00d9d | |||
| edd51102ad | |||
| f5fccab1f6 | |||
| ef12bc5381 | |||
| b197e0af47 | |||
| 341b695ad3 | |||
| d46d99a35e | |||
| 653168fc3d | |||
| cb5e7f0130 | |||
| 312b9fbfb1 | |||
| ba599c7dd6 | |||
| 7e359eda4a |
@@ -1,11 +1,12 @@
|
||||
**End-user friendly description of the problem this fixes or functionality that this introduces**
|
||||
|
||||
- [ ] Include this change in the Release Notes. If checked, you must provide an **end-user friendly** description for your change below
|
||||
|
||||
---
|
||||
**Give a summary of what the PR does, explaining any non-trivial design decisions**
|
||||
- [ ] This change is worth documenting at https://docs.all-hands.dev/
|
||||
- [ ] Include this change in the Release Notes. If checked, you **must** provide an **end-user friendly** description for your change below
|
||||
|
||||
**End-user friendly description of the problem this fixes or functionality that this introduces.**
|
||||
|
||||
|
||||
---
|
||||
**Link of any specific issues this addresses**
|
||||
**Give a summary of what the PR does, explaining any non-trivial design decisions.**
|
||||
|
||||
|
||||
---
|
||||
**Link of any specific issues this addresses.**
|
||||
|
||||
@@ -24,6 +24,10 @@ jobs:
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Install tmux
|
||||
run: sudo apt-get update && sudo apt-get install -y tmux
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '22.x'
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Set up Python
|
||||
|
||||
@@ -88,12 +88,10 @@ jobs:
|
||||
run: |
|
||||
python -m pip index versions openhands-ai > openhands_versions.txt
|
||||
OPENHANDS_VERSION=$(head -n 1 openhands_versions.txt | awk '{print $2}' | tr -d '()')
|
||||
# Ensure requirements.txt ends with newline before appending
|
||||
if [ -f requirements.txt ] && [ -s requirements.txt ]; then
|
||||
sed -i -e '$a\' requirements.txt
|
||||
fi
|
||||
echo "openhands-ai==${OPENHANDS_VERSION}" >> requirements.txt
|
||||
cat requirements.txt
|
||||
|
||||
# Create a new requirements.txt locally within the workflow, ensuring no reference to the repo's file
|
||||
echo "openhands-ai==${OPENHANDS_VERSION}" > /tmp/requirements.txt
|
||||
cat /tmp/requirements.txt
|
||||
|
||||
- name: Cache pip dependencies
|
||||
if: |
|
||||
@@ -111,9 +109,9 @@ jobs:
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ${{ env.pythonLocation }}/lib/python3.12/site-packages/*
|
||||
key: ${{ runner.os }}-pip-openhands-resolver-${{ hashFiles('requirements.txt') }}
|
||||
key: ${{ runner.os }}-pip-openhands-resolver-${{ hashFiles('/tmp/requirements.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-pip-openhands-resolver-${{ hashFiles('requirements.txt') }}
|
||||
${{ runner.os }}-pip-openhands-resolver-${{ hashFiles('/tmp/requirements.txt') }}
|
||||
|
||||
- name: Check required environment variables
|
||||
env:
|
||||
@@ -225,13 +223,14 @@ jobs:
|
||||
} else {
|
||||
console.log("Installing from requirements.txt...");
|
||||
await exec.exec("python -m pip install --upgrade pip");
|
||||
await exec.exec("pip install -r requirements.txt");
|
||||
await exec.exec("pip install -r /tmp/requirements.txt");
|
||||
}
|
||||
|
||||
- name: Attempt to resolve issue
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.PAT_TOKEN || github.token }}
|
||||
GITHUB_USERNAME: ${{ secrets.PAT_USERNAME || 'openhands-agent' }}
|
||||
GIT_USERNAME: ${{ secrets.PAT_USERNAME || 'openhands-agent' }}
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL || inputs.LLM_MODEL }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
|
||||
@@ -268,6 +267,7 @@ jobs:
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.PAT_TOKEN || github.token }}
|
||||
GITHUB_USERNAME: ${{ secrets.PAT_USERNAME || 'openhands-agent' }}
|
||||
GIT_USERNAME: ${{ secrets.PAT_USERNAME || 'openhands-agent' }}
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL || inputs.LLM_MODEL }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
|
||||
|
||||
@@ -32,6 +32,10 @@ jobs:
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Install tmux
|
||||
run: sudo apt-get update && sudo apt-get install -y tmux
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '22.x'
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Set up Python
|
||||
@@ -44,7 +48,7 @@ jobs:
|
||||
- name: Build Environment
|
||||
run: make build
|
||||
- name: Run Tests
|
||||
run: poetry run pytest --forked -n auto --cov=openhands --cov-report=xml -svv ./tests/unit --ignore=tests/unit/test_memory.py
|
||||
run: poetry run pytest --forked -n auto --cov=openhands --cov-report=xml -svv ./tests/unit --ignore=tests/unit/test_long_term_memory.py
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v5
|
||||
env:
|
||||
|
||||
+1
-1
@@ -100,7 +100,7 @@ poetry run pytest ./tests/unit/test_*.py
|
||||
To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker container image by
|
||||
setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
|
||||
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.23-nikolaik`
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.26-nikolaik`
|
||||
|
||||
## Develop inside Docker container
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
SHELL=/bin/bash
|
||||
SHELL=/usr/bin/env bash
|
||||
# Makefile for OpenHands project
|
||||
|
||||
# Variables
|
||||
@@ -81,10 +81,10 @@ check-nodejs:
|
||||
@if command -v node > /dev/null; then \
|
||||
NODE_VERSION=$(shell node --version | sed -E 's/v//g'); \
|
||||
IFS='.' read -r -a NODE_VERSION_ARRAY <<< "$$NODE_VERSION"; \
|
||||
if [ "$${NODE_VERSION_ARRAY[0]}" -ge 20 ]; then \
|
||||
if [ "$${NODE_VERSION_ARRAY[0]}" -ge 22 ]; then \
|
||||
echo "$(BLUE)Node.js $$NODE_VERSION is already installed.$(RESET)"; \
|
||||
else \
|
||||
echo "$(RED)Node.js 20.x or later is required. Please install Node.js 20.x or later to continue.$(RESET)"; \
|
||||
echo "$(RED)Node.js 22.x or later is required. Please install Node.js 22.x or later to continue.$(RESET)"; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
else \
|
||||
|
||||
@@ -43,17 +43,17 @@ See the [Running OpenHands](https://docs.all-hands.dev/modules/usage/installatio
|
||||
system requirements and more information.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.23
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26
|
||||
```
|
||||
|
||||
You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!
|
||||
|
||||
@@ -17,6 +17,12 @@
|
||||
#modal_api_token_id = ""
|
||||
#modal_api_token_secret = ""
|
||||
|
||||
# API key for Daytona
|
||||
#daytona_api_key = ""
|
||||
|
||||
# Daytona Target
|
||||
#daytona_target = ""
|
||||
|
||||
# Base path for the workspace
|
||||
workspace_base = "./workspace"
|
||||
|
||||
@@ -234,6 +240,10 @@ codeact_enable_jupyter = true
|
||||
# List of microagents to disable
|
||||
#disabled_microagents = []
|
||||
|
||||
# Whether history should be truncated to continue the session when hitting LLM context
|
||||
# length limit
|
||||
enable_history_truncation = true
|
||||
|
||||
[agent.RepoExplorerAgent]
|
||||
# Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
|
||||
# useful when an agent doesn't demand high quality but uses a lot of tokens
|
||||
|
||||
+1
-1
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
# Initialize variables with default values
|
||||
|
||||
@@ -11,7 +11,7 @@ services:
|
||||
- BACKEND_HOST=${BACKEND_HOST:-"0.0.0.0"}
|
||||
- SANDBOX_API_HOSTNAME=host.docker.internal
|
||||
#
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.23-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.26-nikolaik}
|
||||
- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -o pipefail
|
||||
|
||||
function get_docker() {
|
||||
|
||||
+1
-1
@@ -7,7 +7,7 @@ services:
|
||||
image: openhands:latest
|
||||
container_name: openhands-app-${DATE:-}
|
||||
environment:
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik}
|
||||
#- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234} # enable this only if you want a specific non-root sandbox user but you will have to manually adjust permissions of openhands-state for this user
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
@@ -46,3 +46,11 @@ docker run -it \
|
||||
-e THAT=that
|
||||
...
|
||||
```
|
||||
|
||||
### Referring to UI Elements
|
||||
|
||||
When referencing UI elements, use ``.
|
||||
|
||||
Example:
|
||||
1. Toggle the `Advanced` option
|
||||
2. Enter your model in the `Custom Model` textbox.
|
||||
|
||||
@@ -52,7 +52,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -61,7 +61,7 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.23 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -56,6 +56,6 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.23 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
|
||||
```
|
||||
|
||||
@@ -13,16 +13,16 @@
|
||||
La façon la plus simple d'exécuter OpenHands est avec Docker.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.23
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26
|
||||
```
|
||||
|
||||
Vous pouvez également exécuter OpenHands en mode [headless scriptable](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), en tant que [CLI interactive](https://docs.all-hands.dev/modules/usage/how-to/cli-mode), ou en utilisant l'[Action GitHub OpenHands](https://docs.all-hands.dev/modules/usage/how-to/github-action).
|
||||
|
||||
@@ -13,7 +13,7 @@ C'est le Runtime par défaut qui est utilisé lorsque vous démarrez OpenHands.
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -50,7 +50,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -59,7 +59,7 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.23 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
+2
-2
@@ -47,7 +47,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -57,6 +57,6 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.23 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
|
||||
```
|
||||
|
||||
@@ -11,16 +11,16 @@
|
||||
在 Docker 中运行 OpenHands 是最简单的方式。
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.23
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26
|
||||
```
|
||||
|
||||
你也可以在可脚本化的[无头模式](https://docs.all-hands.dev/modules/usage/how-to/headless-mode)下运行 OpenHands,作为[交互式 CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode),或使用 [OpenHands GitHub Action](https://docs.all-hands.dev/modules/usage/how-to/github-action)。
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -54,14 +54,13 @@ graph TD
|
||||
6. Action Execution: The runtime client receives actions from the backend, executes them in the sandboxed environment, and sends back observations
|
||||
7. Observation Return: The action execution server sends execution results back to the OpenHands backend as observations
|
||||
|
||||
|
||||
The role of the client:
|
||||
|
||||
- It acts as an intermediary between the OpenHands backend and the sandboxed environment
|
||||
- It executes various types of actions (shell commands, file operations, Python code, etc.) safely within the container
|
||||
- It manages the state of the sandboxed environment, including the current working directory and loaded plugins
|
||||
- It formats and returns observations to the backend, ensuring a consistent interface for processing results
|
||||
|
||||
|
||||
## How OpenHands builds and maintains OH Runtime images
|
||||
|
||||
OpenHands' approach to building and managing runtime images ensures efficiency, consistency, and flexibility in creating and maintaining Docker images for both production and development environments.
|
||||
@@ -78,16 +77,15 @@ Tags may be in one of 2 formats:
|
||||
- **Source Tag**: `oh_v{openhands_version}_{16_digit_lock_hash}_{16_digit_source_hash}`
|
||||
(e.g.: `oh_v0.9.9_1234567890abcdef_1234567890abcdef`)
|
||||
|
||||
|
||||
#### Source Tag - Most Specific
|
||||
|
||||
This is the first 16 digits of the MD5 of the directory hash for the source directory. This gives a hash
|
||||
for only the openhands source
|
||||
|
||||
|
||||
#### Lock Tag
|
||||
|
||||
This hash is built from the first 16 digits of the MD5 of:
|
||||
|
||||
- The name of the base image upon which the image was built (e.g.: `nikolaik/python-nodejs:python3.12-nodejs22`)
|
||||
- The content of the `pyproject.toml` included in the image.
|
||||
- The content of the `poetry.lock` included in the image.
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
# Cloud GitHub Resolver
|
||||
|
||||
The GitHub Resolver automates code fixes and provides intelligent assistance for your repositories.
|
||||
|
||||
## Setup
|
||||
|
||||
The Cloud Github Resolver is available automatically when you
|
||||
[grant OpenHands Cloud repository access](./openhands-cloud.md#adding-repositories).
|
||||
|
||||
## Usage
|
||||
|
||||
### Issues
|
||||
|
||||
On your repository, label an issue with `openhands`. OpenHands will attempt to fix the issue.
|
||||
|
||||
### Pull Requests
|
||||
|
||||
In order to get OpenHands to work on pull requests, use `@openhands` in top level or single inline comments to:
|
||||
- Ask questions
|
||||
- Request updates
|
||||
- Get code explanations
|
||||
@@ -0,0 +1,39 @@
|
||||
# Openhands Cloud
|
||||
|
||||
OpenHands Cloud is the cloud hosted version of OpenHands by All Hands AI.
|
||||
|
||||
## Accessing OpenHands Cloud
|
||||
|
||||
Currently, users are being admitted to access OpenHands Cloud in waves. To sign up,
|
||||
[join the waitlist](https://www.all-hands.dev/join-waitlist). Once you are approved, you will get an email with
|
||||
instructions on how to access it.
|
||||
|
||||
## Getting Started
|
||||
|
||||
After visiting OpenHands Cloud, you will be asked to connect with your GitHub account:
|
||||
1. After reading and accepting the terms of service, click `Connect to GitHub`.
|
||||
2. Review the permissions requested by OpenHands and then click `Authorize OpenHands by All Hands AI`.
|
||||
- OpenHands will require some permissions from your GitHub account. To read more about these permissions,
|
||||
you can click the `Learn more` link on the GitHub authorize page.
|
||||
|
||||
## Adding Repositories
|
||||
|
||||
You can grant OpenHands specific repository access:
|
||||
1. Under the `Select a GitHub project` dropdown, select `Add more repositories...`.
|
||||
2. Select the organization, then choose the specific repositories to grant OpenHands access to.
|
||||
- Openhands requests short-lived tokens (8-hour expiry) with these permissions:
|
||||
- Actions: Read and write
|
||||
- Administration: Read-only
|
||||
- Commit statuses: Read and write
|
||||
- Contents: Read and write
|
||||
- Issues: Read and write
|
||||
- Metadata: Read-only
|
||||
- Pull requests: Read and write
|
||||
- Webhooks: Read and write
|
||||
- Workflows: Read and write
|
||||
- Repository access for a user is granted based on:
|
||||
- Granted permission for the repository.
|
||||
- User's GitHub permissions (owner/collaborator).
|
||||
|
||||
You can manage repository access any time by following the above workflow or visiting the Settings page and selecting
|
||||
`Configure GitHub Repositories` under the `GitHub Settings` section.
|
||||
@@ -340,6 +340,11 @@ The agent configuration options are defined in the `[agent]` and `[agent.<agent_
|
||||
- Default: `false`
|
||||
- Description: Whether Jupyter is enabled in the action space
|
||||
|
||||
- `enable_history_truncation`
|
||||
- Type: `bool`
|
||||
- Default: `true`
|
||||
- Description: Whether history should be truncated to continue the session when hitting LLM context length limit
|
||||
|
||||
### Microagent Usage
|
||||
- `enable_prompt_extensions`
|
||||
- Type: `bool`
|
||||
|
||||
@@ -35,7 +35,7 @@ To run OpenHands in CLI mode with Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -45,7 +45,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.23 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
# GUI Mode
|
||||
|
||||
## Introduction
|
||||
|
||||
OpenHands provides a user-friendly Graphical User Interface (GUI) mode for interacting with the AI assistant.
|
||||
This mode offers an intuitive way to set up the environment, manage settings, and communicate with the AI.
|
||||
OpenHands provides a Graphical User Interface (GUI) mode for interacting with the AI assistant.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
@@ -14,104 +11,95 @@ This mode offers an intuitive way to set up the environment, manage settings, an
|
||||
|
||||
### Initial Setup
|
||||
|
||||
1. Upon first launch, you'll see a settings modal.
|
||||
2. Select an `LLM Provider` and `LLM Model` from the dropdown menus.
|
||||
1. Upon first launch, you'll see a settings page.
|
||||
2. Select an `LLM Provider` and `LLM Model` from the dropdown menus. If the required model does not exist in the list,
|
||||
toggle `Advanced` options and enter it with the correct prefix in the `Custom Model` text box.
|
||||
3. Enter the corresponding `API Key` for your chosen provider.
|
||||
4. Click "Save" to apply the settings.
|
||||
4. Click `Save Changes` to apply the settings.
|
||||
|
||||
### GitHub Token Setup
|
||||
|
||||
OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if it is available. This can happen in two ways:
|
||||
|
||||
- **Locally (OSS)**: The user directly inputs their GitHub token.
|
||||
- **Online (SaaS)**: The token is obtained through GitHub OAuth authentication.
|
||||
|
||||
#### Setting Up a Local GitHub Token
|
||||
|
||||
1. **Generate a Personal Access Token (PAT)**:
|
||||
- Go to GitHub Settings > Developer Settings > Personal Access Tokens > Tokens (classic).
|
||||
- Click "Generate new token (classic)".
|
||||
- **Local Installation**: The user directly inputs their GitHub token.
|
||||
<details>
|
||||
<summary>Setting Up a GitHub Token</summary>
|
||||
1. **Generate a Personal Access Token (PAT)**:
|
||||
- On GitHub, go to Settings > Developer Settings > Personal Access Tokens > Tokens (classic).
|
||||
- Click `Generate new token (classic)`.
|
||||
- Required scopes:
|
||||
- `repo` (Full control of private repositories)
|
||||
- `workflow` (Update GitHub Action workflows)
|
||||
- `read:org` (Read organization data)
|
||||
2. **Enter Token in OpenHands**:
|
||||
- Click the Settings button (gear icon).
|
||||
- Navigate to the `GitHub Settings` section.
|
||||
- Paste your token in the `GitHub Token` field.
|
||||
- Click `Save Changes` to apply the changes.
|
||||
</details>
|
||||
|
||||
2. **Enter Token in OpenHands**:
|
||||
- Click the Settings button (gear icon) in the top right.
|
||||
- Navigate to the "GitHub" section.
|
||||
- Paste your token in the "GitHub Token" field.
|
||||
- Click "Save" to apply the changes.
|
||||
<details>
|
||||
<summary>Organizational Token Policies</summary>
|
||||
|
||||
#### Organizational Token Policies
|
||||
If you're working with organizational repositories, additional setup may be required:
|
||||
|
||||
If you're working with organizational repositories, additional setup may be required:
|
||||
|
||||
1. **Check Organization Requirements**:
|
||||
1. **Check Organization Requirements**:
|
||||
- Organization admins may enforce specific token policies.
|
||||
- Some organizations require tokens to be created with SSO enabled.
|
||||
- Review your organization's [token policy settings](https://docs.github.com/en/organizations/managing-programmatic-access-to-your-organization/setting-a-personal-access-token-policy-for-your-organization).
|
||||
|
||||
2. **Verify Organization Access**:
|
||||
2. **Verify Organization Access**:
|
||||
- Go to your token settings on GitHub.
|
||||
- Look for the organization under "Organization access".
|
||||
- If required, click "Enable SSO" next to your organization.
|
||||
- Look for the organization under `Organization access`.
|
||||
- If required, click `Enable SSO` next to your organization.
|
||||
- Complete the SSO authorization process.
|
||||
</details>
|
||||
|
||||
#### OAuth Authentication (Online Mode)
|
||||
<details>
|
||||
<summary>Troubleshooting</summary>
|
||||
|
||||
When using OpenHands in online mode, the GitHub OAuth flow:
|
||||
Common issues and solutions:
|
||||
|
||||
1. Requests the following permissions:
|
||||
- **Token Not Recognized**:
|
||||
- Ensure the token is properly saved in settings.
|
||||
- Check that the token hasn't expired.
|
||||
- Verify the token has the required scopes.
|
||||
- Try regenerating the token.
|
||||
|
||||
- **Organization Access Denied**:
|
||||
- Check if SSO is required but not enabled.
|
||||
- Verify organization membership.
|
||||
- Contact organization admin if token policies are blocking access.
|
||||
|
||||
- **Verifying Token Works**:
|
||||
- The app will show a green checkmark if the token is valid.
|
||||
- Try accessing a repository to confirm permissions.
|
||||
- Check the browser console for any error messages.
|
||||
</details>
|
||||
|
||||
- **OpenHands Cloud**: The token is obtained through GitHub OAuth authentication.
|
||||
|
||||
<details>
|
||||
<summary>OAuth Authentication</summary>
|
||||
|
||||
When using OpenHands Cloud, the GitHub OAuth flow requests the following permissions:
|
||||
- Repository access (read/write)
|
||||
- Workflow management
|
||||
- Organization read access
|
||||
|
||||
2. Authentication steps:
|
||||
- Click "Sign in with GitHub" when prompted.
|
||||
To authenticate OpenHands:
|
||||
- Click `Sign in with GitHub` when prompted.
|
||||
- Review the requested permissions.
|
||||
- Authorize OpenHands to access your GitHub account.
|
||||
- If using an organization, authorize organization access if prompted.
|
||||
|
||||
#### Troubleshooting
|
||||
|
||||
Common issues and solutions:
|
||||
|
||||
- **Token Not Recognized**:
|
||||
- Ensure the token is properly saved in settings.
|
||||
- Check that the token hasn't expired.
|
||||
- Verify the token has the required scopes.
|
||||
- Try regenerating the token.
|
||||
|
||||
- **Organization Access Denied**:
|
||||
- Check if SSO is required but not enabled.
|
||||
- Verify organization membership.
|
||||
- Contact organization admin if token policies are blocking access.
|
||||
|
||||
- **Verifying Token Works**:
|
||||
- The app will show a green checkmark if the token is valid.
|
||||
- Try accessing a repository to confirm permissions.
|
||||
- Check the browser console for any error messages.
|
||||
- Use the "Test Connection" button in settings if available.
|
||||
</details>
|
||||
|
||||
### Advanced Settings
|
||||
|
||||
1. Toggle `Advanced Options` to access additional settings.
|
||||
1. Inside the Settings page, toggle `Advanced` options to access additional settings.
|
||||
2. Use the `Custom Model` text box to manually enter a model if it's not in the list.
|
||||
3. Specify a `Base URL` if required by your LLM provider.
|
||||
|
||||
### Main Interface
|
||||
|
||||
The main interface consists of several key components:
|
||||
|
||||
- **Chat Window**: The central area where you can view the conversation history with the AI assistant.
|
||||
- **Input Box**: Located at the bottom of the screen, use this to type your messages or commands to the AI.
|
||||
- **Send Button**: Click this to send your message to the AI.
|
||||
- **Settings Button**: A gear icon that opens the settings modal, allowing you to adjust your configuration at any time.
|
||||
- **Workspace Panel**: Displays the files and folders in your workspace, allowing you to navigate and view files, or the agent's past commands or web browsing history.
|
||||
|
||||
### Interacting with the AI
|
||||
|
||||
1. Type your question, request, or task description in the input box.
|
||||
1. Type your prompt in the input box.
|
||||
2. Click the send button or press Enter to submit your message.
|
||||
3. The AI will process your input and provide a response in the chat window.
|
||||
4. You can continue the conversation by asking follow-up questions or providing additional information.
|
||||
|
||||
@@ -32,7 +32,7 @@ To run OpenHands in Headless mode with Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -43,7 +43,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.23 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi"
|
||||
```
|
||||
|
||||
|
||||
@@ -6,11 +6,14 @@
|
||||
- Linux
|
||||
- Windows with [WSL](https://learn.microsoft.com/en-us/windows/wsl/install) and [Docker Desktop support](https://docs.docker.com/desktop/setup/install/windows-install/#system-requirements)
|
||||
|
||||
A system with a modern processor and a minimum of **4GB RAM** is recommended to run OpenHands.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
<details>
|
||||
<summary>MacOS</summary>
|
||||
### Docker Desktop
|
||||
|
||||
**Docker Desktop**
|
||||
|
||||
1. [Install Docker Desktop on Mac](https://docs.docker.com/desktop/setup/install/mac-install).
|
||||
2. Open Docker Desktop, go to `Settings > Advanced` and ensure `Allow the default Docker socket to be used` is enabled.
|
||||
@@ -23,7 +26,7 @@
|
||||
Tested with Ubuntu 22.04.
|
||||
:::
|
||||
|
||||
### Docker Desktop
|
||||
**Docker Desktop**
|
||||
|
||||
1. [Install Docker Desktop on Linux](https://docs.docker.com/desktop/setup/install/linux/).
|
||||
|
||||
@@ -31,12 +34,13 @@
|
||||
|
||||
<details>
|
||||
<summary>Windows</summary>
|
||||
### WSL
|
||||
|
||||
**WSL**
|
||||
|
||||
1. [Install WSL](https://learn.microsoft.com/en-us/windows/wsl/install).
|
||||
2. Run `wsl --version` in powershell and confirm `Default Version: 2`.
|
||||
|
||||
### Docker Desktop
|
||||
**Docker Desktop**
|
||||
|
||||
1. [Install Docker Desktop on Windows](https://docs.docker.com/desktop/setup/install/windows-install).
|
||||
2. Open Docker Desktop, go to `Settings` and confirm the following:
|
||||
@@ -54,17 +58,17 @@
|
||||
The easiest way to run OpenHands is in Docker.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.23
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26
|
||||
```
|
||||
|
||||
You'll find OpenHands running at http://localhost:3000!
|
||||
@@ -76,24 +80,22 @@ or run it on tagged issues with [a github action](https://docs.all-hands.dev/mod
|
||||
|
||||
## Setup
|
||||
|
||||
Upon launching OpenHands, you'll see a settings modal. You **must** select an `LLM Provider` and `LLM Model` and enter a corresponding `API Key`.
|
||||
Upon launching OpenHands, you'll see a Settings page. You **must** select an `LLM Provider` and `LLM Model` and enter a corresponding `API Key`.
|
||||
These can be changed at any time by selecting the `Settings` button (gear icon) in the UI.
|
||||
|
||||
If the required `LLM Model` does not exist in the list, you can toggle `Advanced Options` and manually enter it with the correct prefix
|
||||
If the required model does not exist in the list, you can toggle `Advanced` options and manually enter it with the correct prefix
|
||||
in the `Custom Model` text box.
|
||||
The `Advanced Options` also allow you to specify a `Base URL` if required.
|
||||
The `Advanced` options also allow you to specify a `Base URL` if required.
|
||||
|
||||
<div style={{ display: 'flex', justifyContent: 'center', gap: '20px' }}>
|
||||
<img src="/img/settings-screenshot.png" alt="settings-modal" width="340" />
|
||||
<img src="/img/settings-advanced.png" alt="settings-modal" width="335" />
|
||||
</div>
|
||||
Now you're ready to [get started with OpenHands](./getting-started).
|
||||
|
||||
## Versions
|
||||
|
||||
The command above pulls the most recent stable release of OpenHands. You have other options as well:
|
||||
- For a specific release, use `docker.all-hands.dev/all-hands-ai/openhands:$VERSION`, replacing $VERSION with the version number.
|
||||
- We use semver, and release major, minor, and patch tags. So `0.9` will automatically point to the latest `0.9.x` release, and `0` will point to the latest `0.x.x` release.
|
||||
- For the most up-to-date development version, you can use `docker.all-hands.dev/all-hands-ai/openhands:main`. This version is unstable and is recommended for testing or development purposes only.
|
||||
The [docker command above](./installation#start-the-app) pulls the most recent stable release of OpenHands. You have other options as well:
|
||||
- For a specific release, replace $VERSION in `openhands:$VERSION` and `runtime:$VERSION`, with the version number.
|
||||
We use SemVer so `0.9` will automatically point to the latest `0.9.x` release, and `0` will point to the latest `0.x.x` release.
|
||||
- For the most up-to-date development version, replace $VERSION in `openhands:$VERSION` and `runtime:$VERSION`, with `main`.
|
||||
This version is unstable and is recommended for testing or development purposes only.
|
||||
|
||||
You can choose the tag that best suits your needs based on stability requirements and desired features.
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ You will need your ChatGPT deployment name which can be found on the deployments
|
||||
<deployment-name> below.
|
||||
:::
|
||||
|
||||
1. Enable `Advanced Options`
|
||||
1. Enable `Advanced` options
|
||||
2. Set the following:
|
||||
- `Custom Model` to azure/<deployment-name>
|
||||
- `Base URL` to your Azure API Base URL (e.g. `https://example-endpoint.openai.azure.com`)
|
||||
|
||||
@@ -10,7 +10,7 @@ OpenHands uses LiteLLM to make calls to Google's chat models. You can find their
|
||||
When running OpenHands, you'll need to set the following in the OpenHands UI through the Settings:
|
||||
- `LLM Provider` to `Gemini`
|
||||
- `LLM Model` to the model you will be using.
|
||||
If the model is not in the list, toggle `Advanced Options`, and enter it in `Custom Model` (e.g. gemini/<model-name> like `gemini/gemini-1.5-pro`).
|
||||
If the model is not in the list, toggle `Advanced` options, and enter it in `Custom Model` (e.g. gemini/<model-name> like `gemini/gemini-1.5-pro`).
|
||||
- `API Key` to your Gemini API key
|
||||
|
||||
## VertexAI - Google Cloud Platform Configs
|
||||
@@ -27,4 +27,4 @@ VERTEXAI_LOCATION="<your-gcp-location>"
|
||||
Then set the following in the OpenHands UI through the Settings:
|
||||
- `LLM Provider` to `VertexAI`
|
||||
- `LLM Model` to the model you will be using.
|
||||
If the model is not in the list, toggle `Advanced Options`, and enter it in `Custom Model` (e.g. vertex_ai/<model-name>).
|
||||
If the model is not in the list, toggle `Advanced` options, and enter it in `Custom Model` (e.g. vertex_ai/<model-name>).
|
||||
|
||||
@@ -8,7 +8,7 @@ When running OpenHands, you'll need to set the following in the OpenHands UI thr
|
||||
- `LLM Provider` to `Groq`
|
||||
- `LLM Model` to the model you will be using. [Visit here to see the list of
|
||||
models that Groq hosts](https://console.groq.com/docs/models). If the model is not in the list, toggle
|
||||
`Advanced Options`, and enter it in `Custom Model` (e.g. groq/<model-name> like `groq/llama3-70b-8192`).
|
||||
`Advanced` options, and enter it in `Custom Model` (e.g. groq/<model-name> like `groq/llama3-70b-8192`).
|
||||
- `API key` to your Groq API key. To find or create your Groq API Key, [see here](https://console.groq.com/keys).
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ models that Groq hosts](https://console.groq.com/docs/models). If the model is n
|
||||
|
||||
The Groq endpoint for chat completion is [mostly OpenAI-compatible](https://console.groq.com/docs/openai). Therefore, you can access Groq models as you
|
||||
would access any OpenAI-compatible endpoint. In the OpenHands UI through the Settings:
|
||||
1. Enable `Advanced Options`
|
||||
1. Enable `Advanced` options
|
||||
2. Set the following:
|
||||
- `Custom Model` to the prefix `openai/` + the model you will be using (e.g. `openai/llama3-70b-8192`)
|
||||
- `Base URL` to `https://api.groq.com/openai/v1`
|
||||
|
||||
@@ -8,7 +8,7 @@ To use LiteLLM proxy with OpenHands, you need to:
|
||||
|
||||
1. Set up a LiteLLM proxy server (see [LiteLLM documentation](https://docs.litellm.ai/docs/proxy/quick_start))
|
||||
2. When running OpenHands, you'll need to set the following in the OpenHands UI through the Settings:
|
||||
* Enable `Advanced Options`
|
||||
* Enable `Advanced` options
|
||||
* `Custom Model` to the prefix `litellm_proxy/` + the model you will be using (e.g. `litellm_proxy/anthropic.claude-3-5-sonnet-20241022-v2:0`)
|
||||
* `Base URL` to your LiteLLM proxy URL (e.g. `https://your-litellm-proxy.com`)
|
||||
* `API Key` to your LiteLLM proxy API key
|
||||
|
||||
@@ -38,7 +38,7 @@ The following can be set in the OpenHands UI through the Settings:
|
||||
- `LLM Provider`
|
||||
- `LLM Model`
|
||||
- `API Key`
|
||||
- `Base URL` (through `Advanced Settings`)
|
||||
- `Base URL` (through `Advanced` settings)
|
||||
|
||||
There are some settings that may be necessary for some LLMs/providers that cannot be set through the UI. Instead, these
|
||||
can be set through environment variables passed to the [docker run command](/modules/usage/installation#start-the-app)
|
||||
@@ -63,22 +63,22 @@ We have a few guides for running OpenHands with specific model providers:
|
||||
### API retries and rate limits
|
||||
|
||||
LLM providers typically have rate limits, sometimes very low, and may require retries. OpenHands will automatically
|
||||
retry requests if it receives a Rate Limit Error (429 error code), API connection error, or other transient errors.
|
||||
retry requests if it receives a Rate Limit Error (429 error code).
|
||||
|
||||
You can customize these options as you need for the provider you're using. Check their documentation, and set the
|
||||
following environment variables to control the number of retries and the time between retries:
|
||||
|
||||
- `LLM_NUM_RETRIES` (Default of 8)
|
||||
- `LLM_RETRY_MIN_WAIT` (Default of 15 seconds)
|
||||
- `LLM_RETRY_MAX_WAIT` (Default of 120 seconds)
|
||||
- `LLM_NUM_RETRIES` (Default of 4 times)
|
||||
- `LLM_RETRY_MIN_WAIT` (Default of 5 seconds)
|
||||
- `LLM_RETRY_MAX_WAIT` (Default of 30 seconds)
|
||||
- `LLM_RETRY_MULTIPLIER` (Default of 2)
|
||||
|
||||
If you are running OpenHands in development mode, you can also set these options in the `config.toml` file:
|
||||
|
||||
```toml
|
||||
[llm]
|
||||
num_retries = 8
|
||||
retry_min_wait = 15
|
||||
retry_max_wait = 120
|
||||
num_retries = 4
|
||||
retry_min_wait = 5
|
||||
retry_max_wait = 30
|
||||
retry_multiplier = 2
|
||||
```
|
||||
|
||||
@@ -8,7 +8,7 @@ When running OpenHands, you'll need to set the following in the OpenHands UI thr
|
||||
* `LLM Provider` to `OpenAI`
|
||||
* `LLM Model` to the model you will be using.
|
||||
[Visit here to see a full list of OpenAI models that LiteLLM supports.](https://docs.litellm.ai/docs/providers/openai#openai-chat-completion-models)
|
||||
If the model is not in the list, toggle `Advanced Options`, and enter it in `Custom Model` (e.g. openai/<model-name> like `openai/gpt-4o`).
|
||||
If the model is not in the list, toggle `Advanced` options, and enter it in `Custom Model` (e.g. openai/<model-name> like `openai/gpt-4o`).
|
||||
* `API Key` to your OpenAI API key. To find or create your OpenAI Project API Key, [see here](https://platform.openai.com/api-keys).
|
||||
|
||||
## Using OpenAI-Compatible Endpoints
|
||||
@@ -18,7 +18,7 @@ Just as for OpenAI Chat completions, we use LiteLLM for OpenAI-compatible endpoi
|
||||
## Using an OpenAI Proxy
|
||||
|
||||
If you're using an OpenAI proxy, in the OpenHands UI through the Settings:
|
||||
1. Enable `Advanced Options`
|
||||
1. Enable `Advanced` options
|
||||
2. Set the following:
|
||||
- `Custom Model` to openai/<model-name> (e.g. `openai/gpt-4o` or openai/<proxy-prefix>/<model-name>)
|
||||
- `Base URL` to the URL of your OpenAI proxy
|
||||
|
||||
@@ -8,5 +8,5 @@ When running OpenHands, you'll need to set the following in the OpenHands UI thr
|
||||
* `LLM Provider` to `OpenRouter`
|
||||
* `LLM Model` to the model you will be using.
|
||||
[Visit here to see a full list of OpenRouter models](https://openrouter.ai/models).
|
||||
If the model is not in the list, toggle `Advanced Options`, and enter it in `Custom Model` (e.g. openrouter/<model-name> like `openrouter/anthropic/claude-3.5-sonnet`).
|
||||
If the model is not in the list, toggle `Advanced` options, and enter it in `Custom Model` (e.g. openrouter/<model-name> like `openrouter/anthropic/claude-3.5-sonnet`).
|
||||
* `API Key` to your OpenRouter API key.
|
||||
|
||||
@@ -16,7 +16,7 @@ some flags being passed to `docker run` that make this possible:
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.23-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
+20
-3
@@ -42,7 +42,7 @@ const sidebars: SidebarsConfig = {
|
||||
id: 'usage/prompting/microagents-public',
|
||||
},
|
||||
],
|
||||
}
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
@@ -66,9 +66,26 @@ const sidebars: SidebarsConfig = {
|
||||
},
|
||||
{
|
||||
type: 'doc',
|
||||
label: 'Github Actions',
|
||||
label: 'Github Action',
|
||||
id: 'usage/how-to/github-action',
|
||||
},
|
||||
{
|
||||
type: 'category',
|
||||
label: 'Cloud',
|
||||
items: [
|
||||
{
|
||||
type: 'doc',
|
||||
label: 'Openhands Cloud',
|
||||
id: 'usage/cloud/openhands-cloud',
|
||||
},
|
||||
|
||||
{
|
||||
type: 'doc',
|
||||
label: 'Cloud GitHub Resolver',
|
||||
id: 'usage/cloud/cloud-github-resolver',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
@@ -185,7 +202,7 @@ const sidebars: SidebarsConfig = {
|
||||
type: 'doc',
|
||||
label: 'About',
|
||||
id: 'usage/about',
|
||||
}
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
|
||||
@@ -23,6 +23,17 @@ export default function Home(): JSX.Element {
|
||||
})}
|
||||
>
|
||||
<HomepageHeader />
|
||||
<div style={{ textAlign: 'center', padding: '2rem' }}>
|
||||
<br />
|
||||
<h2>Most Popular Links</h2>
|
||||
<ul style={{ listStyleType: 'none'}}>
|
||||
<li><a href="/modules/usage/Installation">How to Run OpenHands</a></li>
|
||||
<li><a href="/modules/usage/prompting/microagents-repo">Customizing OpenHands to a repository</a></li>
|
||||
<li><a href="/modules/usage/how-to/github-action">Integrating OpenHands with Github</a></li>
|
||||
<li><a href="/modules/usage/llms#model-recommendations">Recommended models to use</a></li>
|
||||
<li><a href="/modules/usage/runtimes#connecting-to-your-filesystem">Connecting OpenHands to your filesystem</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</Layout>
|
||||
);
|
||||
}
|
||||
|
||||
BIN
Binary file not shown.
|
Before Width: | Height: | Size: 28 KiB |
BIN
Binary file not shown.
|
Before Width: | Height: | Size: 33 KiB |
@@ -20,6 +20,8 @@ To evaluate an agent, you can provide the agent's name to the `run_infer.py` pro
|
||||
### Evaluating Different LLMs
|
||||
|
||||
OpenHands in development mode uses `config.toml` to keep track of most configuration.
|
||||
**IMPORTANT: For evaluation, only the LLM section in `config.toml` will be used. Other configurations, such as `save_trajectory_path`, are not applied during evaluation.**
|
||||
|
||||
Here's an example configuration file you can use to define and use multiple LLMs:
|
||||
|
||||
```toml
|
||||
@@ -40,6 +42,8 @@ api_key = "XXX"
|
||||
temperature = 0.0
|
||||
```
|
||||
|
||||
For other configurations specific to evaluation, such as `save_trajectory_path`, these are typically set in the `get_config` function of the respective `run_infer.py` file for each benchmark.
|
||||
|
||||
## Supported Benchmarks
|
||||
|
||||
The OpenHands evaluation harness supports a wide variety of benchmarks across [software engineering](#software-engineering), [web browsing](#web-browsing), [miscellaneous assistance](#misc-assistance), and [real-world](#real-world) tasks.
|
||||
|
||||
@@ -9,6 +9,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -17,7 +18,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
@@ -60,17 +60,14 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=False,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -17,6 +17,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -25,7 +26,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
parse_arguments,
|
||||
)
|
||||
@@ -40,21 +40,15 @@ from openhands.utils.async_utils import call_async_from_sync
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-slim'
|
||||
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-slim',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=3600,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -16,6 +16,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -24,7 +25,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
load_from_toml,
|
||||
parse_arguments,
|
||||
@@ -47,22 +47,14 @@ SKIP_NUM = (
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.11-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.11-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
timeout=100,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=1800,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -14,6 +14,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -22,7 +23,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
parse_arguments,
|
||||
)
|
||||
@@ -57,18 +57,15 @@ def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
BIOCODER_BENCH_CONTAINER_IMAGE = 'public.ecr.aws/i5g0m1f6/eval_biocoder:v1.0'
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = BIOCODER_BENCH_CONTAINER_IMAGE
|
||||
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image=BIOCODER_BENCH_CONTAINER_IMAGE,
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -17,6 +17,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -25,7 +26,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
parse_arguments,
|
||||
)
|
||||
@@ -71,17 +71,15 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -10,6 +10,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -18,7 +19,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
parse_arguments,
|
||||
)
|
||||
@@ -36,17 +36,14 @@ def get_config(
|
||||
assert (
|
||||
metadata.max_iterations == 1
|
||||
), 'max_iterations must be 1 for browsing delegation evaluation.'
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=False,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -15,6 +15,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
assert_and_raise,
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -25,7 +26,6 @@ from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AgentConfig,
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
@@ -105,9 +105,7 @@ def get_config(
|
||||
instance: pd.Series,
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
# COMMIT0_CONTAINER_IMAGE = 'wentingzhao/'
|
||||
assert USE_INSTANCE_IMAGE
|
||||
# We use a different instance image for the each instance of commit0 eval
|
||||
repo_name = instance['repo'].split('/')[1]
|
||||
base_container_image = get_instance_docker_image(repo_name)
|
||||
logger.info(
|
||||
@@ -115,28 +113,16 @@ def get_config(
|
||||
f'Please make sure this image exists. '
|
||||
f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
|
||||
)
|
||||
# else:
|
||||
# raise
|
||||
# base_container_image = SWE_BENCH_CONTAINER_IMAGE
|
||||
# logger.info(f'Using swe-bench container image: {base_container_image}')
|
||||
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = base_container_image
|
||||
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
# large enough timeout, since some testcases take very long to run
|
||||
timeout=300,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=3600,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -16,6 +16,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -25,7 +26,6 @@ from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AgentConfig,
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
parse_arguments,
|
||||
)
|
||||
@@ -62,17 +62,14 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -13,6 +13,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -21,10 +22,10 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
from openhands.core.config.utils import get_agent_config_arg
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.main import create_runtime, run_controller
|
||||
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
|
||||
@@ -47,24 +48,25 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
if metadata.agent_config:
|
||||
config.set_agent_config(metadata.agent_config, metadata.agent_class)
|
||||
else:
|
||||
logger.info('Agent config not provided, using default settings')
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
@@ -238,6 +240,10 @@ if __name__ == '__main__':
|
||||
)
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
agent_config = None
|
||||
if args.agent_config:
|
||||
agent_config = get_agent_config_arg(args.agent_config)
|
||||
|
||||
llm_config = None
|
||||
if args.llm_config:
|
||||
llm_config = get_llm_config_arg(args.llm_config)
|
||||
@@ -256,6 +262,7 @@ if __name__ == '__main__':
|
||||
eval_output_dir=args.eval_output_dir,
|
||||
data_split=args.data_split,
|
||||
details={'gaia-level': args.level},
|
||||
agent_config=agent_config,
|
||||
)
|
||||
|
||||
dataset = load_dataset('gaia-benchmark/GAIA', args.level)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
@@ -9,6 +9,7 @@ AGENT=$3
|
||||
EVAL_LIMIT=$4
|
||||
LEVELS=$5
|
||||
NUM_WORKERS=$6
|
||||
AGENT_CONFIG=$7
|
||||
|
||||
if [ -z "$NUM_WORKERS" ]; then
|
||||
NUM_WORKERS=1
|
||||
@@ -49,5 +50,9 @@ if [ -n "$EVAL_LIMIT" ]; then
|
||||
COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
|
||||
fi
|
||||
|
||||
if [ -n "$AGENT_CONFIG" ]; then
|
||||
echo "AGENT_CONFIG: $AGENT_CONFIG"
|
||||
COMMAND="$COMMAND --agent-config $AGENT_CONFIG"
|
||||
|
||||
# Run the command
|
||||
eval $COMMAND
|
||||
|
||||
@@ -11,6 +11,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -19,7 +20,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
@@ -40,17 +40,14 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -29,6 +29,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -37,7 +38,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
@@ -61,17 +61,14 @@ ACTION_FORMAT = """
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -22,6 +22,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -30,7 +31,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
parse_arguments,
|
||||
)
|
||||
@@ -82,17 +82,14 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -9,6 +9,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -17,7 +18,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
@@ -45,18 +45,18 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'xingyaoww/od-eval-logic-reasoning:v1.0'
|
||||
sandbox_config.runtime_extra_deps = (
|
||||
'$OH_INTERPRETER_PATH -m pip install scitools-pyke'
|
||||
)
|
||||
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='xingyaoww/od-eval-logic-reasoning:v1.0',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
runtime_extra_deps='$OH_INTERPRETER_PATH -m pip install scitools-pyke',
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -12,6 +12,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -21,7 +22,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
parse_arguments,
|
||||
)
|
||||
@@ -55,23 +55,14 @@ def get_config(
|
||||
metadata: EvalMetadata,
|
||||
env_id: str,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'xingyaoww/od-eval-miniwob:v1.0'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='xingyaoww/od-eval-miniwob:v1.0',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
browsergym_eval_env=env_id,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
remote_runtime_init_timeout=1800,
|
||||
keep_runtime_alive=False,
|
||||
timeout=120,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -14,6 +14,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -22,7 +23,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
@@ -103,18 +103,18 @@ def load_incontext_example(task_name: str, with_tool: bool = True):
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'xingyaoww/od-eval-mint:v1.0'
|
||||
sandbox_config.runtime_extra_deps = (
|
||||
f'$OH_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}'
|
||||
)
|
||||
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='xingyaoww/od-eval-mint:v1.0',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
runtime_extra_deps=f'$OH_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}',
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -25,6 +25,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -33,7 +34,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
load_app_config,
|
||||
@@ -77,16 +77,14 @@ ID2CONDA = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'public.ecr.aws/i5g0m1f6/ml-bench'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='public.ecr.aws/i5g0m1f6/ml-bench',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Step 1: Stop all running containers
|
||||
echo "Stopping all running containers..."
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
RESULT_FILE=$1
|
||||
MODEL_CONFIG=$2
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -11,6 +11,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -20,7 +21,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
@@ -59,22 +59,17 @@ def get_config(
|
||||
metadata: EvalMetadata,
|
||||
instance_id: str,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = (
|
||||
'docker.io/xingyaoww/openhands-eval-scienceagentbench'
|
||||
)
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
max_budget_per_task=4,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='docker.io/xingyaoww/openhands-eval-scienceagentbench',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
timeout=300,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
import time
|
||||
from functools import partial
|
||||
@@ -21,13 +22,14 @@ from evaluation.benchmarks.swe_bench.run_infer import get_instance_docker_image
|
||||
from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_default_sandbox_config_for_eval,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
)
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
LLMConfig,
|
||||
get_parser,
|
||||
)
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
@@ -79,22 +81,16 @@ def get_config(metadata: EvalMetadata, instance: pd.Series) -> AppConfig:
|
||||
f'Please make sure this image exists. '
|
||||
f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
|
||||
)
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = base_container_image
|
||||
sandbox_config.remote_runtime_resource_factor = get_instance_resource_factor(
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
config = AppConfig(
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
use_host_network=False,
|
||||
# large enough timeout, since some testcases take very long to run
|
||||
timeout=600,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
remote_runtime_init_timeout=3600,
|
||||
remote_runtime_resource_factor=get_instance_resource_factor(
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
),
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
@@ -415,13 +411,17 @@ if __name__ == '__main__':
|
||||
else:
|
||||
# Initialize with a dummy metadata when file doesn't exist
|
||||
metadata = EvalMetadata(
|
||||
agent_class="dummy_agent", # Placeholder agent class
|
||||
llm_config=LLMConfig(model="dummy_model"), # Minimal LLM config
|
||||
agent_class='dummy_agent', # Placeholder agent class
|
||||
llm_config=LLMConfig(model='dummy_model'), # Minimal LLM config
|
||||
max_iterations=1, # Minimal iterations
|
||||
eval_output_dir=os.path.dirname(args.input_file), # Use input file dir as output dir
|
||||
eval_output_dir=os.path.dirname(
|
||||
args.input_file
|
||||
), # Use input file dir as output dir
|
||||
start_time=time.strftime('%Y-%m-%d %H:%M:%S'), # Current time
|
||||
git_commit=subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode('utf-8').strip(), # Current commit
|
||||
dataset=args.dataset # Dataset name from args
|
||||
git_commit=subprocess.check_output(['git', 'rev-parse', 'HEAD'])
|
||||
.decode('utf-8')
|
||||
.strip(), # Current commit
|
||||
dataset=args.dataset, # Dataset name from args
|
||||
)
|
||||
|
||||
# The evaluation harness constrains the signature of `process_instance_func` but we need to
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
{"pydata__xarray-6721": 8, "pytest-dev__pytest-7236": 8, "matplotlib__matplotlib-24627": 4, "django__django-15561": 4, "django__django-15098": 4, "django__django-14771": 4, "sympy__sympy-21612": 4, "sympy__sympy-15345": 4, "psf__requests-5414": 4, "astropy__astropy-14508": 2, "django__django-11451": 2, "django__django-11477": 2, "django__django-10880": 2, "django__django-11163": 2, "django__django-11815": 2, "astropy__astropy-14369": 2, "django__django-10097": 2, "django__django-10554": 2, "django__django-12304": 2, "django__django-12325": 2, "django__django-11551": 2, "django__django-11734": 2, "django__django-13109": 2, "django__django-13089": 2, "django__django-13343": 2, "django__django-13363": 2, "django__django-13809": 2, "django__django-13810": 2, "django__django-13786": 2, "django__django-13807": 2, "django__django-14493": 2, "django__django-11820": 2, "django__django-11951": 2, "django__django-11964": 2, "astropy__astropy-14309": 2, "astropy__astropy-14365": 2, "astropy__astropy-12907": 2, "astropy__astropy-14182": 2, "django__django-15161": 2, "django__django-15128": 2, "django__django-14999": 2, "django__django-14915": 2, "django__django-14752": 2, "django__django-14765": 2, "django__django-14089": 2, "django__django-15252": 2, "django__django-15380": 2, "django__django-15382": 2, "django__django-15499": 2, "django__django-15467": 2, "django__django-15280": 2, "django__django-15315": 2, "django__django-15277": 2, "django__django-15268": 2, "django__django-15629": 2, "django__django-15695": 2, "django__django-15732": 2, "django__django-15863": 2, "django__django-16082": 2, "django__django-16145": 2, "django__django-16256": 2, "django__django-16429": 2, "django__django-16454": 2, "django__django-16493": 2, "matplotlib__matplotlib-13989": 2, "matplotlib__matplotlib-20488": 2, "django__django-15503": 2, "django__django-15525": 2, "django__django-15375": 2, "django__django-15278": 2, "matplotlib__matplotlib-21568": 2, "matplotlib__matplotlib-20859": 2, "matplotlib__matplotlib-20826": 2, "matplotlib__matplotlib-20676": 2, "matplotlib__matplotlib-23412": 2, "matplotlib__matplotlib-22719": 2, "matplotlib__matplotlib-23299": 2, "matplotlib__matplotlib-22865": 2, "matplotlib__matplotlib-24149": 2, "matplotlib__matplotlib-24177": 2, "matplotlib__matplotlib-24570": 2, "matplotlib__matplotlib-24637": 2, "matplotlib__matplotlib-24970": 2, "matplotlib__matplotlib-23476": 2, "matplotlib__matplotlib-24026": 2, "matplotlib__matplotlib-23314": 2, "matplotlib__matplotlib-25332": 2, "matplotlib__matplotlib-25311": 2, "matplotlib__matplotlib-25122": 2, "matplotlib__matplotlib-25479": 2, "matplotlib__matplotlib-26342": 2, "psf__requests-2317": 2, "matplotlib__matplotlib-25960": 2, "matplotlib__matplotlib-25775": 2, "pydata__xarray-4356": 2, "pydata__xarray-4075": 2, "pydata__xarray-6461": 2, "pydata__xarray-4687": 2, "pydata__xarray-6599": 2, "pylint-dev__pylint-4661": 2, "django__django-15554": 2, "django__django-15563": 2, "pytest-dev__pytest-5262": 2, "pytest-dev__pytest-10081": 2, "scikit-learn__scikit-learn-12973": 2, "scikit-learn__scikit-learn-13124": 2, "scikit-learn__scikit-learn-13779": 2, "scikit-learn__scikit-learn-14141": 2, "scikit-learn__scikit-learn-13439": 2, "scikit-learn__scikit-learn-13496": 2, "scikit-learn__scikit-learn-15100": 2, "scikit-learn__scikit-learn-25102": 2, "scikit-learn__scikit-learn-25232": 2, "scikit-learn__scikit-learn-25747": 2, "scikit-learn__scikit-learn-26323": 2, "scikit-learn__scikit-learn-9288": 2, "scikit-learn__scikit-learn-14496": 2, "scikit-learn__scikit-learn-14629": 2, "sphinx-doc__sphinx-8265": 2, "sphinx-doc__sphinx-8548": 2, "sphinx-doc__sphinx-8593": 2, "sphinx-doc__sphinx-8595": 2, "sphinx-doc__sphinx-8621": 2, "sphinx-doc__sphinx-8638": 2, "sphinx-doc__sphinx-9229": 2, "sphinx-doc__sphinx-9281": 2, "sphinx-doc__sphinx-9461": 2, "sphinx-doc__sphinx-9591": 2, "sphinx-doc__sphinx-9658": 2, "sphinx-doc__sphinx-9673": 2, "sympy__sympy-12096": 2, "sympy__sympy-12481": 2, "sphinx-doc__sphinx-10323": 2, "sphinx-doc__sphinx-7590": 2, "sympy__sympy-13877": 2, "sympy__sympy-12489": 2, "sympy__sympy-15809": 2, "sympy__sympy-14711": 2, "sympy__sympy-16597": 2, "sympy__sympy-16766": 2, "sympy__sympy-16792": 2, "sympy__sympy-15875": 2, "sympy__sympy-17655": 2, "sympy__sympy-18189": 2, "sympy__sympy-18763": 2, "sympy__sympy-19040": 2, "sympy__sympy-19495": 2, "sympy__sympy-19637": 2, "sympy__sympy-19783": 2, "sympy__sympy-17630": 2, "sympy__sympy-20428": 2, "sympy__sympy-20590": 2, "sympy__sympy-20801": 2, "sympy__sympy-21379": 2, "sympy__sympy-21847": 2, "sympy__sympy-22456": 2, "sympy__sympy-22714": 2, "sympy__sympy-22914": 2, "sympy__sympy-23262": 2, "sympy__sympy-23413": 2, "sympy__sympy-23534": 2, "sympy__sympy-24066": 2, "sympy__sympy-24213": 2, "sympy__sympy-24443": 2, "sympy__sympy-24562": 2, "sympy__sympy-24661": 2}
|
||||
@@ -18,6 +18,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
assert_and_raise,
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
@@ -30,7 +31,6 @@ from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AgentConfig,
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
@@ -122,30 +122,23 @@ def get_config(
|
||||
base_container_image = SWE_BENCH_CONTAINER_IMAGE
|
||||
logger.info(f'Using swe-bench container image: {base_container_image}')
|
||||
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = base_container_image
|
||||
sandbox_config.enable_auto_lint = True
|
||||
sandbox_config.use_host_network = False
|
||||
# Add platform to the sandbox config to solve issue 4401
|
||||
sandbox_config.platform = 'linux/amd64'
|
||||
sandbox_config.remote_runtime_resource_factor = get_instance_resource_factor(
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
# large enough timeout, since some testcases take very long to run
|
||||
timeout=300,
|
||||
# Add platform to the sandbox config to solve issue 4401
|
||||
platform='linux/amd64',
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=3600,
|
||||
remote_runtime_api_timeout=120,
|
||||
remote_runtime_resource_factor=get_instance_resource_factor(
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
),
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
@@ -331,6 +324,22 @@ def complete_runtime(
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
if obs.exit_code == -1:
|
||||
# The previous command is still running
|
||||
# We need to kill previous command
|
||||
logger.info('The previous command is still running, trying to kill it...')
|
||||
action = CmdRunAction(command='C-c')
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
# Then run the command again
|
||||
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
assert_and_raise(
|
||||
isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
|
||||
f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
LEVEL=$1
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# This is ONLY used for pushing docker images created by https://github.com/princeton-nlp/SWE-bench/blob/main/docs/20240627_docker/README.md
|
||||
|
||||
|
||||
+1
-1
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
FOLDER_PATH=$1
|
||||
NEW_FOLDER_PATH=${FOLDER_PATH}.swebench_submission
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
PROCESS_FILEPATH=$1
|
||||
if [ -z "$PROCESS_FILEPATH" ]; then
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
INPUT_FILE=$1
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
source ~/.bashrc
|
||||
SWEUTIL_DIR=/swe_util
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
EVAL_WORKSPACE="evaluation/benchmarks/swe_bench/eval_workspace"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
|
||||
@@ -267,7 +267,9 @@ def pre_login(
|
||||
obs: BrowserOutputObservation = runtime.run_action(browser_action)
|
||||
logger.debug(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
if save_screenshots:
|
||||
image_data = base64.b64decode(obs.screenshot)
|
||||
image_data = base64.b64decode(
|
||||
obs.screenshot.replace('data:image/png;base64,', '')
|
||||
)
|
||||
with open(os.path.join(directory, f'{image_id}.png'), 'wb') as file:
|
||||
file.write(image_data)
|
||||
image_id += 1
|
||||
|
||||
@@ -13,14 +13,16 @@ from typing import List
|
||||
import yaml
|
||||
from browsing import pre_login
|
||||
|
||||
from evaluation.utils.shared import get_default_sandbox_config_for_eval
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
LLMConfig,
|
||||
SandboxConfig,
|
||||
get_agent_config_arg,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
from openhands.core.config.agent_config import AgentConfig
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.main import create_runtime, run_controller
|
||||
from openhands.events.action import CmdRunAction, MessageAction
|
||||
@@ -34,7 +36,10 @@ def get_config(
|
||||
task_short_name: str,
|
||||
mount_path_on_host: str,
|
||||
llm_config: LLMConfig,
|
||||
agent_config: AgentConfig | None,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = base_container_image
|
||||
config = AppConfig(
|
||||
run_as_openhands=False,
|
||||
max_budget_per_task=4,
|
||||
@@ -42,22 +47,21 @@ def get_config(
|
||||
save_trajectory_path=os.path.join(
|
||||
mount_path_on_host, f'traj_{task_short_name}.json'
|
||||
),
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
enable_auto_lint=True,
|
||||
# using host network to access the host machine from the container
|
||||
use_host_network=True,
|
||||
# large enough timeout, since some testcases take very long to run
|
||||
timeout=300,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# we mount trajectories path so that trajectories, generated by OpenHands
|
||||
# controller, can be accessible to the evaluator file in the runtime container
|
||||
workspace_mount_path=mount_path_on_host,
|
||||
workspace_mount_path_in_sandbox='/outputs',
|
||||
)
|
||||
config.set_llm_config(llm_config)
|
||||
if agent_config:
|
||||
config.set_agent_config(agent_config)
|
||||
else:
|
||||
logger.info('Agent config not provided, using default settings')
|
||||
agent_config = AgentConfig(
|
||||
enable_prompt_extensions=False,
|
||||
)
|
||||
config.set_agent_config(agent_config)
|
||||
return config
|
||||
|
||||
|
||||
@@ -148,11 +152,21 @@ def run_solver(
|
||||
os.makedirs(screenshots_dir, exist_ok=True)
|
||||
for image_id, obs in enumerate(state.history):
|
||||
if isinstance(obs, BrowserOutputObservation):
|
||||
image_data = base64.b64decode(obs.screenshot)
|
||||
image_data = base64.b64decode(
|
||||
obs.screenshot.replace('data:image/png;base64,', '')
|
||||
)
|
||||
with open(
|
||||
os.path.join(screenshots_dir, f'{image_id}.png'), 'wb'
|
||||
) as file:
|
||||
file.write(image_data)
|
||||
if obs.set_of_marks:
|
||||
som_image_data = base64.b64decode(
|
||||
obs.set_of_marks.replace('data:image/png;base64,', '')
|
||||
)
|
||||
with open(
|
||||
os.path.join(screenshots_dir, f'{image_id}_som.png'), 'wb'
|
||||
) as file:
|
||||
file.write(som_image_data)
|
||||
|
||||
if save_final_state:
|
||||
os.makedirs(state_dir, exist_ok=True)
|
||||
@@ -215,6 +229,10 @@ if __name__ == '__main__':
|
||||
)
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
agent_config: AgentConfig | None = None
|
||||
if args.agent_config:
|
||||
agent_config = get_agent_config_arg(args.agent_config)
|
||||
|
||||
agent_llm_config: LLMConfig | None = None
|
||||
if args.agent_llm_config:
|
||||
agent_llm_config = get_llm_config_arg(args.agent_llm_config)
|
||||
@@ -255,7 +273,7 @@ if __name__ == '__main__':
|
||||
else:
|
||||
temp_dir = tempfile.mkdtemp()
|
||||
config: AppConfig = get_config(
|
||||
args.task_image_name, task_short_name, temp_dir, agent_llm_config
|
||||
args.task_image_name, task_short_name, temp_dir, agent_llm_config, agent_config
|
||||
)
|
||||
runtime: Runtime = create_runtime(config)
|
||||
call_async_from_sync(runtime.connect)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
##################################################################################################
|
||||
# Adapted from https://github.com/TheAgentCompany/TheAgentCompany/blob/main/evaluation/run_eval.sh
|
||||
@@ -44,6 +44,10 @@ while [[ $# -gt 0 ]]; do
|
||||
ENV_LLM_CONFIG="$2"
|
||||
shift 2
|
||||
;;
|
||||
--agent-config)
|
||||
AGENT_CONFIG="$2"
|
||||
shift 2
|
||||
;;
|
||||
--outputs-path)
|
||||
OUTPUTS_PATH="$2"
|
||||
shift 2
|
||||
@@ -125,8 +129,6 @@ temp_file="tasks_${START_PERCENTILE}_${END_PERCENTILE}.md"
|
||||
sed -n "${start_line},${end_line}p" tasks.md > "$temp_file"
|
||||
|
||||
while IFS= read -r task_image; do
|
||||
docker pull $task_image
|
||||
|
||||
# Remove prefix using ## to remove longest matching pattern from start
|
||||
task_name=${task_image##ghcr.io/theagentcompany/}
|
||||
|
||||
@@ -140,13 +142,23 @@ while IFS= read -r task_image; do
|
||||
continue
|
||||
fi
|
||||
|
||||
export PYTHONPATH=evaluation/benchmarks/the_agent_company:\$PYTHONPATH && \
|
||||
poetry run python run_infer.py \
|
||||
--agent-llm-config "$AGENT_LLM_CONFIG" \
|
||||
--env-llm-config "$ENV_LLM_CONFIG" \
|
||||
--outputs-path "$OUTPUTS_PATH" \
|
||||
--server-hostname "$SERVER_HOSTNAME" \
|
||||
--task-image-name "$task_image"
|
||||
docker pull $task_image
|
||||
|
||||
# Build the Python command
|
||||
COMMAND="poetry run python run_infer.py \
|
||||
--agent-llm-config \"$AGENT_LLM_CONFIG\" \
|
||||
--env-llm-config \"$ENV_LLM_CONFIG\" \
|
||||
--outputs-path \"$OUTPUTS_PATH\" \
|
||||
--server-hostname \"$SERVER_HOSTNAME\" \
|
||||
--task-image-name \"$task_image\""
|
||||
|
||||
# Add agent-config if it's defined
|
||||
if [ -n "$AGENT_CONFIG" ]; then
|
||||
COMMAND="$COMMAND --agent-config $AGENT_CONFIG"
|
||||
fi
|
||||
|
||||
export PYTHONPATH=evaluation/benchmarks/the_agent_company:$PYTHONPATH && \
|
||||
eval "$COMMAND"
|
||||
|
||||
# Prune unused images and volumes
|
||||
docker image rm "$task_image"
|
||||
|
||||
@@ -10,6 +10,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -18,7 +19,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
@@ -41,17 +41,14 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -11,6 +11,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -20,7 +21,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
parse_arguments,
|
||||
)
|
||||
@@ -55,32 +55,29 @@ def get_config(
|
||||
assert base_url is not None, 'VISUALWEBARENA_BASE_URL must be set'
|
||||
assert openai_api_key is not None, 'OPENAI_API_KEY must be set'
|
||||
assert openai_base_url is not None, 'OPENAI_BASE_URL must be set'
|
||||
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
sandbox_config.browsergym_eval_env = env_id
|
||||
sandbox_config.runtime_startup_env_vars = {
|
||||
'BASE_URL': base_url,
|
||||
'OPENAI_API_KEY': openai_api_key,
|
||||
'OPENAI_BASE_URL': openai_base_url,
|
||||
'VWA_CLASSIFIEDS': f'{base_url}:9980',
|
||||
'VWA_CLASSIFIEDS_RESET_TOKEN': '4b61655535e7ed388f0d40a93600254c',
|
||||
'VWA_SHOPPING': f'{base_url}:7770',
|
||||
'VWA_SHOPPING_ADMIN': f'{base_url}:7780/admin',
|
||||
'VWA_REDDIT': f'{base_url}:9999',
|
||||
'VWA_GITLAB': f'{base_url}:8023',
|
||||
'VWA_WIKIPEDIA': f'{base_url}:8888',
|
||||
'VWA_HOMEPAGE': f'{base_url}:4399',
|
||||
}
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
browsergym_eval_env=env_id,
|
||||
runtime_startup_env_vars={
|
||||
'BASE_URL': base_url,
|
||||
'OPENAI_API_KEY': openai_api_key,
|
||||
'OPENAI_BASE_URL': openai_base_url,
|
||||
'VWA_CLASSIFIEDS': f'{base_url}:9980',
|
||||
'VWA_CLASSIFIEDS_RESET_TOKEN': '4b61655535e7ed388f0d40a93600254c',
|
||||
'VWA_SHOPPING': f'{base_url}:7770',
|
||||
'VWA_SHOPPING_ADMIN': f'{base_url}:7780/admin',
|
||||
'VWA_REDDIT': f'{base_url}:9999',
|
||||
'VWA_GITLAB': f'{base_url}:8023',
|
||||
'VWA_WIKIPEDIA': f'{base_url}:8888',
|
||||
'VWA_HOMEPAGE': f'{base_url}:4399',
|
||||
},
|
||||
timeout=300,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -11,6 +11,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -19,7 +20,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
parse_arguments,
|
||||
)
|
||||
@@ -50,29 +50,26 @@ def get_config(
|
||||
assert base_url is not None, 'WEBARENA_BASE_URL must be set'
|
||||
assert openai_api_key is not None, 'OPENAI_API_KEY must be set'
|
||||
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
sandbox_config.browsergym_eval_env = env_id
|
||||
sandbox_config.runtime_startup_env_vars = {
|
||||
'BASE_URL': base_url,
|
||||
'OPENAI_API_KEY': openai_api_key,
|
||||
'SHOPPING': f'{base_url}:7770/',
|
||||
'SHOPPING_ADMIN': f'{base_url}:7780/admin',
|
||||
'REDDIT': f'{base_url}:9999',
|
||||
'GITLAB': f'{base_url}:8023',
|
||||
'WIKIPEDIA': f'{base_url}:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing',
|
||||
'MAP': f'{base_url}:3000',
|
||||
'HOMEPAGE': f'{base_url}:4399',
|
||||
}
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
browsergym_eval_env=env_id,
|
||||
runtime_startup_env_vars={
|
||||
'BASE_URL': base_url,
|
||||
'OPENAI_API_KEY': openai_api_key,
|
||||
'SHOPPING': f'{base_url}:7770/',
|
||||
'SHOPPING_ADMIN': f'{base_url}:7780/admin',
|
||||
'REDDIT': f'{base_url}:9999',
|
||||
'GITLAB': f'{base_url}:8023',
|
||||
'WIKIPEDIA': f'{base_url}:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing',
|
||||
'MAP': f'{base_url}:3000',
|
||||
'HOMEPAGE': f'{base_url}:4399',
|
||||
},
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user