mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
18 Commits
add-regres
...
improve-ag
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6ba79c454b | ||
|
|
fbc06f42aa | ||
|
|
f35ed5e277 | ||
|
|
6787a3adf7 | ||
|
|
fa50e0c9b9 | ||
|
|
f4c5bbda19 | ||
|
|
6562297615 | ||
|
|
0217a7cfbd | ||
|
|
aa15c9d385 | ||
|
|
8ad89e368a | ||
|
|
29ba94fc0f | ||
|
|
8956f92f6a | ||
|
|
753e3c4205 | ||
|
|
ecd573febc | ||
|
|
325a558fbc | ||
|
|
666c186826 | ||
|
|
2d2dbf1561 | ||
|
|
abac25cc4c |
15
.github/pull_request_template.md
vendored
15
.github/pull_request_template.md
vendored
@@ -1,11 +1,12 @@
|
||||
**End-user friendly description of the problem this fixes or functionality that this introduces**
|
||||
|
||||
- [ ] Include this change in the Release Notes. If checked, you must provide an **end-user friendly** description for your change below
|
||||
|
||||
---
|
||||
**Give a summary of what the PR does, explaining any non-trivial design decisions**
|
||||
- [ ] This change is worth documenting at https://docs.all-hands.dev/
|
||||
- [ ] Include this change in the Release Notes. If checked, you **must** provide an **end-user friendly** description for your change below
|
||||
|
||||
**End-user friendly description of the problem this fixes or functionality that this introduces.**
|
||||
|
||||
|
||||
---
|
||||
**Link of any specific issues this addresses**
|
||||
**Give a summary of what the PR does, explaining any non-trivial design decisions.**
|
||||
|
||||
|
||||
---
|
||||
**Link of any specific issues this addresses.**
|
||||
|
||||
2
.github/workflows/py-unit-tests.yml
vendored
2
.github/workflows/py-unit-tests.yml
vendored
@@ -48,7 +48,7 @@ jobs:
|
||||
- name: Build Environment
|
||||
run: make build
|
||||
- name: Run Tests
|
||||
run: poetry run pytest --forked -n auto --cov=openhands --cov-report=xml -svv ./tests/unit --ignore=tests/unit/test_memory.py
|
||||
run: poetry run pytest --forked -n auto --cov=openhands --cov-report=xml -svv ./tests/unit --ignore=tests/unit/test_long_term_memory.py
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v5
|
||||
env:
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -233,6 +233,3 @@ containers/runtime/Dockerfile
|
||||
containers/runtime/project.tar.gz
|
||||
containers/runtime/code
|
||||
**/node_modules/
|
||||
|
||||
# regression test workspaces
|
||||
tests/regression/cases/*/workspace/
|
||||
|
||||
@@ -100,7 +100,7 @@ poetry run pytest ./tests/unit/test_*.py
|
||||
To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker container image by
|
||||
setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
|
||||
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.25-nikolaik`
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.26-nikolaik`
|
||||
|
||||
## Develop inside Docker container
|
||||
|
||||
|
||||
2
Makefile
2
Makefile
@@ -1,4 +1,4 @@
|
||||
SHELL=/bin/bash
|
||||
SHELL=/usr/bin/env bash
|
||||
# Makefile for OpenHands project
|
||||
|
||||
# Variables
|
||||
|
||||
@@ -43,17 +43,17 @@ See the [Running OpenHands](https://docs.all-hands.dev/modules/usage/installatio
|
||||
system requirements and more information.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26
|
||||
```
|
||||
|
||||
You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!
|
||||
|
||||
@@ -17,6 +17,12 @@
|
||||
#modal_api_token_id = ""
|
||||
#modal_api_token_secret = ""
|
||||
|
||||
# API key for Daytona
|
||||
#daytona_api_key = ""
|
||||
|
||||
# Daytona Target
|
||||
#daytona_target = ""
|
||||
|
||||
# Base path for the workspace
|
||||
workspace_base = "./workspace"
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
# Initialize variables with default values
|
||||
|
||||
@@ -11,7 +11,7 @@ services:
|
||||
- BACKEND_HOST=${BACKEND_HOST:-"0.0.0.0"}
|
||||
- SANDBOX_API_HOSTNAME=host.docker.internal
|
||||
#
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.25-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.26-nikolaik}
|
||||
- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -o pipefail
|
||||
|
||||
function get_docker() {
|
||||
|
||||
@@ -7,7 +7,7 @@ services:
|
||||
image: openhands:latest
|
||||
container_name: openhands-app-${DATE:-}
|
||||
environment:
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik}
|
||||
#- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234} # enable this only if you want a specific non-root sandbox user but you will have to manually adjust permissions of openhands-state for this user
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
@@ -52,7 +52,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -61,7 +61,7 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -56,6 +56,6 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
|
||||
```
|
||||
|
||||
@@ -13,16 +13,16 @@
|
||||
La façon la plus simple d'exécuter OpenHands est avec Docker.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26
|
||||
```
|
||||
|
||||
Vous pouvez également exécuter OpenHands en mode [headless scriptable](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), en tant que [CLI interactive](https://docs.all-hands.dev/modules/usage/how-to/cli-mode), ou en utilisant l'[Action GitHub OpenHands](https://docs.all-hands.dev/modules/usage/how-to/github-action).
|
||||
|
||||
@@ -13,7 +13,7 @@ C'est le Runtime par défaut qui est utilisé lorsque vous démarrez OpenHands.
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -50,7 +50,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -59,7 +59,7 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -57,6 +57,6 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
|
||||
```
|
||||
|
||||
@@ -11,16 +11,16 @@
|
||||
在 Docker 中运行 OpenHands 是最简单的方式。
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26
|
||||
```
|
||||
|
||||
你也可以在可脚本化的[无头模式](https://docs.all-hands.dev/modules/usage/how-to/headless-mode)下运行 OpenHands,作为[交互式 CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode),或使用 [OpenHands GitHub Action](https://docs.all-hands.dev/modules/usage/how-to/github-action)。
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -35,7 +35,7 @@ To run OpenHands in CLI mode with Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -45,7 +45,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ To run OpenHands in Headless mode with Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -43,7 +43,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi"
|
||||
```
|
||||
|
||||
|
||||
@@ -58,17 +58,17 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to
|
||||
The easiest way to run OpenHands is in Docker.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26
|
||||
```
|
||||
|
||||
You'll find OpenHands running at http://localhost:3000!
|
||||
|
||||
@@ -16,7 +16,7 @@ some flags being passed to `docker run` that make this possible:
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Step 1: Stop all running containers
|
||||
echo "Stopping all running containers..."
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
RESULT_FILE=$1
|
||||
MODEL_CONFIG=$2
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
LEVEL=$1
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# This is ONLY used for pushing docker images created by https://github.com/princeton-nlp/SWE-bench/blob/main/docs/20240627_docker/README.md
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
FOLDER_PATH=$1
|
||||
NEW_FOLDER_PATH=${FOLDER_PATH}.swebench_submission
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
PROCESS_FILEPATH=$1
|
||||
if [ -z "$PROCESS_FILEPATH" ]; then
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
INPUT_FILE=$1
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
source ~/.bashrc
|
||||
SWEUTIL_DIR=/swe_util
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
EVAL_WORKSPACE="evaluation/benchmarks/swe_bench/eval_workspace"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
##################################################################################################
|
||||
# Adapted from https://github.com/TheAgentCompany/TheAgentCompany/blob/main/evaluation/run_eval.sh
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
echo "hello world"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
|
||||
# API base URL
|
||||
|
||||
@@ -65,6 +65,12 @@ describe("extractModelAndProvider", () => {
|
||||
separator: "/",
|
||||
});
|
||||
|
||||
expect(extractModelAndProvider("claude-3-7-sonnet-20250219")).toEqual({
|
||||
provider: "anthropic",
|
||||
model: "claude-3-7-sonnet-20250219",
|
||||
separator: "/",
|
||||
});
|
||||
|
||||
expect(extractModelAndProvider("claude-3-haiku-20240307")).toEqual({
|
||||
provider: "anthropic",
|
||||
model: "claude-3-haiku-20240307",
|
||||
|
||||
4
frontend/package-lock.json
generated
4
frontend/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "openhands-frontend",
|
||||
"version": "0.25.0",
|
||||
"version": "0.26.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "openhands-frontend",
|
||||
"version": "0.25.0",
|
||||
"version": "0.26.0",
|
||||
"dependencies": {
|
||||
"@heroui/react": "2.6.14",
|
||||
"@monaco-editor/react": "^4.7.0-rc.0",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "openhands-frontend",
|
||||
"version": "0.25.0",
|
||||
"version": "0.26.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"engines": {
|
||||
|
||||
@@ -12,13 +12,14 @@ export function StyledSwitchComponent({
|
||||
className={cn(
|
||||
"w-12 h-6 rounded-xl flex items-center p-1.5 cursor-pointer",
|
||||
isToggled && "justify-end bg-primary",
|
||||
!isToggled && "justify-start bg-[#1F2228] border border-tertiary-alt",
|
||||
!isToggled &&
|
||||
"justify-start bg-base-secondary border border-tertiary-light",
|
||||
)}
|
||||
>
|
||||
<div
|
||||
className={cn(
|
||||
"bg-[#1F2228] w-3 h-3 rounded-xl",
|
||||
isToggled ? "bg-[#1F2228]" : "bg-tertiary-alt",
|
||||
"w-3 h-3 rounded-xl",
|
||||
isToggled ? "bg-base-secondary" : "bg-tertiary-light",
|
||||
)}
|
||||
/>
|
||||
</div>
|
||||
|
||||
@@ -3,6 +3,7 @@ export const VERIFIED_PROVIDERS = ["openai", "azure", "anthropic", "deepseek"];
|
||||
export const VERIFIED_MODELS = [
|
||||
"o3-mini-2025-01-31",
|
||||
"claude-3-5-sonnet-20241022",
|
||||
"claude-3-7-sonnet-20250219",
|
||||
"deepseek-chat",
|
||||
];
|
||||
|
||||
@@ -31,4 +32,5 @@ export const VERIFIED_ANTHROPIC_MODELS = [
|
||||
"claude-3-haiku-20240307",
|
||||
"claude-3-opus-20240229",
|
||||
"claude-3-sonnet-20240229",
|
||||
"claude-3-7-sonnet-20250219",
|
||||
];
|
||||
|
||||
@@ -100,6 +100,7 @@ async def main(loop: asyncio.AbstractEventLoop):
|
||||
initial_user_action = MessageAction(content=task_str) if task_str else None
|
||||
|
||||
sid = str(uuid4())
|
||||
display_message(f'Session ID: {sid}')
|
||||
|
||||
runtime = create_runtime(config, sid=sid, headless_mode=True)
|
||||
await runtime.connect()
|
||||
|
||||
@@ -5,6 +5,7 @@ from openhands.core.config.config_utils import (
|
||||
OH_MAX_ITERATIONS,
|
||||
get_field_info,
|
||||
)
|
||||
from openhands.core.config.extended_config import ExtendedConfig
|
||||
from openhands.core.config.llm_config import LLMConfig
|
||||
from openhands.core.config.sandbox_config import SandboxConfig
|
||||
from openhands.core.config.security_config import SecurityConfig
|
||||
@@ -28,6 +29,7 @@ __all__ = [
|
||||
'LLMConfig',
|
||||
'SandboxConfig',
|
||||
'SecurityConfig',
|
||||
'ExtendedConfig',
|
||||
'load_app_config',
|
||||
'load_from_env',
|
||||
'load_from_toml',
|
||||
|
||||
@@ -9,6 +9,7 @@ from openhands.core.config.config_utils import (
|
||||
OH_MAX_ITERATIONS,
|
||||
model_defaults_to_dict,
|
||||
)
|
||||
from openhands.core.config.extended_config import ExtendedConfig
|
||||
from openhands.core.config.llm_config import LLMConfig
|
||||
from openhands.core.config.sandbox_config import SandboxConfig
|
||||
from openhands.core.config.security_config import SecurityConfig
|
||||
@@ -52,6 +53,7 @@ class AppConfig(BaseModel):
|
||||
default_agent: str = Field(default=OH_DEFAULT_AGENT)
|
||||
sandbox: SandboxConfig = Field(default_factory=SandboxConfig)
|
||||
security: SecurityConfig = Field(default_factory=SecurityConfig)
|
||||
extended: ExtendedConfig = Field(default_factory=lambda: ExtendedConfig({}))
|
||||
runtime: str = Field(default='docker')
|
||||
file_store: str = Field(default='local')
|
||||
file_store_path: str = Field(default='/tmp/openhands_file_store')
|
||||
@@ -75,6 +77,9 @@ class AppConfig(BaseModel):
|
||||
file_uploads_restrict_file_types: bool = Field(default=False)
|
||||
file_uploads_allowed_extensions: list[str] = Field(default_factory=lambda: ['.*'])
|
||||
runloop_api_key: SecretStr | None = Field(default=None)
|
||||
daytona_api_key: SecretStr | None = Field(default=None)
|
||||
daytona_api_url: str = Field(default='https://app.daytona.io/api')
|
||||
daytona_target: str = Field(default='us')
|
||||
cli_multiline_input: bool = Field(default=False)
|
||||
conversation_max_age_seconds: int = Field(default=864000) # 10 days in seconds
|
||||
|
||||
|
||||
@@ -26,8 +26,10 @@ class RecentEventsCondenserConfig(BaseModel):
|
||||
"""Configuration for RecentEventsCondenser."""
|
||||
|
||||
type: Literal['recent'] = Field('recent')
|
||||
|
||||
# at least one event by default, because the best guess is that it is the user task
|
||||
keep_first: int = Field(
|
||||
default=0,
|
||||
default=1,
|
||||
description='The number of initial events to condense.',
|
||||
ge=0,
|
||||
)
|
||||
@@ -43,6 +45,8 @@ class LLMSummarizingCondenserConfig(BaseModel):
|
||||
llm_config: LLMConfig = Field(
|
||||
..., description='Configuration for the LLM to use for condensing.'
|
||||
)
|
||||
|
||||
# at least one event by default, because the best guess is that it's the user task
|
||||
keep_first: int = Field(
|
||||
default=1,
|
||||
description='The number of initial events to condense.',
|
||||
@@ -62,8 +66,10 @@ class AmortizedForgettingCondenserConfig(BaseModel):
|
||||
description='Maximum size of the condensed history before triggering forgetting.',
|
||||
ge=2,
|
||||
)
|
||||
|
||||
# at least one event by default, because the best guess is that it's the user task
|
||||
keep_first: int = Field(
|
||||
default=0,
|
||||
default=1,
|
||||
description='Number of initial events to always keep in history.',
|
||||
ge=0,
|
||||
)
|
||||
@@ -81,8 +87,10 @@ class LLMAttentionCondenserConfig(BaseModel):
|
||||
description='Maximum size of the condensed history before triggering forgetting.',
|
||||
ge=2,
|
||||
)
|
||||
|
||||
# at least one event by default, because the best guess is that it's the user task
|
||||
keep_first: int = Field(
|
||||
default=0,
|
||||
default=1,
|
||||
description='Number of initial events to always keep in history.',
|
||||
ge=0,
|
||||
)
|
||||
|
||||
@@ -25,14 +25,20 @@ def get_field_info(field: FieldInfo) -> dict[str, Any]:
|
||||
# Note: this only works for UnionTypes with None as one of the types
|
||||
if get_origin(field_type) is UnionType:
|
||||
types = get_args(field_type)
|
||||
non_none_arg = next((t for t in types if t is not type(None)), None)
|
||||
non_none_arg = next(
|
||||
(t for t in types if t is not None and t is not type(None)), None
|
||||
)
|
||||
if non_none_arg is not None:
|
||||
field_type = non_none_arg
|
||||
optional = True
|
||||
|
||||
# type name in a pretty format
|
||||
type_name = (
|
||||
field_type.__name__ if hasattr(field_type, '__name__') else str(field_type)
|
||||
str(field_type)
|
||||
if field_type is None
|
||||
else (
|
||||
field_type.__name__ if hasattr(field_type, '__name__') else str(field_type)
|
||||
)
|
||||
)
|
||||
|
||||
# default is always present
|
||||
|
||||
40
openhands/core/config/extended_config.py
Normal file
40
openhands/core/config/extended_config.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from pydantic import RootModel
|
||||
|
||||
|
||||
class ExtendedConfig(RootModel[dict]):
|
||||
"""Configuration for extended functionalities.
|
||||
|
||||
This is implemented as a root model so that the entire input is stored
|
||||
as the root value. This allows arbitrary keys to be stored and later
|
||||
accessed via attribute or dictionary-style access.
|
||||
"""
|
||||
|
||||
@property
|
||||
def root(self) -> dict: # type annotation to help mypy
|
||||
return super().root
|
||||
|
||||
def __str__(self) -> str:
|
||||
# Use the root dict to build a string representation.
|
||||
attr_str = [f'{k}={repr(v)}' for k, v in self.root.items()]
|
||||
return f"ExtendedConfig({', '.join(attr_str)})"
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return self.__str__()
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> 'ExtendedConfig':
|
||||
# Create an instance directly by wrapping the input dict.
|
||||
return cls(data)
|
||||
|
||||
def __getitem__(self, key: str) -> object:
|
||||
# Provide dictionary-like access via the root dict.
|
||||
return self.root[key]
|
||||
|
||||
def __getattr__(self, key: str) -> object:
|
||||
# Fallback for attribute access using the root dict.
|
||||
try:
|
||||
return self.root[key]
|
||||
except KeyError as e:
|
||||
raise AttributeError(
|
||||
f"'ExtendedConfig' object has no attribute '{key}'"
|
||||
) from e
|
||||
@@ -53,11 +53,11 @@ class SandboxConfig(BaseModel):
|
||||
remote_runtime_api_timeout: int = Field(default=10)
|
||||
remote_runtime_enable_retries: bool = Field(default=False)
|
||||
remote_runtime_class: str | None = Field(
|
||||
default='sysbox'
|
||||
default=None
|
||||
) # can be "None" (default to gvisor) or "sysbox" (support docker inside runtime + more stable)
|
||||
enable_auto_lint: bool = Field(
|
||||
default=False # once enabled, OpenHands would lint files after editing
|
||||
)
|
||||
default=False
|
||||
) # once enabled, OpenHands would lint files after editing
|
||||
use_host_network: bool = Field(default=False)
|
||||
runtime_extra_build_args: list[str] | None = Field(default=None)
|
||||
initialize_plugins: bool = Field(default=True)
|
||||
|
||||
@@ -19,6 +19,7 @@ from openhands.core.config.config_utils import (
|
||||
OH_DEFAULT_AGENT,
|
||||
OH_MAX_ITERATIONS,
|
||||
)
|
||||
from openhands.core.config.extended_config import ExtendedConfig
|
||||
from openhands.core.config.llm_config import LLMConfig
|
||||
from openhands.core.config.sandbox_config import SandboxConfig
|
||||
from openhands.core.config.security_config import SecurityConfig
|
||||
@@ -134,6 +135,10 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml') -> None:
|
||||
for key, value in toml_config.items():
|
||||
if isinstance(value, dict):
|
||||
try:
|
||||
if key.lower() == 'extended':
|
||||
# For ExtendedConfig (RootModel), pass the entire dict as the root value
|
||||
cfg.extended = ExtendedConfig(value)
|
||||
continue
|
||||
if key is not None and key.lower() == 'agent':
|
||||
# Every entry here is either a field for the default `agent` config group, or itself a group
|
||||
# The best way to tell the difference is to try to parse it as an AgentConfig object
|
||||
|
||||
@@ -10,17 +10,17 @@ class AgentError(Exception):
|
||||
|
||||
|
||||
class AgentNoInstructionError(AgentError):
|
||||
def __init__(self, message='Instruction must be provided'):
|
||||
def __init__(self, message: str = 'Instruction must be provided') -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class AgentEventTypeError(AgentError):
|
||||
def __init__(self, message='Event must be a dictionary'):
|
||||
def __init__(self, message: str = 'Event must be a dictionary') -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class AgentAlreadyRegisteredError(AgentError):
|
||||
def __init__(self, name=None):
|
||||
def __init__(self, name: str | None = None) -> None:
|
||||
if name is not None:
|
||||
message = f"Agent class already registered under '{name}'"
|
||||
else:
|
||||
@@ -29,7 +29,7 @@ class AgentAlreadyRegisteredError(AgentError):
|
||||
|
||||
|
||||
class AgentNotRegisteredError(AgentError):
|
||||
def __init__(self, name=None):
|
||||
def __init__(self, name: str | None = None) -> None:
|
||||
if name is not None:
|
||||
message = f"No agent class registered under '{name}'"
|
||||
else:
|
||||
@@ -38,7 +38,7 @@ class AgentNotRegisteredError(AgentError):
|
||||
|
||||
|
||||
class AgentStuckInLoopError(AgentError):
|
||||
def __init__(self, message='Agent got stuck in a loop'):
|
||||
def __init__(self, message: str = 'Agent got stuck in a loop') -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
@@ -48,7 +48,7 @@ class AgentStuckInLoopError(AgentError):
|
||||
|
||||
|
||||
class TaskInvalidStateError(Exception):
|
||||
def __init__(self, state=None):
|
||||
def __init__(self, state: str | None = None) -> None:
|
||||
if state is not None:
|
||||
message = f'Invalid state {state}'
|
||||
else:
|
||||
@@ -64,45 +64,47 @@ class TaskInvalidStateError(Exception):
|
||||
# This exception gets sent back to the LLM
|
||||
# It might be malformed JSON
|
||||
class LLMMalformedActionError(Exception):
|
||||
def __init__(self, message='Malformed response'):
|
||||
def __init__(self, message: str = 'Malformed response') -> None:
|
||||
self.message = message
|
||||
super().__init__(message)
|
||||
|
||||
def __str__(self):
|
||||
def __str__(self) -> str:
|
||||
return self.message
|
||||
|
||||
|
||||
# This exception gets sent back to the LLM
|
||||
# For some reason, the agent did not return an action
|
||||
class LLMNoActionError(Exception):
|
||||
def __init__(self, message='Agent must return an action'):
|
||||
def __init__(self, message: str = 'Agent must return an action') -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
# This exception gets sent back to the LLM
|
||||
# The LLM output did not include an action, or the action was not the expected type
|
||||
class LLMResponseError(Exception):
|
||||
def __init__(self, message='Failed to retrieve action from LLM response'):
|
||||
def __init__(
|
||||
self, message: str = 'Failed to retrieve action from LLM response'
|
||||
) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class UserCancelledError(Exception):
|
||||
def __init__(self, message='User cancelled the request'):
|
||||
def __init__(self, message: str = 'User cancelled the request') -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class OperationCancelled(Exception):
|
||||
"""Exception raised when an operation is cancelled (e.g. by a keyboard interrupt)."""
|
||||
|
||||
def __init__(self, message='Operation was cancelled'):
|
||||
def __init__(self, message: str = 'Operation was cancelled') -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class LLMContextWindowExceedError(RuntimeError):
|
||||
def __init__(
|
||||
self,
|
||||
message='Conversation history longer than LLM context window limit. Consider turning on enable_history_truncation config to avoid this error',
|
||||
):
|
||||
message: str = 'Conversation history longer than LLM context window limit. Consider turning on enable_history_truncation config to avoid this error',
|
||||
) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
@@ -117,7 +119,7 @@ class FunctionCallConversionError(Exception):
|
||||
This typically happens when there's a malformed message (e.g., missing <function=...> tags), but not due to LLM output.
|
||||
"""
|
||||
|
||||
def __init__(self, message):
|
||||
def __init__(self, message: str) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
@@ -127,14 +129,14 @@ class FunctionCallValidationError(Exception):
|
||||
This typically happens when the LLM outputs unrecognized function call / parameter names / values.
|
||||
"""
|
||||
|
||||
def __init__(self, message):
|
||||
def __init__(self, message: str) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class FunctionCallNotExistsError(Exception):
|
||||
"""Exception raised when an LLM calls a tool that is not registered."""
|
||||
|
||||
def __init__(self, message):
|
||||
def __init__(self, message: str) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
@@ -191,15 +193,17 @@ class AgentRuntimeNotFoundError(AgentRuntimeUnavailableError):
|
||||
|
||||
|
||||
class BrowserInitException(Exception):
|
||||
def __init__(self, message='Failed to initialize browser environment'):
|
||||
def __init__(
|
||||
self, message: str = 'Failed to initialize browser environment'
|
||||
) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class BrowserUnavailableException(Exception):
|
||||
def __init__(
|
||||
self,
|
||||
message='Browser environment is not available, please check if has been initialized',
|
||||
):
|
||||
message: str = 'Browser environment is not available, please check if has been initialized',
|
||||
) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
@@ -217,5 +221,5 @@ class MicroAgentError(Exception):
|
||||
class MicroAgentValidationError(MicroAgentError):
|
||||
"""Raised when there's a validation error in microagent metadata."""
|
||||
|
||||
def __init__(self, message='Micro agent validation failed'):
|
||||
def __init__(self, message: str = 'Micro agent validation failed') -> None:
|
||||
super().__init__(message)
|
||||
|
||||
@@ -74,10 +74,11 @@ LOG_COLORS: Mapping[str, ColorType] = {
|
||||
|
||||
|
||||
class StackInfoFilter(logging.Filter):
|
||||
def filter(self, record):
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
if record.levelno >= logging.ERROR:
|
||||
record.stack_info = True
|
||||
record.exc_info = True
|
||||
# LogRecord attributes are dynamically typed
|
||||
setattr(record, 'stack_info', True)
|
||||
setattr(record, 'exc_info', sys.exc_info())
|
||||
return True
|
||||
|
||||
|
||||
@@ -107,9 +108,9 @@ def strip_ansi(s: str) -> str:
|
||||
|
||||
|
||||
class ColoredFormatter(logging.Formatter):
|
||||
def format(self, record):
|
||||
msg_type = record.__dict__.get('msg_type')
|
||||
event_source = record.__dict__.get('event_source')
|
||||
def format(self, record: logging.LogRecord) -> str:
|
||||
msg_type = record.__dict__.get('msg_type', '')
|
||||
event_source = record.__dict__.get('event_source', '')
|
||||
if event_source:
|
||||
new_msg_type = f'{event_source.upper()}_{msg_type}'
|
||||
if new_msg_type in LOG_COLORS:
|
||||
@@ -136,12 +137,13 @@ class ColoredFormatter(logging.Formatter):
|
||||
return super().format(new_record)
|
||||
|
||||
|
||||
def _fix_record(record: logging.LogRecord):
|
||||
def _fix_record(record: logging.LogRecord) -> logging.LogRecord:
|
||||
new_record = copy.copy(record)
|
||||
# The formatter expects non boolean values, and will raise an exception if there is a boolean - so we fix these
|
||||
if new_record.exc_info is True and not new_record.exc_text: # type: ignore
|
||||
new_record.exc_info = sys.exc_info() # type: ignore
|
||||
new_record.stack_info = None # type: ignore
|
||||
# LogRecord attributes are dynamically typed
|
||||
if getattr(new_record, 'exc_info', None) is True:
|
||||
setattr(new_record, 'exc_info', sys.exc_info())
|
||||
setattr(new_record, 'stack_info', None)
|
||||
return new_record
|
||||
|
||||
|
||||
@@ -158,32 +160,32 @@ class RollingLogger:
|
||||
log_lines: list[str]
|
||||
all_lines: str
|
||||
|
||||
def __init__(self, max_lines=10, char_limit=80):
|
||||
def __init__(self, max_lines: int = 10, char_limit: int = 80) -> None:
|
||||
self.max_lines = max_lines
|
||||
self.char_limit = char_limit
|
||||
self.log_lines = [''] * self.max_lines
|
||||
self.all_lines = ''
|
||||
|
||||
def is_enabled(self):
|
||||
def is_enabled(self) -> bool:
|
||||
return DEBUG and sys.stdout.isatty()
|
||||
|
||||
def start(self, message=''):
|
||||
def start(self, message: str = '') -> None:
|
||||
if message:
|
||||
print(message)
|
||||
self._write('\n' * self.max_lines)
|
||||
self._flush()
|
||||
|
||||
def add_line(self, line):
|
||||
def add_line(self, line: str) -> None:
|
||||
self.log_lines.pop(0)
|
||||
self.log_lines.append(line[: self.char_limit])
|
||||
self.print_lines()
|
||||
self.all_lines += line + '\n'
|
||||
|
||||
def write_immediately(self, line):
|
||||
def write_immediately(self, line: str) -> None:
|
||||
self._write(line)
|
||||
self._flush()
|
||||
|
||||
def print_lines(self):
|
||||
def print_lines(self) -> None:
|
||||
"""Display the last n log_lines in the console (not for file logging).
|
||||
|
||||
This will create the effect of a rolling display in the console.
|
||||
@@ -192,31 +194,31 @@ class RollingLogger:
|
||||
for line in self.log_lines:
|
||||
self.replace_current_line(line)
|
||||
|
||||
def move_back(self, amount: int = -1) -> None:
    r"""Move the cursor up by writing '\033[F' once per line.

    '\033[F' moves the cursor up one line.

    Args:
        amount: Number of lines to move up. The default of -1 is a
            sentinel meaning ``self.max_lines``.
    """
    if amount == -1:
        amount = self.max_lines
    # Use the resolved amount. The previous code always repeated the escape
    # sequence ``self.max_lines`` times, so an explicit ``amount`` was
    # silently ignored.
    self._write('\033[F' * amount)
    self._flush()
|
||||
|
||||
def replace_current_line(self, line=''):
|
||||
def replace_current_line(self, line: str = '') -> None:
|
||||
r"""'\033[2K\r' clears the line and moves the cursor to the beginning of the line."""
|
||||
self._write('\033[2K' + line + '\n')
|
||||
self._flush()
|
||||
|
||||
def _write(self, line):
|
||||
def _write(self, line: str) -> None:
|
||||
if not self.is_enabled():
|
||||
return
|
||||
sys.stdout.write(line)
|
||||
|
||||
def _flush(self):
|
||||
def _flush(self) -> None:
|
||||
if not self.is_enabled():
|
||||
return
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
class SensitiveDataFilter(logging.Filter):
|
||||
def filter(self, record):
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
# Gather sensitive values which should not ever appear in the logs.
|
||||
sensitive_values = []
|
||||
for key, value in os.environ.items():
|
||||
@@ -245,6 +247,7 @@ class SensitiveDataFilter(logging.Filter):
|
||||
'modal_api_token_secret',
|
||||
'llm_api_key',
|
||||
'sandbox_env_github_token',
|
||||
'daytona_api_key',
|
||||
]
|
||||
|
||||
# add env var names
|
||||
@@ -262,7 +265,9 @@ class SensitiveDataFilter(logging.Filter):
|
||||
return True
|
||||
|
||||
|
||||
def get_console_handler(log_level: int = logging.INFO, extra_info: str | None = None):
|
||||
def get_console_handler(
|
||||
log_level: int = logging.INFO, extra_info: str | None = None
|
||||
) -> logging.StreamHandler:
|
||||
"""Returns a console handler for logging."""
|
||||
console_handler = logging.StreamHandler()
|
||||
console_handler.setLevel(log_level)
|
||||
@@ -273,7 +278,9 @@ def get_console_handler(log_level: int = logging.INFO, extra_info: str | None =
|
||||
return console_handler
|
||||
|
||||
|
||||
def get_file_handler(log_dir: str, log_level: int = logging.INFO):
|
||||
def get_file_handler(
|
||||
log_dir: str, log_level: int = logging.INFO
|
||||
) -> logging.FileHandler:
|
||||
"""Returns a file handler for logging."""
|
||||
os.makedirs(log_dir, exist_ok=True)
|
||||
timestamp = datetime.now().strftime('%Y-%m-%d')
|
||||
@@ -347,7 +354,13 @@ logging.getLogger('LiteLLM Proxy').disabled = True
|
||||
class LlmFileHandler(logging.FileHandler):
|
||||
"""LLM prompt and response logging."""
|
||||
|
||||
def __init__(self, filename, mode='a', encoding='utf-8', delay=False):
|
||||
def __init__(
|
||||
self,
|
||||
filename: str,
|
||||
mode: str = 'a',
|
||||
encoding: str = 'utf-8',
|
||||
delay: bool = False,
|
||||
) -> None:
|
||||
"""Initializes an instance of LlmFileHandler.
|
||||
|
||||
Args:
|
||||
@@ -378,7 +391,7 @@ class LlmFileHandler(logging.FileHandler):
|
||||
self.baseFilename = os.path.join(self.log_directory, filename)
|
||||
super().__init__(self.baseFilename, mode, encoding, delay)
|
||||
|
||||
def emit(self, record):
|
||||
def emit(self, record: logging.LogRecord) -> None:
|
||||
"""Emits a log record.
|
||||
|
||||
Args:
|
||||
@@ -393,7 +406,7 @@ class LlmFileHandler(logging.FileHandler):
|
||||
self.message_counter += 1
|
||||
|
||||
|
||||
def _get_llm_file_handler(name: str, log_level: int):
|
||||
def _get_llm_file_handler(name: str, log_level: int) -> LlmFileHandler:
|
||||
# The 'delay' parameter, when set to True, postpones the opening of the log file
|
||||
# until the first log message is emitted.
|
||||
llm_file_handler = LlmFileHandler(name, delay=True)
|
||||
@@ -402,7 +415,7 @@ def _get_llm_file_handler(name: str, log_level: int):
|
||||
return llm_file_handler
|
||||
|
||||
|
||||
def _setup_llm_logger(name: str, log_level: int):
|
||||
def _setup_llm_logger(name: str, log_level: int) -> logging.Logger:
|
||||
logger = logging.getLogger(name)
|
||||
logger.propagate = False
|
||||
logger.setLevel(log_level)
|
||||
|
||||
@@ -15,7 +15,9 @@ class Content(BaseModel):
|
||||
cache_prompt: bool = False
|
||||
|
||||
@model_serializer
|
||||
def serialize_model(self):
|
||||
def serialize_model(
|
||||
self,
|
||||
) -> dict[str, str | dict[str, str]] | list[dict[str, str | dict[str, str]]]:
|
||||
raise NotImplementedError('Subclasses should implement this method.')
|
||||
|
||||
|
||||
@@ -24,7 +26,7 @@ class TextContent(Content):
|
||||
text: str
|
||||
|
||||
@model_serializer
|
||||
def serialize_model(self):
|
||||
def serialize_model(self) -> dict[str, str | dict[str, str]]:
|
||||
data: dict[str, str | dict[str, str]] = {
|
||||
'type': self.type,
|
||||
'text': self.text,
|
||||
@@ -39,7 +41,7 @@ class ImageContent(Content):
|
||||
image_urls: list[str]
|
||||
|
||||
@model_serializer
|
||||
def serialize_model(self):
|
||||
def serialize_model(self) -> list[dict[str, str | dict[str, str]]]:
|
||||
images: list[dict[str, str | dict[str, str]]] = []
|
||||
for url in self.image_urls:
|
||||
images.append({'type': self.type, 'image_url': {'url': url}})
|
||||
@@ -101,15 +103,22 @@ class Message(BaseModel):
|
||||
# See discussion here for details: https://github.com/BerriAI/litellm/issues/6422#issuecomment-2438765472
|
||||
if self.role == 'tool' and item.cache_prompt:
|
||||
role_tool_with_prompt_caching = True
|
||||
if isinstance(d, dict):
|
||||
d.pop('cache_control')
|
||||
elif isinstance(d, list):
|
||||
for d_item in d:
|
||||
d_item.pop('cache_control')
|
||||
if isinstance(item, TextContent):
|
||||
d.pop('cache_control', None)
|
||||
elif isinstance(item, ImageContent):
|
||||
# ImageContent.model_dump() always returns a list
|
||||
# We know d is a list of dicts for ImageContent
|
||||
if hasattr(d, '__iter__'):
|
||||
for d_item in d:
|
||||
if hasattr(d_item, 'pop'):
|
||||
d_item.pop('cache_control', None)
|
||||
|
||||
if isinstance(item, TextContent):
|
||||
content.append(d)
|
||||
elif isinstance(item, ImageContent) and self.vision_enabled:
|
||||
content.extend(d)
|
||||
# ImageContent.model_dump() always returns a list
|
||||
# We know d is a list for ImageContent
|
||||
content.extend([d] if isinstance(d, dict) else d)
|
||||
|
||||
message_dict: dict = {'content': content, 'role': self.role}
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@ from openhands.events.observation import (
|
||||
from openhands.events.observation.error import ErrorObservation
|
||||
from openhands.events.observation.observation import Observation
|
||||
from openhands.events.serialization.event import truncate_content
|
||||
from openhands.llm.metrics import Metrics, TokenUsage
|
||||
|
||||
|
||||
def events_to_messages(
|
||||
@@ -159,7 +160,7 @@ def get_action_message(
|
||||
)
|
||||
|
||||
llm_response: ModelResponse = tool_metadata.model_response
|
||||
assistant_msg = llm_response.choices[0].message
|
||||
assistant_msg = getattr(llm_response.choices[0], 'message')
|
||||
|
||||
# Add the LLM message (assistant) that initiated the tool calls
|
||||
# (overwrites any previous message with the same response_id)
|
||||
@@ -167,7 +168,7 @@ def get_action_message(
|
||||
f'Tool calls type: {type(assistant_msg.tool_calls)}, value: {assistant_msg.tool_calls}'
|
||||
)
|
||||
pending_tool_call_action_messages[llm_response.id] = Message(
|
||||
role=assistant_msg.role,
|
||||
role=getattr(assistant_msg, 'role', 'assistant'),
|
||||
# tool call content SHOULD BE a string
|
||||
content=[TextContent(text=assistant_msg.content or '')]
|
||||
if assistant_msg.content is not None
|
||||
@@ -184,7 +185,7 @@ def get_action_message(
|
||||
tool_metadata = action.tool_call_metadata
|
||||
if tool_metadata is not None:
|
||||
# take the response message from the tool call
|
||||
assistant_msg = tool_metadata.model_response.choices[0].message
|
||||
assistant_msg = getattr(tool_metadata.model_response.choices[0], 'message')
|
||||
content = assistant_msg.content or ''
|
||||
|
||||
# save content if any, to thought
|
||||
@@ -196,9 +197,11 @@ def get_action_message(
|
||||
|
||||
# remove the tool call metadata
|
||||
action.tool_call_metadata = None
|
||||
if role not in ('user', 'system', 'assistant', 'tool'):
|
||||
raise ValueError(f'Invalid role: {role}')
|
||||
return [
|
||||
Message(
|
||||
role=role,
|
||||
role=role, # type: ignore[arg-type]
|
||||
content=[TextContent(text=action.thought)],
|
||||
)
|
||||
]
|
||||
@@ -207,9 +210,11 @@ def get_action_message(
|
||||
content = [TextContent(text=action.content or '')]
|
||||
if vision_is_active and action.image_urls:
|
||||
content.append(ImageContent(image_urls=action.image_urls))
|
||||
if role not in ('user', 'system', 'assistant', 'tool'):
|
||||
raise ValueError(f'Invalid role: {role}')
|
||||
return [
|
||||
Message(
|
||||
role=role,
|
||||
role=role, # type: ignore[arg-type]
|
||||
content=content,
|
||||
)
|
||||
]
|
||||
@@ -217,7 +222,7 @@ def get_action_message(
|
||||
content = [TextContent(text=f'User executed the command:\n{action.command}')]
|
||||
return [
|
||||
Message(
|
||||
role='user',
|
||||
role='user', # Always user for CmdRunAction
|
||||
content=content,
|
||||
)
|
||||
]
|
||||
@@ -362,3 +367,47 @@ def apply_prompt_caching(messages: list[Message]) -> None:
|
||||
-1
|
||||
].cache_prompt = True # Last item inside the message content
|
||||
break
|
||||
|
||||
|
||||
def get_token_usage_for_event(event: Event, metrics: Metrics) -> TokenUsage | None:
    """Look up the token usage recorded for the LLM response behind `event`.

    The response id is read from `event.tool_call_metadata.model_response`
    (key `'id'`). The first entry of `metrics.token_usages` whose
    `response_id` equals it is returned.

    Returns:
        The matching usage record, or None when the event has no tool call
        metadata, no model response, no response id, or no record matches.
    """
    metadata = event.tool_call_metadata
    if not metadata or not metadata.model_response:
        return None

    response_id = metadata.model_response.get('id')
    if not response_id:
        return None

    # At most one record is returned: stop at the first match.
    for usage in metrics.token_usages:
        if usage.response_id == response_id:
            return usage
    return None
|
||||
|
||||
|
||||
def get_token_usage_for_event_id(
    events: list[Event], event_id: int, metrics: Metrics
) -> TokenUsage | None:
    """Find the nearest token usage at or before the event with `event_id`.

    Locates the first event in `events` whose `.id` equals `event_id`, then
    walks backwards from it (inclusive) to the start of the list and returns
    the first usage that `get_token_usage_for_event` yields.

    Returns:
        The first TokenUsage found, or None when no event has the given id or
        none of the visited events has an associated usage record.
    """
    start = None
    for position, candidate in enumerate(events):
        if candidate.id == event_id:
            start = position
            break
    if start is None:
        return None

    # Visit events[start], events[start - 1], ..., events[0].
    for candidate in reversed(events[: start + 1]):
        found = get_token_usage_for_event(candidate, metrics)
        if found is not None:
            return found
    return None
|
||||
|
||||
@@ -42,6 +42,7 @@ LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (RateLimitError,)
|
||||
# cache prompt supporting models
|
||||
# remove this when gemini and deepseek are supported
|
||||
CACHE_PROMPT_SUPPORTED_MODELS = [
|
||||
'claude-3-7-sonnet-20250219',
|
||||
'claude-3-5-sonnet-20241022',
|
||||
'claude-3-5-sonnet-20240620',
|
||||
'claude-3-5-haiku-20241022',
|
||||
@@ -51,6 +52,7 @@ CACHE_PROMPT_SUPPORTED_MODELS = [
|
||||
|
||||
# function calling supporting models
|
||||
FUNCTION_CALLING_SUPPORTED_MODELS = [
|
||||
'claude-3-7-sonnet-20250219',
|
||||
'claude-3-5-sonnet',
|
||||
'claude-3-5-sonnet-20240620',
|
||||
'claude-3-5-sonnet-20241022',
|
||||
@@ -497,20 +499,21 @@ class LLM(RetryMixin, DebugMixin):
|
||||
stats += 'Response Latency: %.3f seconds\n' % latest_latency.latency
|
||||
|
||||
usage: Usage | None = response.get('usage')
|
||||
response_id = response.get('id', 'unknown')
|
||||
|
||||
if usage:
|
||||
# keep track of the input and output tokens
|
||||
input_tokens = usage.get('prompt_tokens')
|
||||
output_tokens = usage.get('completion_tokens')
|
||||
prompt_tokens = usage.get('prompt_tokens', 0)
|
||||
completion_tokens = usage.get('completion_tokens', 0)
|
||||
|
||||
if input_tokens:
|
||||
stats += 'Input tokens: ' + str(input_tokens)
|
||||
if prompt_tokens:
|
||||
stats += 'Input tokens: ' + str(prompt_tokens)
|
||||
|
||||
if output_tokens:
|
||||
if completion_tokens:
|
||||
stats += (
|
||||
(' | ' if input_tokens else '')
|
||||
(' | ' if prompt_tokens else '')
|
||||
+ 'Output tokens: '
|
||||
+ str(output_tokens)
|
||||
+ str(completion_tokens)
|
||||
+ '\n'
|
||||
)
|
||||
|
||||
@@ -519,7 +522,7 @@ class LLM(RetryMixin, DebugMixin):
|
||||
'prompt_tokens_details'
|
||||
)
|
||||
cache_hit_tokens = (
|
||||
prompt_tokens_details.cached_tokens if prompt_tokens_details else None
|
||||
prompt_tokens_details.cached_tokens if prompt_tokens_details else 0
|
||||
)
|
||||
if cache_hit_tokens:
|
||||
stats += 'Input tokens (cache hit): ' + str(cache_hit_tokens) + '\n'
|
||||
@@ -528,10 +531,20 @@ class LLM(RetryMixin, DebugMixin):
|
||||
# but litellm doesn't separate them in the usage stats
|
||||
# so we can read it from the provider-specific extra field
|
||||
model_extra = usage.get('model_extra', {})
|
||||
cache_write_tokens = model_extra.get('cache_creation_input_tokens')
|
||||
cache_write_tokens = model_extra.get('cache_creation_input_tokens', 0)
|
||||
if cache_write_tokens:
|
||||
stats += 'Input tokens (cache write): ' + str(cache_write_tokens) + '\n'
|
||||
|
||||
# Record in metrics
|
||||
# We'll treat cache_hit_tokens as "cache read" and cache_write_tokens as "cache write"
|
||||
self.metrics.add_token_usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
cache_read_tokens=cache_hit_tokens,
|
||||
cache_write_tokens=cache_write_tokens,
|
||||
response_id=response_id,
|
||||
)
|
||||
|
||||
# log the stats
|
||||
if stats:
|
||||
logger.debug(stats)
|
||||
|
||||
@@ -17,11 +17,23 @@ class ResponseLatency(BaseModel):
|
||||
response_id: str
|
||||
|
||||
|
||||
class TokenUsage(BaseModel):
|
||||
"""Metric tracking detailed token usage per completion call."""
|
||||
|
||||
model: str
|
||||
prompt_tokens: int
|
||||
completion_tokens: int
|
||||
cache_read_tokens: int
|
||||
cache_write_tokens: int
|
||||
response_id: str
|
||||
|
||||
|
||||
class Metrics:
|
||||
"""Metrics class can record various metrics during running and evaluation.
|
||||
Currently, we define the following metrics:
|
||||
accumulated_cost: the total cost (USD $) of the current LLM.
|
||||
response_latency: the time taken for each LLM completion call.
|
||||
We track:
|
||||
- accumulated_cost and costs
|
||||
- A list of ResponseLatency
|
||||
- A list of TokenUsage (one per call).
|
||||
"""
|
||||
|
||||
def __init__(self, model_name: str = 'default') -> None:
|
||||
@@ -29,6 +41,7 @@ class Metrics:
|
||||
self._costs: list[Cost] = []
|
||||
self._response_latencies: list[ResponseLatency] = []
|
||||
self.model_name = model_name
|
||||
self._token_usages: list[TokenUsage] = []
|
||||
|
||||
@property
|
||||
def accumulated_cost(self) -> float:
|
||||
@@ -54,6 +67,16 @@ class Metrics:
|
||||
def response_latencies(self, value: list[ResponseLatency]) -> None:
|
||||
self._response_latencies = value
|
||||
|
||||
@property
|
||||
def token_usages(self) -> list[TokenUsage]:
|
||||
if not hasattr(self, '_token_usages'):
|
||||
self._token_usages = []
|
||||
return self._token_usages
|
||||
|
||||
@token_usages.setter
|
||||
def token_usages(self, value: list[TokenUsage]) -> None:
|
||||
self._token_usages = value
|
||||
|
||||
def add_cost(self, value: float) -> None:
|
||||
if value < 0:
|
||||
raise ValueError('Added cost cannot be negative.')
|
||||
@@ -67,10 +90,33 @@ class Metrics:
|
||||
)
|
||||
)
|
||||
|
||||
def add_token_usage(
    self,
    prompt_tokens: int,
    completion_tokens: int,
    cache_read_tokens: int,
    cache_write_tokens: int,
    response_id: str,
) -> None:
    """Add a single usage record.

    Builds a TokenUsage tagged with `self.model_name` and appends it to
    this object's list of token usages.

    Args:
        prompt_tokens: Prompt token count for the call.
        completion_tokens: Completion token count for the call.
        cache_read_tokens: Cache-read token count for the call.
        cache_write_tokens: Cache-write token count for the call.
        response_id: Id of the response this usage belongs to.
    """
    usage = TokenUsage(
        model=self.model_name,
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        cache_read_tokens=cache_read_tokens,
        cache_write_tokens=cache_write_tokens,
        response_id=response_id,
    )
    # Append through the `token_usages` property rather than the private
    # field: the property creates the list when the attribute is missing,
    # so objects that lack `_token_usages` do not raise AttributeError here.
    self.token_usages.append(usage)
|
||||
|
||||
def merge(self, other: 'Metrics') -> None:
|
||||
"""Merge 'other' metrics into this one."""
|
||||
self._accumulated_cost += other.accumulated_cost
|
||||
self._costs += other._costs
|
||||
self._response_latencies += other._response_latencies
|
||||
# use the property so older pickled objects that lack the field won't crash
|
||||
self.token_usages += other.token_usages
|
||||
self.response_latencies += other.response_latencies
|
||||
|
||||
def get(self) -> dict:
|
||||
"""Return the metrics in a dictionary."""
|
||||
@@ -80,12 +126,14 @@ class Metrics:
|
||||
'response_latencies': [
|
||||
latency.model_dump() for latency in self._response_latencies
|
||||
],
|
||||
'token_usages': [usage.model_dump() for usage in self._token_usages],
|
||||
}
|
||||
|
||||
def reset(self):
|
||||
self._accumulated_cost = 0.0
|
||||
self._costs = []
|
||||
self._response_latencies = []
|
||||
self._token_usages = []
|
||||
|
||||
def log(self):
|
||||
"""Log the metrics."""
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from openhands.memory.condenser import Condenser
|
||||
from openhands.memory.memory import LongTermMemory
|
||||
from openhands.memory.long_term_memory import LongTermMemory
|
||||
|
||||
__all__ = ['LongTermMemory', 'Condenser']
|
||||
|
||||
@@ -18,7 +18,7 @@ class ImportantEventSelection(BaseModel):
|
||||
class LLMAttentionCondenser(RollingCondenser):
|
||||
"""Rolling condenser strategy that uses an LLM to select the most important events when condensing the history."""
|
||||
|
||||
def __init__(self, llm: LLM, max_size: int = 100, keep_first: int = 0):
|
||||
def __init__(self, llm: LLM, max_size: int = 100, keep_first: int = 1):
|
||||
if keep_first >= max_size // 2:
|
||||
raise ValueError(
|
||||
f'keep_first ({keep_first}) must be less than half of max_size ({max_size})'
|
||||
|
||||
@@ -8,7 +8,7 @@ from openhands.memory.condenser.condenser import Condenser
|
||||
class RecentEventsCondenser(Condenser):
|
||||
"""A condenser that only keeps a certain number of the most recent events."""
|
||||
|
||||
def __init__(self, keep_first: int = 0, max_events: int = 10):
|
||||
def __init__(self, keep_first: int = 1, max_events: int = 10):
|
||||
self.keep_first = keep_first
|
||||
self.max_events = max_events
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.runtime.impl.daytona.daytona_runtime import DaytonaRuntime
|
||||
from openhands.runtime.impl.docker.docker_runtime import (
|
||||
DockerRuntime,
|
||||
)
|
||||
@@ -24,6 +25,8 @@ def get_runtime_cls(name: str):
|
||||
return RunloopRuntime
|
||||
elif name == 'local':
|
||||
return LocalRuntime
|
||||
elif name == 'daytona':
|
||||
return DaytonaRuntime
|
||||
else:
|
||||
raise ValueError(f'Runtime {name} not supported')
|
||||
|
||||
|
||||
@@ -67,7 +67,7 @@ class DockerRuntimeBuilder(RuntimeBuilder):
|
||||
"""
|
||||
self.docker_client = docker.from_env()
|
||||
version_info = self.docker_client.version()
|
||||
server_version = version_info.get('Version', '').replace('-', '.')
|
||||
server_version = version_info.get('Version', '').split('+')[0].replace('-', '.')
|
||||
if tuple(map(int, server_version.split('.'))) < (18, 9):
|
||||
raise AgentRuntimeBuildError(
|
||||
'Docker server version must be >= 18.09 to use BuildKit'
|
||||
|
||||
24
openhands/runtime/impl/daytona/README.md
Normal file
24
openhands/runtime/impl/daytona/README.md
Normal file
@@ -0,0 +1,24 @@
|
||||
# Daytona Runtime
|
||||
|
||||
[Daytona](https://www.daytona.io/) is a platform that provides a secure and elastic infrastructure for running AI-generated code. It provides all the necessary features for an AI Agent to interact with a codebase. It provides a Daytona SDK with official Python and TypeScript interfaces for interacting with Daytona, enabling you to programmatically manage development environments and execute code.
|
||||
|
||||
## Getting started
|
||||
|
||||
1. Sign in at https://app.daytona.io/
|
||||
|
||||
1. Generate and copy your API key
|
||||
|
||||
1. Set the following environment variables before running the OpenHands app on your local machine or via a `docker run` command:
|
||||
|
||||
```bash
|
||||
RUNTIME="daytona"
|
||||
DAYTONA_API_KEY="<your-api-key>"
|
||||
```
|
||||
Optionally, if you don't want your sandboxes to default to the US region, set:
|
||||
|
||||
```bash
|
||||
DAYTONA_TARGET="eu"
|
||||
```
|
||||
|
||||
## Documentation
|
||||
Read more by visiting our [documentation](https://www.daytona.io/docs/) page.
|
||||
262
openhands/runtime/impl/daytona/daytona_runtime.py
Normal file
262
openhands/runtime/impl/daytona/daytona_runtime.py
Normal file
@@ -0,0 +1,262 @@
|
||||
import json
|
||||
from typing import Callable
|
||||
|
||||
import tenacity
|
||||
from daytona_sdk import (
|
||||
CreateWorkspaceParams,
|
||||
Daytona,
|
||||
DaytonaConfig,
|
||||
SessionExecuteRequest,
|
||||
Workspace,
|
||||
)
|
||||
|
||||
from openhands.core.config.app_config import AppConfig
|
||||
from openhands.events.stream import EventStream
|
||||
from openhands.runtime.impl.action_execution.action_execution_client import (
|
||||
ActionExecutionClient,
|
||||
)
|
||||
from openhands.runtime.plugins.requirement import PluginRequirement
|
||||
from openhands.runtime.utils.command import get_action_execution_server_startup_command
|
||||
from openhands.utils.async_utils import call_sync_from_async
|
||||
from openhands.utils.tenacity_stop import stop_if_should_exit
|
||||
|
||||
WORKSPACE_PREFIX = 'openhands-sandbox-'
|
||||
|
||||
|
||||
class DaytonaRuntime(ActionExecutionClient):
|
||||
"""The DaytonaRuntime class is an ActionExecutionClient that uses a Daytona workspace as its runtime environment."""
|
||||
|
||||
_sandbox_port: int = 4444
|
||||
_vscode_port: int = 4445
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
config: AppConfig,
|
||||
event_stream: EventStream,
|
||||
sid: str = 'default',
|
||||
plugins: list[PluginRequirement] | None = None,
|
||||
env_vars: dict[str, str] | None = None,
|
||||
status_callback: Callable | None = None,
|
||||
attach_to_existing: bool = False,
|
||||
headless_mode: bool = True,
|
||||
):
|
||||
assert config.daytona_api_key, 'Daytona API key is required'
|
||||
|
||||
self.config = config
|
||||
self.sid = sid
|
||||
self.workspace_id = WORKSPACE_PREFIX + sid
|
||||
self.workspace: Workspace | None = None
|
||||
self._vscode_url: str | None = None
|
||||
|
||||
daytona_config = DaytonaConfig(
|
||||
api_key=config.daytona_api_key.get_secret_value(),
|
||||
server_url=config.daytona_api_url,
|
||||
target=config.daytona_target,
|
||||
)
|
||||
self.daytona = Daytona(daytona_config)
|
||||
|
||||
# workspace_base cannot be used because we can't bind mount into a workspace.
|
||||
if self.config.workspace_base is not None:
|
||||
self.log(
|
||||
'warning',
|
||||
'Workspace mounting is not supported in the Daytona runtime.',
|
||||
)
|
||||
|
||||
super().__init__(
|
||||
config,
|
||||
event_stream,
|
||||
sid,
|
||||
plugins,
|
||||
env_vars,
|
||||
status_callback,
|
||||
attach_to_existing,
|
||||
headless_mode,
|
||||
)
|
||||
|
||||
def _get_workspace(self) -> Workspace | None:
|
||||
try:
|
||||
workspace = self.daytona.get_current_workspace(self.workspace_id)
|
||||
self.log(
|
||||
'info', f'Attached to existing workspace with id: {self.workspace_id}'
|
||||
)
|
||||
except Exception:
|
||||
self.log(
|
||||
'warning',
|
||||
f'Failed to attach to existing workspace with id: {self.workspace_id}',
|
||||
)
|
||||
workspace = None
|
||||
|
||||
return workspace
|
||||
|
||||
def _get_creation_env_vars(self) -> dict[str, str]:
|
||||
env_vars: dict[str, str] = {
|
||||
'port': str(self._sandbox_port),
|
||||
'PYTHONUNBUFFERED': '1',
|
||||
'VSCODE_PORT': str(self._vscode_port),
|
||||
}
|
||||
|
||||
if self.config.debug:
|
||||
env_vars['DEBUG'] = 'true'
|
||||
|
||||
return env_vars
|
||||
|
||||
def _create_workspace(self) -> Workspace:
|
||||
workspace_params = CreateWorkspaceParams(
|
||||
id=self.workspace_id,
|
||||
language='python',
|
||||
image=self.config.sandbox.runtime_container_image,
|
||||
public=True,
|
||||
env_vars=self._get_creation_env_vars(),
|
||||
)
|
||||
workspace = self.daytona.create(workspace_params)
|
||||
return workspace
|
||||
|
||||
def _get_workspace_status(self) -> str:
|
||||
assert self.workspace is not None, 'Workspace is not initialized'
|
||||
assert (
|
||||
self.workspace.instance.info is not None
|
||||
), 'Workspace info is not available'
|
||||
assert (
|
||||
self.workspace.instance.info.provider_metadata is not None
|
||||
), 'Provider metadata is not available'
|
||||
|
||||
provider_metadata = json.loads(self.workspace.instance.info.provider_metadata)
|
||||
return provider_metadata.get('status', 'unknown')
|
||||
|
||||
def _construct_api_url(self, port: int) -> str:
|
||||
assert self.workspace is not None, 'Workspace is not initialized'
|
||||
assert (
|
||||
self.workspace.instance.info is not None
|
||||
), 'Workspace info is not available'
|
||||
assert (
|
||||
self.workspace.instance.info.provider_metadata is not None
|
||||
), 'Provider metadata is not available'
|
||||
|
||||
node_domain = json.loads(self.workspace.instance.info.provider_metadata)[
|
||||
'nodeDomain'
|
||||
]
|
||||
return f'https://{port}-{self.workspace.id}.{node_domain}'
|
||||
|
||||
def _get_action_execution_server_host(self) -> str:
|
||||
return self.api_url
|
||||
|
||||
def _start_action_execution_server(self) -> None:
|
||||
assert self.workspace is not None, 'Workspace is not initialized'
|
||||
|
||||
self.workspace.process.exec(
|
||||
f'mkdir -p {self.config.workspace_mount_path_in_sandbox}'
|
||||
)
|
||||
|
||||
start_command: list[str] = get_action_execution_server_startup_command(
|
||||
server_port=self._sandbox_port,
|
||||
plugins=self.plugins,
|
||||
app_config=self.config,
|
||||
override_user_id=1000,
|
||||
override_username='openhands',
|
||||
)
|
||||
start_command_str: str = ' '.join(start_command)
|
||||
|
||||
self.log(
|
||||
'debug',
|
||||
f'Starting action execution server with command: {start_command_str}',
|
||||
)
|
||||
|
||||
exec_session_id = 'action-execution-server'
|
||||
self.workspace.process.create_session(exec_session_id)
|
||||
self.workspace.process.execute_session_command(
|
||||
exec_session_id,
|
||||
SessionExecuteRequest(command='cd /openhands/code', var_async=True),
|
||||
)
|
||||
|
||||
exec_command = self.workspace.process.execute_session_command(
|
||||
exec_session_id,
|
||||
SessionExecuteRequest(command=start_command_str, var_async=True),
|
||||
)
|
||||
|
||||
self.log('debug', f'exec_command_id: {exec_command.cmd_id}')
|
||||
|
||||
@tenacity.retry(
|
||||
stop=tenacity.stop_after_delay(120) | stop_if_should_exit(),
|
||||
wait=tenacity.wait_fixed(1),
|
||||
reraise=(ConnectionRefusedError,),
|
||||
)
|
||||
def _wait_until_alive(self):
|
||||
super().check_if_alive()
|
||||
|
||||
async def connect(self):
|
||||
self.send_status_message('STATUS$STARTING_RUNTIME')
|
||||
|
||||
if self.attach_to_existing:
|
||||
self.workspace = await call_sync_from_async(self._get_workspace)
|
||||
|
||||
if self.workspace is None:
|
||||
self.send_status_message('STATUS$PREPARING_CONTAINER')
|
||||
self.workspace = await call_sync_from_async(self._create_workspace)
|
||||
self.log('info', f'Created new workspace with id: {self.workspace_id}')
|
||||
|
||||
if self._get_workspace_status() == 'stopped':
|
||||
self.log('info', 'Starting Daytona workspace...')
|
||||
await call_sync_from_async(self.workspace.start)
|
||||
|
||||
self.api_url = await call_sync_from_async(
|
||||
self._construct_api_url, self._sandbox_port
|
||||
)
|
||||
|
||||
if not self.attach_to_existing:
|
||||
await call_sync_from_async(self._start_action_execution_server)
|
||||
self.log(
|
||||
'info',
|
||||
f'Container started. Action execution server url: {self.api_url}',
|
||||
)
|
||||
|
||||
self.log('info', 'Waiting for client to become ready...')
|
||||
self.send_status_message('STATUS$WAITING_FOR_CLIENT')
|
||||
await call_sync_from_async(self._wait_until_alive)
|
||||
|
||||
if not self.attach_to_existing:
|
||||
await call_sync_from_async(self.setup_initial_env)
|
||||
|
||||
self.log(
|
||||
'info',
|
||||
f'Container initialized with plugins: {[plugin.name for plugin in self.plugins]}',
|
||||
)
|
||||
|
||||
if not self.attach_to_existing:
|
||||
self.send_status_message(' ')
|
||||
self._runtime_initialized = True
|
||||
|
||||
def close(self):
|
||||
super().close()
|
||||
|
||||
if self.attach_to_existing:
|
||||
return
|
||||
|
||||
if self.workspace:
|
||||
self.daytona.remove(self.workspace)
|
||||
|
||||
@property
|
||||
def vscode_url(self) -> str | None:
|
||||
if self._vscode_url is not None: # cached value
|
||||
return self._vscode_url
|
||||
token = super().get_vscode_token()
|
||||
if not token:
|
||||
self.log(
|
||||
'warning', 'Failed to get VSCode token while trying to get VSCode URL'
|
||||
)
|
||||
return None
|
||||
if not self.workspace:
|
||||
self.log(
|
||||
'warning', 'Workspace is not initialized while trying to get VSCode URL'
|
||||
)
|
||||
return None
|
||||
self._vscode_url = (
|
||||
self._construct_api_url(self._vscode_port)
|
||||
+ f'/?tkn={token}&folder={self.config.workspace_mount_path_in_sandbox}'
|
||||
)
|
||||
|
||||
self.log(
|
||||
'debug',
|
||||
f'VSCode URL: {self._vscode_url}',
|
||||
)
|
||||
|
||||
return self._vscode_url
|
||||
@@ -307,11 +307,17 @@ class InvariantAnalyzer(SecurityAnalyzer):
|
||||
new_elements = parse_element(self.trace, event)
|
||||
input = [e.model_dump(exclude_none=True) for e in new_elements] # type: ignore [call-overload]
|
||||
self.trace.extend(new_elements)
|
||||
result, err = self.monitor.check(self.input, input)
|
||||
check_result = self.monitor.check(self.input, input)
|
||||
self.input.extend(input)
|
||||
risk = ActionSecurityRisk.UNKNOWN
|
||||
if err:
|
||||
logger.warning(f'Error checking policy: {err}')
|
||||
|
||||
if isinstance(check_result, tuple):
|
||||
result, err = check_result
|
||||
if err:
|
||||
logger.warning(f'Error checking policy: {err}')
|
||||
return risk
|
||||
else:
|
||||
logger.warning(f'Error checking policy: {check_result}')
|
||||
return risk
|
||||
|
||||
risk = self.get_risk(result)
|
||||
|
||||
@@ -50,7 +50,7 @@ class InvariantClient:
|
||||
return None
|
||||
|
||||
class _Policy:
|
||||
def __init__(self, invariant):
|
||||
def __init__(self, invariant: 'InvariantClient') -> None:
|
||||
self.server = invariant.server
|
||||
self.session_id = invariant.session_id
|
||||
|
||||
@@ -77,7 +77,7 @@ class InvariantClient:
|
||||
except (ConnectionError, Timeout, HTTPError) as err:
|
||||
return None, err
|
||||
|
||||
def from_string(self, rule: str):
|
||||
def from_string(self, rule: str) -> 'InvariantClient._Policy':
|
||||
policy_id, err = self._create_policy(rule)
|
||||
if err:
|
||||
raise err
|
||||
@@ -97,7 +97,7 @@ class InvariantClient:
|
||||
return None, err
|
||||
|
||||
class _Monitor:
|
||||
def __init__(self, invariant):
|
||||
def __init__(self, invariant: 'InvariantClient') -> None:
|
||||
self.server = invariant.server
|
||||
self.session_id = invariant.session_id
|
||||
self.policy = ''
|
||||
@@ -114,7 +114,7 @@ class InvariantClient:
|
||||
except (ConnectionError, Timeout, HTTPError) as err:
|
||||
return None, err
|
||||
|
||||
def from_string(self, rule: str):
|
||||
def from_string(self, rule: str) -> 'InvariantClient._Monitor':
|
||||
monitor_id, err = self._create_monitor(rule)
|
||||
if err:
|
||||
raise err
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from typing import Any, Iterable, Tuple
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic.dataclasses import dataclass
|
||||
|
||||
@@ -10,7 +11,7 @@ class LLM:
|
||||
|
||||
class Event(BaseModel):
|
||||
metadata: dict | None = Field(
|
||||
default_factory=dict, description='Metadata associated with the event'
|
||||
default_factory=lambda: dict(), description='Metadata associated with the event'
|
||||
)
|
||||
|
||||
|
||||
@@ -30,7 +31,7 @@ class Message(Event):
|
||||
content: str | None
|
||||
tool_calls: list[ToolCall] | None = None
|
||||
|
||||
def __rich_repr__(self):
|
||||
def __rich_repr__(self) -> Iterable[Any | tuple[Any] | tuple[str, Any] | tuple[str, Any, Any]]:
|
||||
# Print on separate line
|
||||
yield 'role', self.role
|
||||
yield 'content', self.content
|
||||
|
||||
146
poetry.lock
generated
146
poetry.lock
generated
@@ -1507,6 +1507,47 @@ tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0
|
||||
torch = ["torch"]
|
||||
vision = ["Pillow (>=9.4.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "daytona-api-client"
|
||||
version = "0.13.0"
|
||||
description = "Daytona Workspaces"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "daytona_api_client-0.13.0-py3-none-any.whl", hash = "sha256:c4d0dcb89a328c4d0a97d8f076eaf9a00ccc54a8b9f862f4b3302ae887d03c8f"},
|
||||
{file = "daytona_api_client-0.13.0.tar.gz", hash = "sha256:d62b7cb14361b2706df192d2da7dc2b5d02be6fd4259e9433cf2bfdc5807416d"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
pydantic = ">=2"
|
||||
python-dateutil = ">=2.8.2"
|
||||
typing-extensions = ">=4.7.1"
|
||||
urllib3 = ">=1.25.3,<3.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "daytona-sdk"
|
||||
version = "0.9.1"
|
||||
description = "Python SDK for Daytona"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "daytona_sdk-0.9.1-py3-none-any.whl", hash = "sha256:cce6c90cd3d578747b3c388e24c811cb0b21ad125d34b32836c50059a577a12a"},
|
||||
{file = "daytona_sdk-0.9.1.tar.gz", hash = "sha256:1e2f219f55130fc72d2f14a57d008b8d3e236d45294e0ca51e249106be5ca5de"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
daytona_api_client = ">=0.13.0,<1.0.0"
|
||||
environs = ">=9.5.0,<10.0.0"
|
||||
marshmallow = ">=3.19.0,<4.0.0"
|
||||
pydantic = ">=2.4.2,<3.0.0"
|
||||
python-dateutil = ">=2.8.2,<3.0.0"
|
||||
urllib3 = ">=2.0.7,<3.0.0"
|
||||
|
||||
[package.extras]
|
||||
dev = ["black (>=22.0.0)", "isort (>=5.10.0)", "pydoc-markdown (>=4.8.2)"]
|
||||
|
||||
[[package]]
|
||||
name = "debugpy"
|
||||
version = "1.8.12"
|
||||
@@ -1731,6 +1772,28 @@ files = [
|
||||
{file = "english-words-2.0.1.tar.gz", hash = "sha256:a4105c57493bb757a3d8973fcf8e1dc05e7ca09c836dff467c3fb445f84bc43d"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "environs"
|
||||
version = "9.5.0"
|
||||
description = "simplified environment variable parsing"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "environs-9.5.0-py2.py3-none-any.whl", hash = "sha256:1e549569a3de49c05f856f40bce86979e7d5ffbbc4398e7f338574c220189124"},
|
||||
{file = "environs-9.5.0.tar.gz", hash = "sha256:a76307b36fbe856bdca7ee9161e6c466fd7fcffc297109a118c59b54e27e30c9"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
marshmallow = ">=3.0.0"
|
||||
python-dotenv = "*"
|
||||
|
||||
[package.extras]
|
||||
dev = ["dj-database-url", "dj-email-url", "django-cache-url", "flake8 (==4.0.1)", "flake8-bugbear (==21.9.2)", "mypy (==0.910)", "pre-commit (>=2.4,<3.0)", "pytest", "tox"]
|
||||
django = ["dj-database-url", "dj-email-url", "django-cache-url"]
|
||||
lint = ["flake8 (==4.0.1)", "flake8-bugbear (==21.9.2)", "mypy (==0.910)", "pre-commit (>=2.4,<3.0)"]
|
||||
tests = ["dj-database-url", "dj-email-url", "django-cache-url", "pytest"]
|
||||
|
||||
[[package]]
|
||||
name = "evaluate"
|
||||
version = "0.4.3"
|
||||
@@ -3132,14 +3195,14 @@ zstd = ["zstandard (>=0.18.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "huggingface-hub"
|
||||
version = "0.28.1"
|
||||
version = "0.29.0"
|
||||
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
|
||||
optional = false
|
||||
python-versions = ">=3.8.0"
|
||||
groups = ["main", "evaluation", "llama-index"]
|
||||
files = [
|
||||
{file = "huggingface_hub-0.28.1-py3-none-any.whl", hash = "sha256:aa6b9a3ffdae939b72c464dbb0d7f99f56e649b55c3d52406f49e0a5a620c0a7"},
|
||||
{file = "huggingface_hub-0.28.1.tar.gz", hash = "sha256:893471090c98e3b6efbdfdacafe4052b20b84d59866fb6f54c33d9af18c303ae"},
|
||||
{file = "huggingface_hub-0.29.0-py3-none-any.whl", hash = "sha256:c02daa0b6bafbdacb1320fdfd1dc7151d0940825c88c4ef89837fdb1f6ea0afe"},
|
||||
{file = "huggingface_hub-0.29.0.tar.gz", hash = "sha256:64034c852be270cac16c5743fe1f659b14515a9de6342d6f42cbb2ede191fc80"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -4004,14 +4067,14 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "kubernetes"
|
||||
version = "32.0.0"
|
||||
version = "32.0.1"
|
||||
description = "Kubernetes python client"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
groups = ["llama-index"]
|
||||
files = [
|
||||
{file = "kubernetes-32.0.0-py2.py3-none-any.whl", hash = "sha256:60fd8c29e8e43d9c553ca4811895a687426717deba9c0a66fb2dcc3f5ef96692"},
|
||||
{file = "kubernetes-32.0.0.tar.gz", hash = "sha256:319fa840345a482001ac5d6062222daeb66ec4d1bcb3087402aed685adf0aecb"},
|
||||
{file = "kubernetes-32.0.1-py2.py3-none-any.whl", hash = "sha256:35282ab8493b938b08ab5526c7ce66588232df00ef5e1dbe88a419107dc10998"},
|
||||
{file = "kubernetes-32.0.1.tar.gz", hash = "sha256:42f43d49abd437ada79a79a16bd48a604d3471a117a8347e87db693f2ba0ba28"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -4786,7 +4849,7 @@ version = "3.26.1"
|
||||
description = "A lightweight library for converting complex datatypes to and from native Python datatypes."
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["evaluation", "llama-index"]
|
||||
groups = ["main", "evaluation", "llama-index"]
|
||||
files = [
|
||||
{file = "marshmallow-3.26.1-py3-none-any.whl", hash = "sha256:3350409f20a70a7e4e11a27661187b77cdcaeb20abca41c1454fe33636bea09c"},
|
||||
{file = "marshmallow-3.26.1.tar.gz", hash = "sha256:e6d8affb6cb61d39d26402096dc0aee12d5a26d490a121f118d2e81dc0719dc6"},
|
||||
@@ -4933,14 +4996,14 @@ urllib3 = "*"
|
||||
|
||||
[[package]]
|
||||
name = "mistune"
|
||||
version = "3.1.1"
|
||||
version = "3.1.2"
|
||||
description = "A sane and fast Markdown parser with useful plugins and renderers"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["runtime"]
|
||||
files = [
|
||||
{file = "mistune-3.1.1-py3-none-any.whl", hash = "sha256:02106ac2aa4f66e769debbfa028509a275069dcffce0dfa578edd7b991ee700a"},
|
||||
{file = "mistune-3.1.1.tar.gz", hash = "sha256:e0740d635f515119f7d1feb6f9b192ee60f0cc649f80a8f944f905706a21654c"},
|
||||
{file = "mistune-3.1.2-py3-none-any.whl", hash = "sha256:4b47731332315cdca99e0ded46fc0004001c1299ff773dfb48fbe1fd226de319"},
|
||||
{file = "mistune-3.1.2.tar.gz", hash = "sha256:733bf018ba007e8b5f2d3a9eb624034f6ee26c4ea769a98ec533ee111d504dff"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -8391,33 +8454,34 @@ pathspec = ">=0.10.1"
|
||||
|
||||
[[package]]
|
||||
name = "scikit-image"
|
||||
version = "0.25.1"
|
||||
version = "0.25.2"
|
||||
description = "Image processing in Python"
|
||||
optional = false
|
||||
python-versions = ">=3.10"
|
||||
groups = ["evaluation"]
|
||||
files = [
|
||||
{file = "scikit_image-0.25.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:40763a3a089617e6f00f92d46b3475368b9783588a165c2aa854da95b66bb4ff"},
|
||||
{file = "scikit_image-0.25.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:7c6b69f33e5512ee7fc53361b064430f146583f08dc75317667e81d5f8fcd0c6"},
|
||||
{file = "scikit_image-0.25.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9187347d115776ff0ddba3e5d2a04638d291b1a62e3c315d17b71eea351cde8"},
|
||||
{file = "scikit_image-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdfca713979ad1873a4b55d94bb1eb4bc713f0c10165b261bf6f7e606f44a00c"},
|
||||
{file = "scikit_image-0.25.1-cp310-cp310-win_amd64.whl", hash = "sha256:167fb146de80bb2a1493d1a760a9ac81644a8a5de254c3dd12a95d1b662d819c"},
|
||||
{file = "scikit_image-0.25.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c1bde2d5f1dfb23b3c72ef9fcdb2dd5f42fa353e8bd606aea63590eba5e79565"},
|
||||
{file = "scikit_image-0.25.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5112d95cccaa45c434e57efc20c1f721ab439e516e2ed49709ddc2afb7c15c70"},
|
||||
{file = "scikit_image-0.25.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f5e313b028f5d7a9f3888ad825ddf4fb78913d7762891abb267b99244b4dd31"},
|
||||
{file = "scikit_image-0.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39ad76aeff754048dabaff83db752aa0655dee425f006678d14485471bdb459d"},
|
||||
{file = "scikit_image-0.25.1-cp311-cp311-win_amd64.whl", hash = "sha256:8dc8b06176c1a2316fa8bc539fd7e96155721628ae5cf51bc1a2c62cb9786581"},
|
||||
{file = "scikit_image-0.25.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ebf83699d60134909647395a0bf07db3859646de7192b088e656deda6bc15e95"},
|
||||
{file = "scikit_image-0.25.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:408086520eed036340e634ab7e4f648e00238f711bac61ab933efeb11464a238"},
|
||||
{file = "scikit_image-0.25.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bd709faa87795869ccd21f32490c37989ca5846571495822f4b9430fb42c34c"},
|
||||
{file = "scikit_image-0.25.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6b15c0265c072a46ff4720784d756d8f8e5d63567639aa8451f6673994d6846"},
|
||||
{file = "scikit_image-0.25.1-cp312-cp312-win_amd64.whl", hash = "sha256:a689a0d091e0bd97d7767309abdeb27c43be210d075abb34e71657add920c22b"},
|
||||
{file = "scikit_image-0.25.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f070f899d6572a125ab106c4b26d1a5fb784dc60ba6dea45c7816f08c3a4fb4d"},
|
||||
{file = "scikit_image-0.25.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:cc9538d8db7670878aa68ea79c0b1796b6c771085e8d50f5408ee617da3281b6"},
|
||||
{file = "scikit_image-0.25.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caa08d4fa851e1f421fcad8eac24d32f2810971dc61f1d72dc950ca9e9ec39b1"},
|
||||
{file = "scikit_image-0.25.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9923aa898b7921fbcf503d32574d48ed937a7cff45ce8587be4868b39676e18"},
|
||||
{file = "scikit_image-0.25.1-cp313-cp313-win_amd64.whl", hash = "sha256:6c7bba6773ab8c39ee8b1cbb17c7f98965bacdb8cd8da337942be6acc38fc562"},
|
||||
{file = "scikit_image-0.25.1.tar.gz", hash = "sha256:d4ab30540d114d37c35fe5c837f89b94aaba2a7643afae8354aa353319e9bbbb"},
|
||||
{file = "scikit_image-0.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d3278f586793176599df6a4cf48cb6beadae35c31e58dc01a98023af3dc31c78"},
|
||||
{file = "scikit_image-0.25.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:5c311069899ce757d7dbf1d03e32acb38bb06153236ae77fcd820fd62044c063"},
|
||||
{file = "scikit_image-0.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be455aa7039a6afa54e84f9e38293733a2622b8c2fb3362b822d459cc5605e99"},
|
||||
{file = "scikit_image-0.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4c464b90e978d137330be433df4e76d92ad3c5f46a22f159520ce0fdbea8a09"},
|
||||
{file = "scikit_image-0.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:60516257c5a2d2f74387c502aa2f15a0ef3498fbeaa749f730ab18f0a40fd054"},
|
||||
{file = "scikit_image-0.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f4bac9196fb80d37567316581c6060763b0f4893d3aca34a9ede3825bc035b17"},
|
||||
{file = "scikit_image-0.25.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d989d64ff92e0c6c0f2018c7495a5b20e2451839299a018e0e5108b2680f71e0"},
|
||||
{file = "scikit_image-0.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2cfc96b27afe9a05bc92f8c6235321d3a66499995675b27415e0d0c76625173"},
|
||||
{file = "scikit_image-0.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24cc986e1f4187a12aa319f777b36008764e856e5013666a4a83f8df083c2641"},
|
||||
{file = "scikit_image-0.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:b4f6b61fc2db6340696afe3db6b26e0356911529f5f6aee8c322aa5157490c9b"},
|
||||
{file = "scikit_image-0.25.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8db8dd03663112783221bf01ccfc9512d1cc50ac9b5b0fe8f4023967564719fb"},
|
||||
{file = "scikit_image-0.25.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:483bd8cc10c3d8a7a37fae36dfa5b21e239bd4ee121d91cad1f81bba10cfb0ed"},
|
||||
{file = "scikit_image-0.25.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d1e80107bcf2bf1291acfc0bf0425dceb8890abe9f38d8e94e23497cbf7ee0d"},
|
||||
{file = "scikit_image-0.25.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a17e17eb8562660cc0d31bb55643a4da996a81944b82c54805c91b3fe66f4824"},
|
||||
{file = "scikit_image-0.25.2-cp312-cp312-win_amd64.whl", hash = "sha256:bdd2b8c1de0849964dbc54037f36b4e9420157e67e45a8709a80d727f52c7da2"},
|
||||
{file = "scikit_image-0.25.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7efa888130f6c548ec0439b1a7ed7295bc10105458a421e9bf739b457730b6da"},
|
||||
{file = "scikit_image-0.25.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:dd8011efe69c3641920614d550f5505f83658fe33581e49bed86feab43a180fc"},
|
||||
{file = "scikit_image-0.25.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28182a9d3e2ce3c2e251383bdda68f8d88d9fff1a3ebe1eb61206595c9773341"},
|
||||
{file = "scikit_image-0.25.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8abd3c805ce6944b941cfed0406d88faeb19bab3ed3d4b50187af55cf24d147"},
|
||||
{file = "scikit_image-0.25.2-cp313-cp313-win_amd64.whl", hash = "sha256:64785a8acefee460ec49a354706db0b09d1f325674107d7fa3eadb663fb56d6f"},
|
||||
{file = "scikit_image-0.25.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:330d061bd107d12f8d68f1d611ae27b3b813b8cdb0300a71d07b1379178dd4cd"},
|
||||
{file = "scikit_image-0.25.2.tar.gz", hash = "sha256:e5a37e6cd4d0c018a7a55b9d601357e3382826d3888c10d0213fc63bff977dde"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -8427,16 +8491,16 @@ networkx = ">=3.0"
|
||||
numpy = ">=1.24"
|
||||
packaging = ">=21"
|
||||
pillow = ">=10.1"
|
||||
scipy = ">=1.11.2"
|
||||
scipy = ">=1.11.4"
|
||||
tifffile = ">=2022.8.12"
|
||||
|
||||
[package.extras]
|
||||
build = ["Cython (>=3.0.8)", "build (>=1.2.1)", "meson-python (>=0.16)", "ninja (>=1.11.1.1)", "numpy (>=2.0)", "pythran (>=0.16)", "setuptools (>=68)", "spin (==0.13)"]
|
||||
build = ["Cython (>=3.0.8)", "build (>=1.2.1)", "meson-python (>=0.16)", "ninja (>=1.11.1.1)", "numpy (>=2.0)", "pythran (>=0.16)", "spin (==0.13)"]
|
||||
data = ["pooch (>=1.6.0)"]
|
||||
developer = ["ipython", "pre-commit", "tomli"]
|
||||
docs = ["PyWavelets (>=1.6)", "dask[array] (>=2022.9.2)", "intersphinx-registry (>=0.2411.14)", "ipykernel", "ipywidgets", "kaleido (==0.2.1)", "matplotlib (>=3.7)", "myst-parser", "numpydoc (>=1.7)", "pandas (>=2.0)", "plotly (>=5.20)", "pooch (>=1.6)", "pydata-sphinx-theme (>=0.16)", "pytest-doctestplus", "scikit-learn (>=1.2)", "seaborn (>=0.11)", "sphinx (>=8.0)", "sphinx-copybutton", "sphinx-gallery[parallel] (>=0.18)", "sphinx_design (>=0.5)", "tifffile (>=2022.8.12)"]
|
||||
optional = ["PyWavelets (>=1.6)", "SimpleITK", "astropy (>=5.0)", "cloudpickle (>=0.2.1)", "dask[array] (>=2021.1.0,!=2024.8.0)", "matplotlib (>=3.7)", "pooch (>=1.6.0)", "pyamg (>=5.2)", "scikit-learn (>=1.2)"]
|
||||
test = ["asv", "numpydoc (>=1.7)", "pooch (>=1.6.0)", "pytest (>=7.0)", "pytest-cov (>=2.11.0)", "pytest-doctestplus", "pytest-faulthandler", "pytest-localserver"]
|
||||
docs = ["PyWavelets (>=1.6)", "dask[array] (>=2023.2.0)", "intersphinx-registry (>=0.2411.14)", "ipykernel", "ipywidgets", "kaleido (==0.2.1)", "matplotlib (>=3.7)", "myst-parser", "numpydoc (>=1.7)", "pandas (>=2.0)", "plotly (>=5.20)", "pooch (>=1.6)", "pydata-sphinx-theme (>=0.16)", "pytest-doctestplus", "scikit-learn (>=1.2)", "seaborn (>=0.11)", "sphinx (>=8.0)", "sphinx-copybutton", "sphinx-gallery[parallel] (>=0.18)", "sphinx_design (>=0.5)", "tifffile (>=2022.8.12)"]
|
||||
optional = ["PyWavelets (>=1.6)", "SimpleITK", "astropy (>=5.0)", "cloudpickle (>=1.1.1)", "dask[array] (>=2023.2.0)", "matplotlib (>=3.7)", "pooch (>=1.6.0)", "pyamg (>=5.2)", "scikit-learn (>=1.2)"]
|
||||
test = ["asv", "numpydoc (>=1.7)", "pooch (>=1.6.0)", "pytest (>=8)", "pytest-cov (>=2.11.0)", "pytest-doctestplus", "pytest-faulthandler", "pytest-localserver"]
|
||||
|
||||
[[package]]
|
||||
name = "scikit-learn"
|
||||
@@ -9206,14 +9270,14 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "tifffile"
|
||||
version = "2025.1.10"
|
||||
version = "2025.2.18"
|
||||
description = "Read and write TIFF files"
|
||||
optional = false
|
||||
python-versions = ">=3.10"
|
||||
groups = ["evaluation"]
|
||||
files = [
|
||||
{file = "tifffile-2025.1.10-py3-none-any.whl", hash = "sha256:ed24cf4c99fb13b4f5fb29f8a0d5605e60558c950bccbdca2a6470732a27cfb3"},
|
||||
{file = "tifffile-2025.1.10.tar.gz", hash = "sha256:baaf0a3b87bf7ec375fa1537503353f70497eabe1bdde590f2e41cc0346e612f"},
|
||||
{file = "tifffile-2025.2.18-py3-none-any.whl", hash = "sha256:54b36c4d5e5b8d8920134413edfe5a7cfb1c7617bb50cddf7e2772edb7149043"},
|
||||
{file = "tifffile-2025.2.18.tar.gz", hash = "sha256:8d731789e691b468746c1615d989bc550ac93cf753e9210865222e90a5a95d11"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -10789,4 +10853,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"]
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = "^3.12"
|
||||
content-hash = "14998d54438fedacad9d82422003f46d0d7721bd50c2f8096657c15dce0f3edd"
|
||||
content-hash = "39e0f069346a4d1e52193899989b79ea3e02f81d67fbb2ac0fdc87e70bd1008f"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "openhands-ai"
|
||||
version = "0.25.0"
|
||||
version = "0.26.0"
|
||||
description = "OpenHands: Code Less, Make More"
|
||||
authors = ["OpenHands"]
|
||||
license = "MIT"
|
||||
@@ -76,6 +76,7 @@ stripe = "^11.5.0"
|
||||
ipywidgets = "^8.1.5"
|
||||
qtconsole = "^5.6.1"
|
||||
memory-profiler = "^0.61.0"
|
||||
daytona-sdk = "0.9.1"
|
||||
|
||||
[tool.poetry.group.llama-index.dependencies]
|
||||
llama-index = "*"
|
||||
@@ -108,7 +109,6 @@ reportlab = "*"
|
||||
[tool.coverage.run]
|
||||
concurrency = ["gevent"]
|
||||
|
||||
|
||||
[tool.poetry.group.runtime.dependencies]
|
||||
jupyterlab = "*"
|
||||
notebook = "*"
|
||||
@@ -137,7 +137,6 @@ ignore = ["D1"]
|
||||
[tool.ruff.lint.pydocstyle]
|
||||
convention = "google"
|
||||
|
||||
|
||||
[tool.poetry.group.evaluation.dependencies]
|
||||
streamlit = "*"
|
||||
whatthepatch = "*"
|
||||
|
||||
@@ -1,2 +0,0 @@
|
||||
timeout: 120 # 2 minutes
|
||||
required: true
|
||||
@@ -1 +0,0 @@
|
||||
Create a bash script called hello.sh that prints "hello world"
|
||||
@@ -1,28 +0,0 @@
|
||||
#!/bin/bash
# Checks the hello.sh in the current directory: it must exist, be
# executable, and print exactly "hello world".
set -e
set +x

# Print the given message and abort the check with a failing exit status.
fail() {
    echo "$1"
    exit 1
}

echo "checking hello world"
pwd
ls -lah

# The script has to be present ...
[ -f hello.sh ] || fail "hello.sh does not exist"

# ... and carry the executable bit.
[ -x hello.sh ] || fail "hello.sh is not executable"

# Capture its output and compare against the expected text.
output=$(./hello.sh)
[ "$output" = "hello world" ] || fail "Expected 'hello world' but got: $output"

exit 0
|
||||
@@ -1,158 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import asyncio
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import yaml
|
||||
|
||||
import openhands.agenthub # noqa: F401 - import to register agents
|
||||
from openhands.core.config import AppConfig
|
||||
from openhands.core.main import auto_continue_response, run_controller
|
||||
from openhands.events.action import MessageAction
|
||||
|
||||
|
||||
def run_test_case(case_dir: Path) -> bool:
    """Run a single regression test case.

    The case directory must contain ``prompt.txt`` (the task handed to the
    agent) and ``test.sh`` (the check executed after the agent finishes).
    An optional ``case.yaml`` may provide ``timeout``, ``required``, ``git``
    and ``commit-ish`` keys.

    Args:
        case_dir: Path to the test case directory

    Returns:
        bool: True if the test passed, or if it failed/errored but the case
        is not marked as required; False if a required case failed.
    """
    # Function-scope import: only the git invocations below need it.
    import subprocess

    case_name = case_dir.name
    print(f'Running test case: {case_name}')

    # Read the case configuration once and reuse it for every setting.
    timeout = 120  # Default timeout 2 minutes
    required = True
    case_config: Optional[Dict[str, Any]] = None
    case_yaml = case_dir / 'case.yaml'
    if case_yaml.exists():
        with open(case_yaml) as f:
            case_config = yaml.safe_load(f)
    if case_config:
        # NOTE(review): timeout is parsed but nothing below enforces it.
        timeout = case_config.get('timeout', timeout)
        required = case_config.get('required', required)

    # Always start from a fresh workspace inside the case directory.
    workspace_dir = case_dir / 'workspace'
    if workspace_dir.exists():
        shutil.rmtree(workspace_dir)
    workspace_dir.mkdir(exist_ok=True)
    temp_dir = str(workspace_dir)

    # Setting NO_CLEANUP keeps the workspace on disk after the run.
    cleanup_workspace = not os.getenv('NO_CLEANUP')

    try:
        # Optionally seed the workspace from a git repository.
        if case_config and 'git' in case_config:
            repo = case_config['git']
            commit = case_config.get('commit-ish', 'main')
            # Argument lists instead of shell strings: paths with spaces and
            # shell metacharacters in the YAML values are passed verbatim.
            # check=False keeps the original best-effort behaviour (a failed
            # clone/checkout does not abort the case by itself).
            subprocess.run(['git', 'clone', str(repo), temp_dir], check=False)
            subprocess.run(
                ['git', 'checkout', str(commit)], cwd=temp_dir, check=False
            )

        # Copy prompt and test script into the workspace
        shutil.copy2(case_dir / 'prompt.txt', workspace_dir / 'prompt.txt')
        shutil.copy2(case_dir / 'test.sh', workspace_dir / 'test.sh')
        os.chmod(workspace_dir / 'test.sh', 0o755)  # Make test.sh executable

        # Read the prompt
        with open(case_dir / 'prompt.txt') as f:
            task_str = f.read()

        # Set up OpenHands configuration
        config = AppConfig()
        config.name = case_name
        config.agent_cls = 'CodeActAgent'
        config.max_budget_per_task = 100
        config.max_iterations = 100
        config.cli_multiline_input = False
        config.config_file = str(Path(__file__).parent.parent.parent / 'config.toml')
        config.workspace_base = str(workspace_dir)
        config.workspace_mount_path = str(workspace_dir)
        config.workspace_mount_path_in_sandbox = '/workspace'
        config.sandbox.keep_runtime_alive = False
        config.save_trajectory_path = str(workspace_dir / 'trajectory.json')
        initial_user_action = MessageAction(content=task_str)

        # test.sh is invoked by relative path, so run from the workspace and
        # restore the previous working directory afterwards.
        original_cwd = os.getcwd()
        os.chdir(temp_dir)
        try:
            # Run OpenHands
            asyncio.run(
                run_controller(
                    config=config,
                    initial_user_action=initial_user_action,
                    fake_user_response_fn=auto_continue_response,
                    headless_mode=True,
                )
            )

            # Run the test script; a non-zero status means failure.
            if os.system('./test.sh') == 0:
                print(f'Test case {case_name} passed')
                return True
            print(f'Test case {case_name} failed')
            # A failing optional case does not fail the overall run.
            return not required
        except Exception as e:
            print(f'Error running test case {case_name}: {e}')
            return not required
        finally:
            os.chdir(original_cwd)
    finally:
        if cleanup_workspace and workspace_dir.exists():
            shutil.rmtree(workspace_dir)
|
||||
|
||||
|
||||
def main() -> None:
    """Run every regression case under ``cases/`` and exit with the overall status.

    Exits with status 0 when all cases pass and 1 otherwise.
    """
    cases_root = Path(__file__).parent / 'cases'

    # Every case directory is executed; a failure does not stop the loop.
    failed_cases = 0
    for entry in cases_root.iterdir():
        if not entry.is_dir():
            continue
        if not run_test_case(entry):
            failed_cases += 1

    if failed_cases:
        print('Some tests failed')
        sys.exit(1)

    print('All tests completed successfully')
    sys.exit(0)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -11,6 +11,7 @@ from openhands.core.config import AppConfig, load_app_config
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events import EventStream
|
||||
from openhands.runtime.base import Runtime
|
||||
from openhands.runtime.impl.daytona.daytona_runtime import DaytonaRuntime
|
||||
from openhands.runtime.impl.docker.docker_runtime import DockerRuntime
|
||||
from openhands.runtime.impl.local.local_runtime import LocalRuntime
|
||||
from openhands.runtime.impl.remote.remote_runtime import RemoteRuntime
|
||||
@@ -130,6 +131,8 @@ def get_runtime_classes() -> list[type[Runtime]]:
|
||||
return [RemoteRuntime]
|
||||
elif runtime.lower() == 'runloop':
|
||||
return [RunloopRuntime]
|
||||
elif runtime.lower() == 'daytona':
|
||||
return [DaytonaRuntime]
|
||||
else:
|
||||
raise ValueError(f'Invalid runtime: {runtime}')
|
||||
|
||||
|
||||
101
tests/unit/test_cli_sid.py
Normal file
101
tests/unit/test_cli_sid.py
Normal file
@@ -0,0 +1,101 @@
|
||||
import asyncio
|
||||
from argparse import Namespace
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from openhands.core.cli import main
|
||||
from openhands.core.config import AppConfig
|
||||
from openhands.core.schema import AgentState
|
||||
from openhands.events.event import EventSource
|
||||
from openhands.events.observation import AgentStateChangedObservation
|
||||
|
||||
|
||||
@pytest.fixture
def mock_runtime():
    """Patch ``create_runtime`` in the CLI module and yield the fake runtime it returns."""
    with patch('openhands.core.cli.create_runtime') as create_runtime_patch:
        # Event stream double with awaitable subscribe/add_event methods.
        stream = AsyncMock()
        stream.subscribe = AsyncMock()
        stream.add_event = AsyncMock()

        runtime = AsyncMock()
        runtime.event_stream = stream
        # connect() is an awaitable mock, so awaiting it returns immediately.
        runtime.connect = AsyncMock()
        # Ensure status_callback is None
        runtime.status_callback = None

        create_runtime_patch.return_value = runtime
        yield runtime
|
||||
|
||||
|
||||
@pytest.fixture
def mock_agent():
    """Patch ``create_agent`` in the CLI module and yield the fake agent it returns."""
    with patch('openhands.core.cli.create_agent') as create_agent_patch:
        agent = AsyncMock()
        create_agent_patch.return_value = agent
        yield agent
|
||||
|
||||
|
||||
@pytest.fixture
def mock_controller():
    """Patch ``create_controller`` in the CLI module and yield the fake controller.

    The patched factory returns a ``(controller, None)`` tuple.
    """
    with patch('openhands.core.cli.create_controller') as create_controller_patch:
        controller = AsyncMock()
        # run_until_done resolves to None right away instead of looping.
        controller.run_until_done = AsyncMock(return_value=None)
        create_controller_patch.return_value = (controller, None)
        yield controller
|
||||
|
||||
|
||||
@pytest.fixture
def task_file(tmp_path: Path) -> Path:
    """Write the task prompt to ``task.txt`` under ``tmp_path`` and return its path."""
    path = tmp_path / 'task.txt'
    path.write_text('Ask me what your task is')
    return path
|
||||
|
||||
|
||||
@pytest.fixture
def mock_config(task_file: Path):
    """Patch CLI argument parsing and config setup, yielding the AppConfig in use.

    ``parse_arguments`` returns a Namespace pointing at the temporary task
    file, and ``setup_config_from_args`` returns a default ``AppConfig``.
    """
    cli_args = Namespace(file=str(task_file), task=None, directory=None)
    app_config = AppConfig()

    with patch('openhands.core.cli.parse_arguments') as parse_args_patch, patch(
        'openhands.core.cli.setup_config_from_args'
    ) as setup_config_patch:
        parse_args_patch.return_value = cli_args
        setup_config_patch.return_value = app_config
        yield app_config
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_cli_session_id_output(
    mock_runtime, mock_agent, mock_controller, mock_config, capsys
):
    """Check that the CLI prints a session ID and forwards the task text.

    ``main`` runs as a background task against mocked runtime, agent and
    controller objects; the test then inspects captured stdout and the calls
    recorded on the runtime mock.
    """
    # status_callback is set when initializing the runtime
    mock_controller.status_callback = None

    # Use input patch just for the exit command
    with patch('builtins.input', return_value='exit'):
        # Inside a coroutine the running loop is the one to hand over;
        # get_running_loop() is the documented preferred call here.
        main_task = asyncio.create_task(main(asyncio.get_running_loop()))

        # Give it a moment to display the session ID
        await asyncio.sleep(0.1)

        # Trigger agent state change to STOPPED to end the main loop
        event = AgentStateChangedObservation(
            content='Stop', agent_state=AgentState.STOPPED
        )
        event._source = EventSource.AGENT
        await mock_runtime.event_stream.add_event(event)

        # Wait for main to finish with a timeout
        try:
            await asyncio.wait_for(main_task, timeout=1.0)
        except asyncio.TimeoutError:
            main_task.cancel()

        # Check the output
        captured = capsys.readouterr()
        assert 'Session ID:' in captured.out
        # Also verify that our task message was processed
        assert 'Ask me what your task is' in str(mock_runtime.mock_calls)
|
||||
@@ -38,7 +38,7 @@ def create_test_event(
|
||||
event = Event()
|
||||
event._message = message
|
||||
event.timestamp = timestamp if timestamp else datetime.now()
|
||||
if id:
|
||||
if id is not None:
|
||||
event._id = id
|
||||
event._source = EventSource.USER
|
||||
return event
|
||||
@@ -186,13 +186,14 @@ def test_recent_events_condenser():
|
||||
assert result == events
|
||||
|
||||
# If the max_events are smaller than the number of events, only keep the last few.
|
||||
max_events = 2
|
||||
max_events = 3
|
||||
condenser = RecentEventsCondenser(max_events=max_events)
|
||||
result = condenser.condensed_history(mock_state)
|
||||
|
||||
assert len(result) == max_events
|
||||
assert result[0]._message == 'Event 4'
|
||||
assert result[1]._message == 'Event 5'
|
||||
assert result[0]._message == 'Event 1' # kept from keep_first
|
||||
assert result[1]._message == 'Event 4' # kept from max_events
|
||||
assert result[2]._message == 'Event 5' # kept from max_events
|
||||
|
||||
# If the keep_first flag is set, the first event will always be present.
|
||||
keep_first = 1
|
||||
@@ -211,9 +212,9 @@ def test_recent_events_condenser():
|
||||
result = condenser.condensed_history(mock_state)
|
||||
|
||||
assert len(result) == max_events
|
||||
assert result[0]._message == 'Event 1'
|
||||
assert result[1]._message == 'Event 2'
|
||||
assert result[2]._message == 'Event 5'
|
||||
assert result[0]._message == 'Event 1' # kept from keep_first
|
||||
assert result[1]._message == 'Event 2' # kept from keep_first
|
||||
assert result[2]._message == 'Event 5' # kept from max_events
|
||||
|
||||
|
||||
def test_llm_summarization_condenser_from_config():
|
||||
@@ -539,7 +540,7 @@ def test_llm_attention_condenser_forgets_when_larger_than_max_size(
|
||||
):
|
||||
"""Test that the LLMAttentionCondenser forgets events when the context grows too large."""
|
||||
max_size = 2
|
||||
condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
|
||||
condenser = LLMAttentionCondenser(max_size=max_size, keep_first=0, llm=mock_llm)
|
||||
|
||||
for i in range(max_size * 10):
|
||||
event = create_test_event(f'Event {i}', id=i)
|
||||
@@ -560,7 +561,7 @@ def test_llm_attention_condenser_forgets_when_larger_than_max_size(
|
||||
def test_llm_attention_condenser_handles_events_outside_history(mock_llm, mock_state):
|
||||
"""Test that the LLMAttentionCondenser handles event IDs that aren't from the event history."""
|
||||
max_size = 2
|
||||
condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
|
||||
condenser = LLMAttentionCondenser(max_size=max_size, keep_first=0, llm=mock_llm)
|
||||
|
||||
for i in range(max_size * 10):
|
||||
event = create_test_event(f'Event {i}', id=i)
|
||||
@@ -580,7 +581,7 @@ def test_llm_attention_condenser_handles_events_outside_history(mock_llm, mock_s
|
||||
def test_llm_attention_condenser_handles_too_many_events(mock_llm, mock_state):
|
||||
"""Test that the LLMAttentionCondenser handles when the response contains too many event IDs."""
|
||||
max_size = 2
|
||||
condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
|
||||
condenser = LLMAttentionCondenser(max_size=max_size, keep_first=0, llm=mock_llm)
|
||||
|
||||
for i in range(max_size * 10):
|
||||
event = create_test_event(f'Event {i}', id=i)
|
||||
@@ -600,7 +601,9 @@ def test_llm_attention_condenser_handles_too_many_events(mock_llm, mock_state):
|
||||
def test_llm_attention_condenser_handles_too_few_events(mock_llm, mock_state):
|
||||
"""Test that the LLMAttentionCondenser handles when the response contains too few event IDs."""
|
||||
max_size = 2
|
||||
condenser = LLMAttentionCondenser(max_size=max_size, llm=mock_llm)
|
||||
# Developer note: We must specify keep_first=0 because
|
||||
# keep_first (1) >= max_size//2 (1) is invalid.
|
||||
condenser = LLMAttentionCondenser(max_size=max_size, keep_first=0, llm=mock_llm)
|
||||
|
||||
for i in range(max_size * 10):
|
||||
event = create_test_event(f'Event {i}', id=i)
|
||||
@@ -614,3 +617,33 @@ def test_llm_attention_condenser_handles_too_few_events(mock_llm, mock_state):
|
||||
|
||||
# The number of results should bounce back and forth between 1, 2, 1, 2, ...
|
||||
assert len(results) == (i % 2) + 1
|
||||
|
||||
# Add a new test verifying that keep_first=1 works with max_size > 2
|
||||
|
||||
|
||||
def test_llm_attention_condenser_handles_keep_first_for_larger_max_size(
    mock_llm, mock_state
):
    """Check LLMAttentionCondenser with keep_first=1 and a max_size large enough to allow it."""
    max_size = 4  # so keep_first=1 < (max_size // 2) = 2
    condenser = LLMAttentionCondenser(max_size=max_size, keep_first=1, llm=mock_llm)

    for event_id in range(max_size * 2):
        # Grow the history by one event, then condense with an empty selection.
        mock_state.history.append(
            create_test_event(f'Event {event_id}', id=event_id)
        )
        mock_llm.set_mock_response_content(
            ImportantEventSelection(ids=[]).model_dump_json()
        )
        results = condenser.condensed_history(mock_state)

        history_len = len(mock_state.history)
        if history_len > max_size:
            # Once over the limit: first event survives, result stays bounded.
            assert results[0].id == 0
            assert len(results) <= max_size
        else:
            # No condensation needed yet
            assert len(results) == history_len
|
||||
|
||||
@@ -686,6 +686,7 @@ def test_api_keys_repr_str():
|
||||
modal_api_token_id='my_modal_api_token_id',
|
||||
modal_api_token_secret='my_modal_api_token_secret',
|
||||
runloop_api_key='my_runloop_api_key',
|
||||
daytona_api_key='my_daytona_api_key',
|
||||
)
|
||||
assert 'my_e2b_api_key' not in repr(app_config)
|
||||
assert 'my_e2b_api_key' not in str(app_config)
|
||||
@@ -697,6 +698,8 @@ def test_api_keys_repr_str():
|
||||
assert 'my_modal_api_token_secret' not in str(app_config)
|
||||
assert 'my_runloop_api_key' not in repr(app_config)
|
||||
assert 'my_runloop_api_key' not in str(app_config)
|
||||
assert 'my_daytona_api_key' not in repr(app_config)
|
||||
assert 'my_daytona_api_key' not in str(app_config)
|
||||
|
||||
# Check that no other attrs in AppConfig have 'key' or 'token' in their name
|
||||
# This will fail when new attrs are added, and attract attention
|
||||
@@ -705,6 +708,7 @@ def test_api_keys_repr_str():
|
||||
'modal_api_token_id',
|
||||
'modal_api_token_secret',
|
||||
'runloop_api_key',
|
||||
'daytona_api_key',
|
||||
]
|
||||
for attr_name in AppConfig.model_fields.keys():
|
||||
if (
|
||||
|
||||
169
tests/unit/test_config_extended.py
Normal file
169
tests/unit/test_config_extended.py
Normal file
@@ -0,0 +1,169 @@
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from openhands.core.config.app_config import AppConfig
|
||||
from openhands.core.config.extended_config import ExtendedConfig
|
||||
from openhands.core.config.utils import load_from_toml
|
||||
|
||||
|
||||
def test_extended_config_from_dict():
    """
    ExtendedConfig.from_dict builds an instance from a dict of arbitrary
    keys, exposing them as attributes, as items, and via ``root``.
    """
    payload = {'foo': 'bar', 'baz': 123, 'flag': True}
    cfg = ExtendedConfig.from_dict(payload)

    # The root dictionary mirrors the input mapping.
    assert cfg.root == payload
    # Values are reachable through attribute and item access alike.
    assert cfg.foo == 'bar'
    assert cfg['baz'] == 123
    assert cfg.flag is True
|
||||
|
||||
|
||||
def test_extended_config_empty():
    """
    An empty ExtendedConfig can be built via from_dict or the constructor.
    """
    via_factory = ExtendedConfig.from_dict({})
    assert via_factory.root == {}

    # Direct construction yields the same empty root.
    via_constructor = ExtendedConfig({})
    assert via_constructor.root == {}
|
||||
|
||||
|
||||
def test_extended_config_str_and_repr():
    """
    str() and repr() of an ExtendedConfig include its key/value pairs and
    are identical to each other.
    """
    cfg = ExtendedConfig.from_dict({'alpha': 'test', 'beta': 42})
    as_str = str(cfg)
    as_repr = repr(cfg)

    # Each pair must show up in the rendered text.
    for fragment in ("alpha='test'", 'beta=42'):
        assert fragment in as_str

    # __repr__ should match __str__
    assert as_str == as_repr
|
||||
|
||||
|
||||
def test_extended_config_getitem_and_getattr():
    """
    Values of an ExtendedConfig are readable through both attribute access
    and dictionary-style indexing.
    """
    cfg = ExtendedConfig.from_dict({'key1': 'value1', 'key2': 2})

    assert cfg.key1 == 'value1'  # attribute access
    assert cfg['key2'] == 2  # dictionary-style access
|
||||
|
||||
|
||||
def test_extended_config_invalid_key():
    """
    A missing key raises AttributeError on attribute access and KeyError
    on item access.
    """
    cfg = ExtendedConfig.from_dict({'existing': 'yes'})

    with pytest.raises(AttributeError):
        cfg.nonexistent

    with pytest.raises(KeyError):
        cfg['nonexistent']
|
||||
|
||||
|
||||
def test_app_config_extended_from_toml(tmp_path: os.PathLike) -> None:
    """
    The [extended] section of a TOML file is loaded into AppConfig.extended
    with arbitrary keys, without disturbing the regular [llm] settings.
    """
    # TOML with several sections, including [extended]
    toml_text = """
[core]
workspace_base = "/tmp/workspace"

[llm]
model = "test-model"
api_key = "toml-api-key"

[extended]
custom1 = "custom_value"
custom2 = 42
llm = "overridden" # even a key like 'llm' is accepted in extended

[agent]
memory_enabled = true
"""
    toml_path = tmp_path / 'config.toml'
    toml_path.write_text(toml_text)

    app_config = AppConfig()
    load_from_toml(app_config, str(toml_path))

    extended = app_config.extended
    assert extended.custom1 == 'custom_value'
    assert extended.custom2 == 42
    # An 'llm' key inside [extended] must not leak into the main llm config.
    assert app_config.get_llm_config().model == 'test-model'
|
||||
|
||||
|
||||
def test_app_config_extended_default(tmp_path: os.PathLike) -> None:
    """
    Without an [extended] section in the TOML file, AppConfig.extended
    stays an empty ExtendedConfig.
    """
    toml_text = """
[core]
workspace_base = "/tmp/workspace"

[llm]
model = "test-model"
api_key = "toml-api-key"

[agent]
memory_enabled = true
"""
    toml_path = tmp_path / 'config.toml'
    toml_path.write_text(toml_text)

    app_config = AppConfig()
    load_from_toml(app_config, str(toml_path))

    # Nothing was supplied, so the extended root must be empty.
    assert app_config.extended.root == {}
|
||||
|
||||
|
||||
def test_app_config_extended_random_keys(tmp_path: os.PathLike) -> None:
    """
    The [extended] section accepts keys that no schema declares and keeps
    their values intact.
    """
    toml_text = """
[core]
workspace_base = "/tmp/workspace"

[extended]
random_key = "random_value"
another_key = 3.14
"""
    toml_path = tmp_path / 'config.toml'
    toml_path.write_text(toml_text)

    app_config = AppConfig()
    load_from_toml(app_config, str(toml_path))

    extended = app_config.extended
    # The whole mapping is preserved in root ...
    assert extended.root == {'random_key': 'random_value', 'another_key': 3.14}
    # ... and each key is readable as an attribute.
    assert extended.random_key == 'random_value'
    assert extended.another_key == 3.14
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user