mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
34 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6ba79c454b | |||
| fbc06f42aa | |||
| f35ed5e277 | |||
| 6787a3adf7 | |||
| fa50e0c9b9 | |||
| f4c5bbda19 | |||
| 6562297615 | |||
| 0217a7cfbd | |||
| aa15c9d385 | |||
| 8ad89e368a | |||
| 29ba94fc0f | |||
| 8956f92f6a | |||
| 753e3c4205 | |||
| ecd573febc | |||
| 325a558fbc | |||
| 666c186826 | |||
| 2d2dbf1561 | |||
| abac25cc4c | |||
| 70b21d16bd | |||
| bf82f75ae4 | |||
| a8bce3724f | |||
| e109f7e58e | |||
| a20f299579 | |||
| bf77da7849 | |||
| 869ea59ecd | |||
| f093c14ad3 | |||
| 9d3a0a02b8 | |||
| 35bab5070d | |||
| d03b9775b5 | |||
| fab4532f6b | |||
| d33913e036 | |||
| e52aee168e | |||
| c27b191358 | |||
| 22c5ad85d9 |
@@ -1,11 +1,12 @@
|
||||
**End-user friendly description of the problem this fixes or functionality that this introduces**
|
||||
|
||||
- [ ] Include this change in the Release Notes. If checked, you must provide an **end-user friendly** description for your change below
|
||||
|
||||
---
|
||||
**Give a summary of what the PR does, explaining any non-trivial design decisions**
|
||||
- [ ] This change is worth documenting at https://docs.all-hands.dev/
|
||||
- [ ] Include this change in the Release Notes. If checked, you **must** provide an **end-user friendly** description for your change below
|
||||
|
||||
**End-user friendly description of the problem this fixes or functionality that this introduces.**
|
||||
|
||||
|
||||
---
|
||||
**Link of any specific issues this addresses**
|
||||
**Give a summary of what the PR does, explaining any non-trivial design decisions.**
|
||||
|
||||
|
||||
---
|
||||
**Link to any specific issues this addresses.**
|
||||
|
||||
@@ -24,6 +24,10 @@ jobs:
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Install tmux
|
||||
run: sudo apt-get update && sudo apt-get install -y tmux
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '22.x'
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Set up Python
|
||||
|
||||
@@ -32,6 +32,10 @@ jobs:
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Install tmux
|
||||
run: sudo apt-get update && sudo apt-get install -y tmux
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '22.x'
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Set up Python
|
||||
@@ -44,7 +48,7 @@ jobs:
|
||||
- name: Build Environment
|
||||
run: make build
|
||||
- name: Run Tests
|
||||
run: poetry run pytest --forked -n auto --cov=openhands --cov-report=xml -svv ./tests/unit --ignore=tests/unit/test_memory.py
|
||||
run: poetry run pytest --forked -n auto --cov=openhands --cov-report=xml -svv ./tests/unit --ignore=tests/unit/test_long_term_memory.py
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v5
|
||||
env:
|
||||
|
||||
+1
-1
@@ -100,7 +100,7 @@ poetry run pytest ./tests/unit/test_*.py
|
||||
To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker container image by
|
||||
setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
|
||||
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.25-nikolaik`
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.26-nikolaik`
|
||||
|
||||
## Develop inside Docker container
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
SHELL=/bin/bash
|
||||
SHELL=/usr/bin/env bash
|
||||
# Makefile for OpenHands project
|
||||
|
||||
# Variables
|
||||
@@ -81,10 +81,10 @@ check-nodejs:
|
||||
@if command -v node > /dev/null; then \
|
||||
NODE_VERSION=$(shell node --version | sed -E 's/v//g'); \
|
||||
IFS='.' read -r -a NODE_VERSION_ARRAY <<< "$$NODE_VERSION"; \
|
||||
if [ "$${NODE_VERSION_ARRAY[0]}" -ge 20 ]; then \
|
||||
if [ "$${NODE_VERSION_ARRAY[0]}" -ge 22 ]; then \
|
||||
echo "$(BLUE)Node.js $$NODE_VERSION is already installed.$(RESET)"; \
|
||||
else \
|
||||
echo "$(RED)Node.js 20.x or later is required. Please install Node.js 20.x or later to continue.$(RESET)"; \
|
||||
echo "$(RED)Node.js 22.x or later is required. Please install Node.js 22.x or later to continue.$(RESET)"; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
else \
|
||||
|
||||
@@ -43,17 +43,17 @@ See the [Running OpenHands](https://docs.all-hands.dev/modules/usage/installatio
|
||||
system requirements and more information.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26
|
||||
```
|
||||
|
||||
You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!
|
||||
|
||||
@@ -17,6 +17,12 @@
|
||||
#modal_api_token_id = ""
|
||||
#modal_api_token_secret = ""
|
||||
|
||||
# API key for Daytona
|
||||
#daytona_api_key = ""
|
||||
|
||||
# Daytona Target
|
||||
#daytona_target = ""
|
||||
|
||||
# Base path for the workspace
|
||||
workspace_base = "./workspace"
|
||||
|
||||
@@ -234,6 +240,10 @@ codeact_enable_jupyter = true
|
||||
# List of microagents to disable
|
||||
#disabled_microagents = []
|
||||
|
||||
# Whether history should be truncated to continue the session when hitting the LLM context
|
||||
# length limit
|
||||
enable_history_truncation = true
|
||||
|
||||
[agent.RepoExplorerAgent]
|
||||
# Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
|
||||
# useful when an agent doesn't demand high quality but uses a lot of tokens
|
||||
|
||||
+1
-1
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
# Initialize variables with default values
|
||||
|
||||
@@ -11,7 +11,7 @@ services:
|
||||
- BACKEND_HOST=${BACKEND_HOST:-"0.0.0.0"}
|
||||
- SANDBOX_API_HOSTNAME=host.docker.internal
|
||||
#
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.25-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.26-nikolaik}
|
||||
- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -o pipefail
|
||||
|
||||
function get_docker() {
|
||||
|
||||
+1
-1
@@ -7,7 +7,7 @@ services:
|
||||
image: openhands:latest
|
||||
container_name: openhands-app-${DATE:-}
|
||||
environment:
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik}
|
||||
#- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234} # enable this only if you want a specific non-root sandbox user but you will have to manually adjust permissions of openhands-state for this user
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
@@ -52,7 +52,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -61,7 +61,7 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -56,6 +56,6 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
|
||||
```
|
||||
|
||||
@@ -13,16 +13,16 @@
|
||||
La façon la plus simple d'exécuter OpenHands est avec Docker.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26
|
||||
```
|
||||
|
||||
Vous pouvez également exécuter OpenHands en mode [headless scriptable](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), en tant que [CLI interactive](https://docs.all-hands.dev/modules/usage/how-to/cli-mode), ou en utilisant l'[Action GitHub OpenHands](https://docs.all-hands.dev/modules/usage/how-to/github-action).
|
||||
|
||||
@@ -13,7 +13,7 @@ C'est le Runtime par défaut qui est utilisé lorsque vous démarrez OpenHands.
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -50,7 +50,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -59,7 +59,7 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
+2
-2
@@ -47,7 +47,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -57,6 +57,6 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
|
||||
```
|
||||
|
||||
@@ -11,16 +11,16 @@
|
||||
在 Docker 中运行 OpenHands 是最简单的方式。
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26
|
||||
```
|
||||
|
||||
你也可以在可脚本化的[无头模式](https://docs.all-hands.dev/modules/usage/how-to/headless-mode)下运行 OpenHands,作为[交互式 CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode),或使用 [OpenHands GitHub Action](https://docs.all-hands.dev/modules/usage/how-to/github-action)。
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -340,6 +340,11 @@ The agent configuration options are defined in the `[agent]` and `[agent.<agent_
|
||||
- Default: `false`
|
||||
- Description: Whether Jupyter is enabled in the action space
|
||||
|
||||
- `enable_history_truncation`
|
||||
- Type: `bool`
|
||||
- Default: `true`
|
||||
- Description: Whether history should be truncated to continue the session when hitting the LLM context length limit
|
||||
|
||||
### Microagent Usage
|
||||
- `enable_prompt_extensions`
|
||||
- Type: `bool`
|
||||
|
||||
@@ -35,7 +35,7 @@ To run OpenHands in CLI mode with Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -45,7 +45,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ To run OpenHands in Headless mode with Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -43,7 +43,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi"
|
||||
```
|
||||
|
||||
|
||||
@@ -58,17 +58,17 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to
|
||||
The easiest way to run OpenHands is in Docker.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.25
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26
|
||||
```
|
||||
|
||||
You'll find OpenHands running at http://localhost:3000!
|
||||
|
||||
@@ -16,7 +16,7 @@ some flags being passed to `docker run` that make this possible:
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.25-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -20,6 +20,8 @@ To evaluate an agent, you can provide the agent's name to the `run_infer.py` pro
|
||||
### Evaluating Different LLMs
|
||||
|
||||
OpenHands in development mode uses `config.toml` to keep track of most configuration.
|
||||
**IMPORTANT: For evaluation, only the LLM section in `config.toml` will be used. Other configurations, such as `save_trajectory_path`, are not applied during evaluation.**
|
||||
|
||||
Here's an example configuration file you can use to define and use multiple LLMs:
|
||||
|
||||
```toml
|
||||
@@ -40,6 +42,8 @@ api_key = "XXX"
|
||||
temperature = 0.0
|
||||
```
|
||||
|
||||
Other evaluation-specific configurations, such as `save_trajectory_path`, are typically set in the `get_config` function of each benchmark's `run_infer.py` file.
|
||||
|
||||
## Supported Benchmarks
|
||||
|
||||
The OpenHands evaluation harness supports a wide variety of benchmarks across [software engineering](#software-engineering), [web browsing](#web-browsing), [miscellaneous assistance](#misc-assistance), and [real-world](#real-world) tasks.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Step 1: Stop all running containers
|
||||
echo "Stopping all running containers..."
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
RESULT_FILE=$1
|
||||
MODEL_CONFIG=$2
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
LEVEL=$1
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# This is ONLY used for pushing docker images created by https://github.com/princeton-nlp/SWE-bench/blob/main/docs/20240627_docker/README.md
|
||||
|
||||
|
||||
+1
-1
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
FOLDER_PATH=$1
|
||||
NEW_FOLDER_PATH=${FOLDER_PATH}.swebench_submission
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
PROCESS_FILEPATH=$1
|
||||
if [ -z "$PROCESS_FILEPATH" ]; then
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
INPUT_FILE=$1
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
source ~/.bashrc
|
||||
SWEUTIL_DIR=/swe_util
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
EVAL_WORKSPACE="evaluation/benchmarks/swe_bench/eval_workspace"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
##################################################################################################
|
||||
# Adapted from https://github.com/TheAgentCompany/TheAgentCompany/blob/main/evaluation/run_eval.sh
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
echo "hello world"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
|
||||
# API base URL
|
||||
|
||||
@@ -65,6 +65,12 @@ describe("extractModelAndProvider", () => {
|
||||
separator: "/",
|
||||
});
|
||||
|
||||
expect(extractModelAndProvider("claude-3-7-sonnet-20250219")).toEqual({
|
||||
provider: "anthropic",
|
||||
model: "claude-3-7-sonnet-20250219",
|
||||
separator: "/",
|
||||
});
|
||||
|
||||
expect(extractModelAndProvider("claude-3-haiku-20240307")).toEqual({
|
||||
provider: "anthropic",
|
||||
model: "claude-3-haiku-20240307",
|
||||
|
||||
Generated
+2
-2
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "openhands-frontend",
|
||||
"version": "0.25.0",
|
||||
"version": "0.26.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "openhands-frontend",
|
||||
"version": "0.25.0",
|
||||
"version": "0.26.0",
|
||||
"dependencies": {
|
||||
"@heroui/react": "2.6.14",
|
||||
"@monaco-editor/react": "^4.7.0-rc.0",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "openhands-frontend",
|
||||
"version": "0.25.0",
|
||||
"version": "0.26.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"engines": {
|
||||
|
||||
@@ -239,9 +239,6 @@ class OpenHands {
|
||||
body,
|
||||
);
|
||||
|
||||
// TODO: remove this once we have a multi-conversation UI
|
||||
localStorage.setItem("latest_conversation_id", data.conversation_id);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
|
||||
@@ -73,11 +73,13 @@ export function ConversationPanel({ onClose }: ConversationPanelProps) {
|
||||
<div
|
||||
ref={ref}
|
||||
data-testid="conversation-panel"
|
||||
className="w-[350px] h-full border border-neutral-700 bg-base-secondary rounded-xl overflow-y-auto"
|
||||
className="w-[350px] h-full border border-neutral-700 bg-base-secondary rounded-xl overflow-y-auto absolute"
|
||||
>
|
||||
<div className="pt-4 px-4 flex items-center justify-between">
|
||||
{isFetching && <LoadingSpinner size="small" />}
|
||||
</div>
|
||||
{isFetching && (
|
||||
<div className="w-full h-full absolute flex justify-center items-center">
|
||||
<LoadingSpinner size="small" />
|
||||
</div>
|
||||
)}
|
||||
{error && (
|
||||
<div className="flex flex-col items-center justify-center h-full">
|
||||
<p className="text-danger">{error.message}</p>
|
||||
|
||||
@@ -12,13 +12,14 @@ export function StyledSwitchComponent({
|
||||
className={cn(
|
||||
"w-12 h-6 rounded-xl flex items-center p-1.5 cursor-pointer",
|
||||
isToggled && "justify-end bg-primary",
|
||||
!isToggled && "justify-start bg-[#1F2228] border border-tertiary-alt",
|
||||
!isToggled &&
|
||||
"justify-start bg-base-secondary border border-tertiary-light",
|
||||
)}
|
||||
>
|
||||
<div
|
||||
className={cn(
|
||||
"bg-[#1F2228] w-3 h-3 rounded-xl",
|
||||
isToggled ? "bg-[#1F2228]" : "bg-tertiary-alt",
|
||||
"w-3 h-3 rounded-xl",
|
||||
isToggled ? "bg-base-secondary" : "bg-tertiary-light",
|
||||
)}
|
||||
/>
|
||||
</div>
|
||||
|
||||
@@ -10,7 +10,6 @@ import { useEndSession } from "#/hooks/use-end-session";
|
||||
import { ModalBackdrop } from "../modal-backdrop";
|
||||
import { ModelSelector } from "./model-selector";
|
||||
import { useCurrentSettings } from "#/context/settings-context";
|
||||
import { MEMORY_CONDENSER } from "#/utils/feature-flags";
|
||||
import { Settings } from "#/types/settings";
|
||||
import { BrandButton } from "#/components/features/settings/brand-button";
|
||||
import { KeyStatusIcon } from "#/components/features/settings/key-status-icon";
|
||||
@@ -44,9 +43,6 @@ export function SettingsForm({ settings, models, onClose }: SettingsFormProps) {
|
||||
const handleFormSubmission = async (formData: FormData) => {
|
||||
const newSettings = extractSettings(formData);
|
||||
|
||||
// Inject the condenser config from the current feature flag value
|
||||
newSettings.ENABLE_DEFAULT_CONDENSER = MEMORY_CONDENSER;
|
||||
|
||||
await saveUserSettings(newSettings);
|
||||
onClose();
|
||||
resetOngoingSession();
|
||||
|
||||
@@ -2,7 +2,6 @@ import { useMutation, useQueryClient } from "@tanstack/react-query";
|
||||
import { DEFAULT_SETTINGS } from "#/services/settings";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
import { PostSettings, PostApiSettings } from "#/types/settings";
|
||||
import { MEMORY_CONDENSER } from "#/utils/feature-flags";
|
||||
|
||||
const saveSettingsMutationFn = async (settings: Partial<PostSettings>) => {
|
||||
const resetLlmApiKey = settings.LLM_API_KEY === "";
|
||||
@@ -20,8 +19,7 @@ const saveSettingsMutationFn = async (settings: Partial<PostSettings>) => {
|
||||
remote_runtime_resource_factor: settings.REMOTE_RUNTIME_RESOURCE_FACTOR,
|
||||
github_token: settings.github_token,
|
||||
unset_github_token: settings.unset_github_token,
|
||||
enable_default_condenser:
|
||||
MEMORY_CONDENSER || settings.ENABLE_DEFAULT_CONDENSER,
|
||||
enable_default_condenser: settings.ENABLE_DEFAULT_CONDENSER,
|
||||
user_consents_to_analytics: settings.user_consents_to_analytics,
|
||||
};
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ i18n
|
||||
.init({
|
||||
fallbackLng: "en",
|
||||
debug: import.meta.env.NODE_ENV === "development",
|
||||
load: "languageOnly",
|
||||
});
|
||||
|
||||
export default i18n;
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
import React from "react";
|
||||
import { useDispatch } from "react-redux";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import posthog from "posthog-js";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { setImportedProjectZip } from "#/state/initial-query-slice";
|
||||
import { convertZipToBase64 } from "#/utils/convert-zip-to-base64";
|
||||
import { useGitHubUser } from "#/hooks/query/use-github-user";
|
||||
@@ -14,7 +12,6 @@ import { HeroHeading } from "#/components/shared/hero-heading";
|
||||
import { TaskForm } from "#/components/shared/task-form";
|
||||
|
||||
function Home() {
|
||||
const { t } = useTranslation();
|
||||
const dispatch = useDispatch();
|
||||
const formRef = React.useRef<HTMLFormElement>(null);
|
||||
|
||||
@@ -26,8 +23,6 @@ function Home() {
|
||||
gitHubClientId: config?.GITHUB_CLIENT_ID || null,
|
||||
});
|
||||
|
||||
const latestConversation = localStorage.getItem("latest_conversation_id");
|
||||
|
||||
return (
|
||||
<div className="bg-base-secondary h-full rounded-xl flex flex-col items-center justify-center relative overflow-y-auto px-2">
|
||||
<HeroHeading />
|
||||
@@ -56,19 +51,6 @@ function Home() {
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
{latestConversation && (
|
||||
<div className="flex gap-4 w-full text-center mt-8">
|
||||
<p className="text-center w-full">
|
||||
{t(I18nKey.LANDING$OR)}
|
||||
<a
|
||||
className="underline"
|
||||
href={`/conversations/${latestConversation}`}
|
||||
>
|
||||
{t(I18nKey.LANDING$RECENT_CONVERSATION)}
|
||||
</a>
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -102,6 +102,8 @@ function AccountSettings() {
|
||||
|
||||
const userConsentsToAnalytics =
|
||||
formData.get("enable-analytics-switch")?.toString() === "on";
|
||||
const enableMemoryCondenser =
|
||||
formData.get("enable-memory-condenser-switch")?.toString() === "on";
|
||||
|
||||
saveSettings(
|
||||
{
|
||||
@@ -109,6 +111,7 @@ function AccountSettings() {
|
||||
formData.get("github-token-input")?.toString() || undefined,
|
||||
LANGUAGE: languageValue,
|
||||
user_consents_to_analytics: userConsentsToAnalytics,
|
||||
ENABLE_DEFAULT_CONDENSER: enableMemoryCondenser,
|
||||
LLM_MODEL: customLlmModel || fullLlmModel,
|
||||
LLM_BASE_URL: formData.get("base-url-input")?.toString() || "",
|
||||
LLM_API_KEY:
|
||||
@@ -290,6 +293,17 @@ function AccountSettings() {
|
||||
Enable confirmation mode
|
||||
</SettingsSwitch>
|
||||
)}
|
||||
|
||||
{llmConfigMode === "advanced" && (
|
||||
<SettingsSwitch
|
||||
testId="enable-memory-condenser-switch"
|
||||
name="enable-memory-condenser-switch"
|
||||
defaultIsToggled={!!settings.ENABLE_DEFAULT_CONDENSER}
|
||||
>
|
||||
Enable memory condensation
|
||||
</SettingsSwitch>
|
||||
)}
|
||||
|
||||
{llmConfigMode === "advanced" && confirmationModeIsEnabled && (
|
||||
<div>
|
||||
<SettingsDropdownInput
|
||||
|
||||
@@ -12,7 +12,7 @@ export const DEFAULT_SETTINGS: Settings = {
|
||||
SECURITY_ANALYZER: "",
|
||||
REMOTE_RUNTIME_RESOURCE_FACTOR: 1,
|
||||
GITHUB_TOKEN_IS_SET: false,
|
||||
ENABLE_DEFAULT_CONDENSER: false,
|
||||
ENABLE_DEFAULT_CONDENSER: true,
|
||||
USER_CONSENTS_TO_ANALYTICS: false,
|
||||
};
|
||||
|
||||
|
||||
@@ -12,5 +12,4 @@ function loadFeatureFlag(
|
||||
}
|
||||
}
|
||||
|
||||
export const MEMORY_CONDENSER = loadFeatureFlag("MEMORY_CONDENSER");
|
||||
export const BILLING_SETTINGS = () => loadFeatureFlag("BILLING_SETTINGS");
|
||||
|
||||
@@ -25,6 +25,7 @@ const extractAdvancedFormData = (formData: FormData) => {
|
||||
let LLM_BASE_URL: string | undefined;
|
||||
let CONFIRMATION_MODE = false;
|
||||
let SECURITY_ANALYZER: string | undefined;
|
||||
let ENABLE_DEFAULT_CONDENSER = true;
|
||||
|
||||
if (isUsingAdvancedOptions) {
|
||||
CUSTOM_LLM_MODEL = formData.get("custom-model")?.toString();
|
||||
@@ -34,6 +35,7 @@ const extractAdvancedFormData = (formData: FormData) => {
|
||||
// only set securityAnalyzer if confirmationMode is enabled
|
||||
SECURITY_ANALYZER = formData.get("security-analyzer")?.toString();
|
||||
}
|
||||
ENABLE_DEFAULT_CONDENSER = keys.includes("enable-default-condenser");
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -41,6 +43,7 @@ const extractAdvancedFormData = (formData: FormData) => {
|
||||
LLM_BASE_URL,
|
||||
CONFIRMATION_MODE,
|
||||
SECURITY_ANALYZER,
|
||||
ENABLE_DEFAULT_CONDENSER,
|
||||
};
|
||||
};
|
||||
|
||||
@@ -53,6 +56,7 @@ export const extractSettings = (formData: FormData): Partial<Settings> => {
|
||||
LLM_BASE_URL,
|
||||
CONFIRMATION_MODE,
|
||||
SECURITY_ANALYZER,
|
||||
ENABLE_DEFAULT_CONDENSER,
|
||||
} = extractAdvancedFormData(formData);
|
||||
|
||||
return {
|
||||
@@ -63,5 +67,6 @@ export const extractSettings = (formData: FormData): Partial<Settings> => {
|
||||
LLM_BASE_URL,
|
||||
CONFIRMATION_MODE,
|
||||
SECURITY_ANALYZER,
|
||||
ENABLE_DEFAULT_CONDENSER,
|
||||
};
|
||||
};
|
||||
|
||||
@@ -3,6 +3,7 @@ export const VERIFIED_PROVIDERS = ["openai", "azure", "anthropic", "deepseek"];
|
||||
export const VERIFIED_MODELS = [
|
||||
"o3-mini-2025-01-31",
|
||||
"claude-3-5-sonnet-20241022",
|
||||
"claude-3-7-sonnet-20250219",
|
||||
"deepseek-chat",
|
||||
];
|
||||
|
||||
@@ -31,4 +32,5 @@ export const VERIFIED_ANTHROPIC_MODELS = [
|
||||
"claude-3-haiku-20240307",
|
||||
"claude-3-opus-20240229",
|
||||
"claude-3-sonnet-20240229",
|
||||
"claude-3-7-sonnet-20250219",
|
||||
];
|
||||
|
||||
@@ -10,17 +10,24 @@ from openhands.events.action import (
|
||||
|
||||
|
||||
class BrowsingResponseParser(ResponseParser):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
# Need to pay attention to the item order in self.action_parsers
|
||||
super().__init__()
|
||||
self.action_parsers = [BrowsingActionParserMessage()]
|
||||
self.default_parser = BrowsingActionParserBrowseInteractive()
|
||||
|
||||
def parse(self, response: str) -> Action:
|
||||
action_str = self.parse_response(response)
|
||||
def parse(
|
||||
self, response: str | dict[str, list[dict[str, dict[str, str | None]]]]
|
||||
) -> Action:
|
||||
if isinstance(response, str):
|
||||
action_str = response
|
||||
else:
|
||||
action_str = self.parse_response(response)
|
||||
return self.parse_action(action_str)
|
||||
|
||||
def parse_response(self, response) -> str:
|
||||
def parse_response(
|
||||
self, response: dict[str, list[dict[str, dict[str, str | None]]]]
|
||||
) -> str:
|
||||
action_str = response['choices'][0]['message']['content']
|
||||
if action_str is None:
|
||||
return ''
|
||||
@@ -47,9 +54,7 @@ class BrowsingActionParserMessage(ActionParser):
|
||||
- BrowseInteractiveAction(browser_actions) - unexpected response format, message back to user
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
):
|
||||
def __init__(self) -> None:
|
||||
pass
|
||||
|
||||
def check_condition(self, action_str: str) -> bool:
|
||||
@@ -69,9 +74,7 @@ class BrowsingActionParserBrowseInteractive(ActionParser):
|
||||
- BrowseInteractiveAction(browser_actions) - handle send message to user function call in BrowserGym
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
):
|
||||
def __init__(self) -> None:
|
||||
pass
|
||||
|
||||
def check_condition(self, action_str: str) -> bool:
|
||||
|
||||
@@ -5,7 +5,7 @@ from warnings import warn
|
||||
import yaml
|
||||
|
||||
|
||||
def yaml_parser(message):
|
||||
def yaml_parser(message: str) -> tuple[dict, bool, str]:
|
||||
"""Parse a yaml message for the retry function."""
|
||||
# saves gpt-3.5 from some yaml parsing errors
|
||||
message = re.sub(r':\s*\n(?=\S|\n)', ': ', message)
|
||||
@@ -22,7 +22,9 @@ def yaml_parser(message):
|
||||
return value, valid, retry_message
|
||||
|
||||
|
||||
def _compress_chunks(text, identifier, skip_list, split_regex='\n\n+'):
|
||||
def _compress_chunks(
|
||||
text: str, identifier: str, skip_list: list[str], split_regex: str = '\n\n+'
|
||||
) -> tuple[dict[str, str], str]:
|
||||
"""Compress a string by replacing redundant chunks by identifiers. Chunks are defined by the split_regex."""
|
||||
text_list = re.split(split_regex, text)
|
||||
text_list = [chunk.strip() for chunk in text_list]
|
||||
@@ -44,7 +46,7 @@ def _compress_chunks(text, identifier, skip_list, split_regex='\n\n+'):
|
||||
return def_dict, compressed_text
|
||||
|
||||
|
||||
def compress_string(text):
|
||||
def compress_string(text: str) -> str:
|
||||
"""Compress a string by replacing redundant paragraphs and lines with identifiers."""
|
||||
# Perform paragraph-level compression
|
||||
def_dict, compressed_text = _compress_chunks(
|
||||
@@ -67,7 +69,7 @@ def compress_string(text):
|
||||
return definitions + '\n' + compressed_text
|
||||
|
||||
|
||||
def extract_html_tags(text, keys):
|
||||
def extract_html_tags(text: str, keys: list[str]) -> dict[str, list[str]]:
|
||||
"""Extract the content within HTML tags for a list of keys.
|
||||
|
||||
Parameters
|
||||
@@ -102,7 +104,12 @@ class ParseError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
def parse_html_tags_raise(text, keys=(), optional_keys=(), merge_multiple=False):
|
||||
def parse_html_tags_raise(
|
||||
text: str,
|
||||
keys: list[str] | None = None,
|
||||
optional_keys: list[str] | None = None,
|
||||
merge_multiple: bool = False,
|
||||
) -> dict[str, str]:
|
||||
"""A version of parse_html_tags that raises an exception if the parsing is not successful."""
|
||||
content_dict, valid, retry_message = parse_html_tags(
|
||||
text, keys, optional_keys, merge_multiple=merge_multiple
|
||||
@@ -112,7 +119,12 @@ def parse_html_tags_raise(text, keys=(), optional_keys=(), merge_multiple=False)
|
||||
return content_dict
|
||||
|
||||
|
||||
def parse_html_tags(text, keys=(), optional_keys=(), merge_multiple=False):
|
||||
def parse_html_tags(
|
||||
text: str,
|
||||
keys: list[str] | None = None,
|
||||
optional_keys: list[str] | None = None,
|
||||
merge_multiple: bool = False,
|
||||
) -> tuple[dict[str, str], bool, str]:
|
||||
"""Satisfy the parse api, extracts 1 match per key and validates that all keys are present
|
||||
|
||||
Parameters
|
||||
@@ -133,9 +145,12 @@ def parse_html_tags(text, keys=(), optional_keys=(), merge_multiple=False):
|
||||
str
|
||||
A message to be displayed to the agent if the parsing was not successful.
|
||||
"""
|
||||
all_keys = tuple(keys) + tuple(optional_keys)
|
||||
keys = keys or []
|
||||
optional_keys = optional_keys or []
|
||||
all_keys = list(keys) + list(optional_keys)
|
||||
content_dict = extract_html_tags(text, all_keys)
|
||||
retry_messages = []
|
||||
result_dict: dict[str, str] = {}
|
||||
|
||||
for key in all_keys:
|
||||
if key not in content_dict:
|
||||
@@ -143,7 +158,6 @@ def parse_html_tags(text, keys=(), optional_keys=(), merge_multiple=False):
|
||||
retry_messages.append(f'Missing the key <{key}> in the answer.')
|
||||
else:
|
||||
val = content_dict[key]
|
||||
content_dict[key] = val[0]
|
||||
if len(val) > 1:
|
||||
if not merge_multiple:
|
||||
retry_messages.append(
|
||||
@@ -151,8 +165,10 @@ def parse_html_tags(text, keys=(), optional_keys=(), merge_multiple=False):
|
||||
)
|
||||
else:
|
||||
# merge the multiple instances
|
||||
content_dict[key] = '\n'.join(val)
|
||||
result_dict[key] = '\n'.join(val)
|
||||
else:
|
||||
result_dict[key] = val[0]
|
||||
|
||||
valid = len(retry_messages) == 0
|
||||
retry_message = '\n'.join(retry_messages)
|
||||
return content_dict, valid, retry_message
|
||||
return result_dict, valid, retry_message
|
||||
|
||||
@@ -475,8 +475,9 @@ def combine_thought(action: Action, thought: str) -> Action:
|
||||
def response_to_actions(response: ModelResponse) -> list[Action]:
|
||||
actions: list[Action] = []
|
||||
assert len(response.choices) == 1, 'Only one choice is supported for now'
|
||||
assistant_msg = response.choices[0].message
|
||||
if assistant_msg.tool_calls:
|
||||
choice = response.choices[0]
|
||||
assistant_msg = choice.message
|
||||
if hasattr(assistant_msg, 'tool_calls') and assistant_msg.tool_calls:
|
||||
# Check if there's assistant_msg.content. If so, add it to the thought
|
||||
thought = ''
|
||||
if isinstance(assistant_msg.content, str):
|
||||
@@ -592,7 +593,10 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
|
||||
actions.append(action)
|
||||
else:
|
||||
actions.append(
|
||||
MessageAction(content=assistant_msg.content, wait_for_response=True)
|
||||
MessageAction(
|
||||
content=str(assistant_msg.content) if assistant_msg.content else '',
|
||||
wait_for_response=True,
|
||||
)
|
||||
)
|
||||
|
||||
assert len(actions) >= 1
|
||||
|
||||
@@ -22,7 +22,7 @@ def parse_response(orig_response: str) -> Action:
|
||||
return action_from_dict(action_dict)
|
||||
|
||||
|
||||
def to_json(obj, **kwargs):
|
||||
def to_json(obj: object, **kwargs: dict) -> str:
|
||||
"""Serialize an object to str format"""
|
||||
return json.dumps(obj, **kwargs)
|
||||
|
||||
@@ -32,7 +32,9 @@ class MicroAgent(Agent):
|
||||
prompt = ''
|
||||
agent_definition: dict = {}
|
||||
|
||||
def history_to_json(self, history: list[Event], max_events: int = 20, **kwargs):
|
||||
def history_to_json(
|
||||
self, history: list[Event], max_events: int = 20, **kwargs: dict
|
||||
) -> str:
|
||||
"""
|
||||
Serialize and simplify history to str format
|
||||
"""
|
||||
@@ -60,7 +62,7 @@ class MicroAgent(Agent):
|
||||
super().__init__(llm, config)
|
||||
if 'name' not in self.agent_definition:
|
||||
raise ValueError('Agent definition must contain a name')
|
||||
self.prompt_template = Environment(loader=BaseLoader).from_string(self.prompt)
|
||||
self.prompt_template = Environment(loader=BaseLoader()).from_string(self.prompt)
|
||||
self.delegates = all_microagents.copy()
|
||||
del self.delegates[self.agent_definition['name']]
|
||||
|
||||
@@ -74,7 +76,7 @@ class MicroAgent(Agent):
|
||||
delegates=self.delegates,
|
||||
latest_user_message=last_user_message,
|
||||
)
|
||||
content = [TextContent(text=prompt)]
|
||||
content: list[TextContent | ImageContent] = [TextContent(text=prompt)]
|
||||
if self.llm.vision_is_active() and last_image_urls:
|
||||
content.append(ImageContent(image_urls=last_image_urls))
|
||||
message = Message(role='user', content=content)
|
||||
|
||||
@@ -29,7 +29,9 @@ def get_error_prefix(obs: BrowserOutputObservation) -> str:
|
||||
return f'## Error from previous action:\n{obs.last_browser_action_error}\n'
|
||||
|
||||
|
||||
def create_goal_prompt(goal: str, image_urls: list[str] | None):
|
||||
def create_goal_prompt(
|
||||
goal: str, image_urls: list[str] | None
|
||||
) -> tuple[str, list[str]]:
|
||||
goal_txt: str = f"""\
|
||||
# Instructions
|
||||
Review the current state of the page and all other information to find the best possible next action to accomplish your goal. Your answer will be interpreted and executed by a program, make sure to follow the formatting instructions.
|
||||
@@ -52,7 +54,7 @@ def create_observation_prompt(
|
||||
focused_element: str,
|
||||
error_prefix: str,
|
||||
som_screenshot: str | None,
|
||||
):
|
||||
) -> tuple[str, str | None]:
|
||||
txt_observation = f"""
|
||||
# Observation of current step:
|
||||
{tabs}{axtree_txt}{focused_element}{error_prefix}
|
||||
@@ -273,7 +275,9 @@ Note:
|
||||
observation_txt, som_screenshot = create_observation_prompt(
|
||||
cur_axtree_txt, tabs, focused_element, error_prefix, set_of_marks
|
||||
)
|
||||
human_prompt = [TextContent(type='text', text=goal_txt)]
|
||||
human_prompt: list[TextContent | ImageContent] = [
|
||||
TextContent(type='text', text=goal_txt)
|
||||
]
|
||||
if len(goal_images) > 0:
|
||||
human_prompt.append(ImageContent(image_urls=goal_images))
|
||||
human_prompt.append(TextContent(type='text', text=observation_txt))
|
||||
|
||||
@@ -21,6 +21,7 @@ from openhands.core.exceptions import (
|
||||
AgentStuckInLoopError,
|
||||
FunctionCallNotExistsError,
|
||||
FunctionCallValidationError,
|
||||
LLMContextWindowExceedError,
|
||||
LLMMalformedActionError,
|
||||
LLMNoActionError,
|
||||
LLMResponseError,
|
||||
@@ -50,7 +51,7 @@ from openhands.events.observation import (
|
||||
NullObservation,
|
||||
Observation,
|
||||
)
|
||||
from openhands.events.serialization.event import truncate_content
|
||||
from openhands.events.serialization.event import event_to_trajectory, truncate_content
|
||||
from openhands.llm.llm import LLM
|
||||
|
||||
# note: RESUME is only available on web GUI
|
||||
@@ -148,12 +149,13 @@ class AgentController:
|
||||
# replay-related
|
||||
self._replay_manager = ReplayManager(replay_events)
|
||||
|
||||
async def close(self) -> None:
|
||||
async def close(self, set_stop_state=True) -> None:
|
||||
"""Closes the agent controller, canceling any ongoing tasks and unsubscribing from the event stream.
|
||||
|
||||
Note that it's fairly important that this closes properly, otherwise the state is incomplete.
|
||||
"""
|
||||
await self.set_agent_state_to(AgentState.STOPPED)
|
||||
if set_stop_state:
|
||||
await self.set_agent_state_to(AgentState.STOPPED)
|
||||
|
||||
# we made history, now is the time to rewrite it!
|
||||
# the final state.history will be used by external scripts like evals, tests, etc.
|
||||
@@ -251,6 +253,7 @@ class AgentController:
|
||||
isinstance(e, litellm.AuthenticationError)
|
||||
or isinstance(e, litellm.BadRequestError)
|
||||
or isinstance(e, RateLimitError)
|
||||
or isinstance(e, LLMContextWindowExceedError)
|
||||
):
|
||||
reported = e
|
||||
await self._react_to_exception(reported)
|
||||
@@ -698,24 +701,13 @@ class AgentController:
|
||||
or 'prompt is too long' in error_str
|
||||
or isinstance(e, ContextWindowExceededError)
|
||||
):
|
||||
# When context window is exceeded, keep roughly half of agent interactions
|
||||
self.state.history = self._apply_conversation_window(
|
||||
self.state.history
|
||||
)
|
||||
|
||||
# Save the ID of the first event in our truncated history for future reloading
|
||||
if self.state.history:
|
||||
self.state.start_id = self.state.history[0].id
|
||||
|
||||
# Add an error event to trigger another step by the agent
|
||||
self.event_stream.add_event(
|
||||
AgentCondensationObservation(
|
||||
content='Trimming prompt to meet context window limitations'
|
||||
),
|
||||
EventSource.AGENT,
|
||||
)
|
||||
return
|
||||
raise e
|
||||
if self.agent.config.enable_history_truncation:
|
||||
self._handle_long_context_error()
|
||||
return
|
||||
else:
|
||||
raise LLMContextWindowExceedError()
|
||||
else:
|
||||
raise e
|
||||
|
||||
if action.runnable:
|
||||
if self.state.confirmation_mode and (
|
||||
@@ -842,6 +834,11 @@ class AgentController:
|
||||
# Always load from the event stream to avoid losing history
|
||||
self._init_history()
|
||||
|
||||
def get_trajectory(self) -> list[dict]:
|
||||
# state history could be partially hidden/truncated before controller is closed
|
||||
assert self._closed
|
||||
return [event_to_trajectory(event) for event in self.state.history]
|
||||
|
||||
def _init_history(self) -> None:
|
||||
"""Initializes the agent's history from the event stream.
|
||||
|
||||
@@ -967,6 +964,22 @@ class AgentController:
|
||||
# make sure history is in sync
|
||||
self.state.start_id = start_id
|
||||
|
||||
def _handle_long_context_error(self) -> None:
|
||||
# When context window is exceeded, keep roughly half of agent interactions
|
||||
self.state.history = self._apply_conversation_window(self.state.history)
|
||||
|
||||
# Save the ID of the first event in our truncated history for future reloading
|
||||
if self.state.history:
|
||||
self.state.start_id = self.state.history[0].id
|
||||
|
||||
# Add an error event to trigger another step by the agent
|
||||
self.event_stream.add_event(
|
||||
AgentCondensationObservation(
|
||||
content='Trimming prompt to meet context window limitations'
|
||||
),
|
||||
EventSource.AGENT,
|
||||
)
|
||||
|
||||
def _apply_conversation_window(self, events: list[Event]) -> list[Event]:
|
||||
"""Cuts history roughly in half when context window is exceeded, preserving action-observation pairs
|
||||
and ensuring the first user message is always included.
|
||||
|
||||
@@ -100,6 +100,7 @@ async def main(loop: asyncio.AbstractEventLoop):
|
||||
initial_user_action = MessageAction(content=task_str) if task_str else None
|
||||
|
||||
sid = str(uuid4())
|
||||
display_message(f'Session ID: {sid}')
|
||||
|
||||
runtime = create_runtime(config, sid=sid, headless_mode=True)
|
||||
await runtime.connect()
|
||||
|
||||
@@ -5,6 +5,7 @@ from openhands.core.config.config_utils import (
|
||||
OH_MAX_ITERATIONS,
|
||||
get_field_info,
|
||||
)
|
||||
from openhands.core.config.extended_config import ExtendedConfig
|
||||
from openhands.core.config.llm_config import LLMConfig
|
||||
from openhands.core.config.sandbox_config import SandboxConfig
|
||||
from openhands.core.config.security_config import SecurityConfig
|
||||
@@ -28,6 +29,7 @@ __all__ = [
|
||||
'LLMConfig',
|
||||
'SandboxConfig',
|
||||
'SecurityConfig',
|
||||
'ExtendedConfig',
|
||||
'load_app_config',
|
||||
'load_from_env',
|
||||
'load_from_toml',
|
||||
|
||||
@@ -18,6 +18,7 @@ class AgentConfig(BaseModel):
|
||||
enable_prompt_extensions: Whether to use prompt extensions (e.g., microagents, inject runtime info). Default is True.
|
||||
disabled_microagents: A list of microagents to disable. Default is None.
|
||||
condenser: Configuration for the memory condenser. Default is NoOpCondenserConfig.
|
||||
enable_history_truncation: If history should be truncated once LLM context limit is hit.
|
||||
"""
|
||||
|
||||
codeact_enable_browsing: bool = Field(default=True)
|
||||
@@ -31,3 +32,4 @@ class AgentConfig(BaseModel):
|
||||
enable_prompt_extensions: bool = Field(default=True)
|
||||
disabled_microagents: list[str] | None = Field(default=None)
|
||||
condenser: CondenserConfig = Field(default_factory=NoOpCondenserConfig)
|
||||
enable_history_truncation: bool = Field(default=True)
|
||||
|
||||
@@ -9,6 +9,7 @@ from openhands.core.config.config_utils import (
|
||||
OH_MAX_ITERATIONS,
|
||||
model_defaults_to_dict,
|
||||
)
|
||||
from openhands.core.config.extended_config import ExtendedConfig
|
||||
from openhands.core.config.llm_config import LLMConfig
|
||||
from openhands.core.config.sandbox_config import SandboxConfig
|
||||
from openhands.core.config.security_config import SecurityConfig
|
||||
@@ -52,6 +53,7 @@ class AppConfig(BaseModel):
|
||||
default_agent: str = Field(default=OH_DEFAULT_AGENT)
|
||||
sandbox: SandboxConfig = Field(default_factory=SandboxConfig)
|
||||
security: SecurityConfig = Field(default_factory=SecurityConfig)
|
||||
extended: ExtendedConfig = Field(default_factory=lambda: ExtendedConfig({}))
|
||||
runtime: str = Field(default='docker')
|
||||
file_store: str = Field(default='local')
|
||||
file_store_path: str = Field(default='/tmp/openhands_file_store')
|
||||
@@ -75,6 +77,9 @@ class AppConfig(BaseModel):
|
||||
file_uploads_restrict_file_types: bool = Field(default=False)
|
||||
file_uploads_allowed_extensions: list[str] = Field(default_factory=lambda: ['.*'])
|
||||
runloop_api_key: SecretStr | None = Field(default=None)
|
||||
daytona_api_key: SecretStr | None = Field(default=None)
|
||||
daytona_api_url: str = Field(default='https://app.daytona.io/api')
|
||||
daytona_target: str = Field(default='us')
|
||||
cli_multiline_input: bool = Field(default=False)
|
||||
conversation_max_age_seconds: int = Field(default=864000) # 10 days in seconds
|
||||
|
||||
|
||||
@@ -26,8 +26,10 @@ class RecentEventsCondenserConfig(BaseModel):
|
||||
"""Configuration for RecentEventsCondenser."""
|
||||
|
||||
type: Literal['recent'] = Field('recent')
|
||||
|
||||
# at least one event by default, because the best guess is that it is the user task
|
||||
keep_first: int = Field(
|
||||
default=0,
|
||||
default=1,
|
||||
description='The number of initial events to condense.',
|
||||
ge=0,
|
||||
)
|
||||
@@ -43,6 +45,8 @@ class LLMSummarizingCondenserConfig(BaseModel):
|
||||
llm_config: LLMConfig = Field(
|
||||
..., description='Configuration for the LLM to use for condensing.'
|
||||
)
|
||||
|
||||
# at least one event by default, because the best guess is that it's the user task
|
||||
keep_first: int = Field(
|
||||
default=1,
|
||||
description='The number of initial events to condense.',
|
||||
@@ -62,8 +66,10 @@ class AmortizedForgettingCondenserConfig(BaseModel):
|
||||
description='Maximum size of the condensed history before triggering forgetting.',
|
||||
ge=2,
|
||||
)
|
||||
|
||||
# at least one event by default, because the best guess is that it's the user task
|
||||
keep_first: int = Field(
|
||||
default=0,
|
||||
default=1,
|
||||
description='Number of initial events to always keep in history.',
|
||||
ge=0,
|
||||
)
|
||||
@@ -81,8 +87,10 @@ class LLMAttentionCondenserConfig(BaseModel):
|
||||
description='Maximum size of the condensed history before triggering forgetting.',
|
||||
ge=2,
|
||||
)
|
||||
|
||||
# at least one event by default, because the best guess is that it's the user task
|
||||
keep_first: int = Field(
|
||||
default=0,
|
||||
default=1,
|
||||
description='Number of initial events to always keep in history.',
|
||||
ge=0,
|
||||
)
|
||||
|
||||
@@ -25,14 +25,20 @@ def get_field_info(field: FieldInfo) -> dict[str, Any]:
|
||||
# Note: this only works for UnionTypes with None as one of the types
|
||||
if get_origin(field_type) is UnionType:
|
||||
types = get_args(field_type)
|
||||
non_none_arg = next((t for t in types if t is not type(None)), None)
|
||||
non_none_arg = next(
|
||||
(t for t in types if t is not None and t is not type(None)), None
|
||||
)
|
||||
if non_none_arg is not None:
|
||||
field_type = non_none_arg
|
||||
optional = True
|
||||
|
||||
# type name in a pretty format
|
||||
type_name = (
|
||||
field_type.__name__ if hasattr(field_type, '__name__') else str(field_type)
|
||||
str(field_type)
|
||||
if field_type is None
|
||||
else (
|
||||
field_type.__name__ if hasattr(field_type, '__name__') else str(field_type)
|
||||
)
|
||||
)
|
||||
|
||||
# default is always present
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
from pydantic import RootModel
|
||||
|
||||
|
||||
class ExtendedConfig(RootModel[dict]):
|
||||
"""Configuration for extended functionalities.
|
||||
|
||||
This is implemented as a root model so that the entire input is stored
|
||||
as the root value. This allows arbitrary keys to be stored and later
|
||||
accessed via attribute or dictionary-style access.
|
||||
"""
|
||||
|
||||
@property
|
||||
def root(self) -> dict: # type annotation to help mypy
|
||||
return super().root
|
||||
|
||||
def __str__(self) -> str:
|
||||
# Use the root dict to build a string representation.
|
||||
attr_str = [f'{k}={repr(v)}' for k, v in self.root.items()]
|
||||
return f"ExtendedConfig({', '.join(attr_str)})"
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return self.__str__()
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> 'ExtendedConfig':
|
||||
# Create an instance directly by wrapping the input dict.
|
||||
return cls(data)
|
||||
|
||||
def __getitem__(self, key: str) -> object:
|
||||
# Provide dictionary-like access via the root dict.
|
||||
return self.root[key]
|
||||
|
||||
def __getattr__(self, key: str) -> object:
|
||||
# Fallback for attribute access using the root dict.
|
||||
try:
|
||||
return self.root[key]
|
||||
except KeyError as e:
|
||||
raise AttributeError(
|
||||
f"'ExtendedConfig' object has no attribute '{key}'"
|
||||
) from e
|
||||
@@ -53,11 +53,11 @@ class SandboxConfig(BaseModel):
|
||||
remote_runtime_api_timeout: int = Field(default=10)
|
||||
remote_runtime_enable_retries: bool = Field(default=False)
|
||||
remote_runtime_class: str | None = Field(
|
||||
default='sysbox'
|
||||
default=None
|
||||
) # can be "None" (default to gvisor) or "sysbox" (support docker inside runtime + more stable)
|
||||
enable_auto_lint: bool = Field(
|
||||
default=False # once enabled, OpenHands would lint files after editing
|
||||
)
|
||||
default=False
|
||||
) # once enabled, OpenHands would lint files after editing
|
||||
use_host_network: bool = Field(default=False)
|
||||
runtime_extra_build_args: list[str] | None = Field(default=None)
|
||||
initialize_plugins: bool = Field(default=True)
|
||||
|
||||
@@ -19,6 +19,7 @@ from openhands.core.config.config_utils import (
|
||||
OH_DEFAULT_AGENT,
|
||||
OH_MAX_ITERATIONS,
|
||||
)
|
||||
from openhands.core.config.extended_config import ExtendedConfig
|
||||
from openhands.core.config.llm_config import LLMConfig
|
||||
from openhands.core.config.sandbox_config import SandboxConfig
|
||||
from openhands.core.config.security_config import SecurityConfig
|
||||
@@ -134,6 +135,10 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml') -> None:
|
||||
for key, value in toml_config.items():
|
||||
if isinstance(value, dict):
|
||||
try:
|
||||
if key.lower() == 'extended':
|
||||
# For ExtendedConfig (RootModel), pass the entire dict as the root value
|
||||
cfg.extended = ExtendedConfig(value)
|
||||
continue
|
||||
if key is not None and key.lower() == 'agent':
|
||||
# Every entry here is either a field for the default `agent` config group, or itself a group
|
||||
# The best way to tell the difference is to try to parse it as an AgentConfig object
|
||||
|
||||
@@ -10,17 +10,17 @@ class AgentError(Exception):
|
||||
|
||||
|
||||
class AgentNoInstructionError(AgentError):
|
||||
def __init__(self, message='Instruction must be provided'):
|
||||
def __init__(self, message: str = 'Instruction must be provided') -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class AgentEventTypeError(AgentError):
|
||||
def __init__(self, message='Event must be a dictionary'):
|
||||
def __init__(self, message: str = 'Event must be a dictionary') -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class AgentAlreadyRegisteredError(AgentError):
|
||||
def __init__(self, name=None):
|
||||
def __init__(self, name: str | None = None) -> None:
|
||||
if name is not None:
|
||||
message = f"Agent class already registered under '{name}'"
|
||||
else:
|
||||
@@ -29,7 +29,7 @@ class AgentAlreadyRegisteredError(AgentError):
|
||||
|
||||
|
||||
class AgentNotRegisteredError(AgentError):
|
||||
def __init__(self, name=None):
|
||||
def __init__(self, name: str | None = None) -> None:
|
||||
if name is not None:
|
||||
message = f"No agent class registered under '{name}'"
|
||||
else:
|
||||
@@ -38,7 +38,7 @@ class AgentNotRegisteredError(AgentError):
|
||||
|
||||
|
||||
class AgentStuckInLoopError(AgentError):
|
||||
def __init__(self, message='Agent got stuck in a loop'):
|
||||
def __init__(self, message: str = 'Agent got stuck in a loop') -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
@@ -48,7 +48,7 @@ class AgentStuckInLoopError(AgentError):
|
||||
|
||||
|
||||
class TaskInvalidStateError(Exception):
|
||||
def __init__(self, state=None):
|
||||
def __init__(self, state: str | None = None) -> None:
|
||||
if state is not None:
|
||||
message = f'Invalid state {state}'
|
||||
else:
|
||||
@@ -64,37 +64,47 @@ class TaskInvalidStateError(Exception):
|
||||
# This exception gets sent back to the LLM
|
||||
# It might be malformed JSON
|
||||
class LLMMalformedActionError(Exception):
|
||||
def __init__(self, message='Malformed response'):
|
||||
def __init__(self, message: str = 'Malformed response') -> None:
|
||||
self.message = message
|
||||
super().__init__(message)
|
||||
|
||||
def __str__(self):
|
||||
def __str__(self) -> str:
|
||||
return self.message
|
||||
|
||||
|
||||
# This exception gets sent back to the LLM
|
||||
# For some reason, the agent did not return an action
|
||||
class LLMNoActionError(Exception):
|
||||
def __init__(self, message='Agent must return an action'):
|
||||
def __init__(self, message: str = 'Agent must return an action') -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
# This exception gets sent back to the LLM
|
||||
# The LLM output did not include an action, or the action was not the expected type
|
||||
class LLMResponseError(Exception):
|
||||
def __init__(self, message='Failed to retrieve action from LLM response'):
|
||||
def __init__(
|
||||
self, message: str = 'Failed to retrieve action from LLM response'
|
||||
) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class UserCancelledError(Exception):
|
||||
def __init__(self, message='User cancelled the request'):
|
||||
def __init__(self, message: str = 'User cancelled the request') -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class OperationCancelled(Exception):
|
||||
"""Exception raised when an operation is cancelled (e.g. by a keyboard interrupt)."""
|
||||
|
||||
def __init__(self, message='Operation was cancelled'):
|
||||
def __init__(self, message: str = 'Operation was cancelled') -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class LLMContextWindowExceedError(RuntimeError):
|
||||
def __init__(
|
||||
self,
|
||||
message: str = 'Conversation history longer than LLM context window limit. Consider turning on enable_history_truncation config to avoid this error',
|
||||
) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
@@ -109,7 +119,7 @@ class FunctionCallConversionError(Exception):
|
||||
This typically happens when there's a malformed message (e.g., missing <function=...> tags). But not due to LLM output.
|
||||
"""
|
||||
|
||||
def __init__(self, message):
|
||||
def __init__(self, message: str) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
@@ -119,14 +129,14 @@ class FunctionCallValidationError(Exception):
|
||||
This typically happens when the LLM outputs unrecognized function call / parameter names / values.
|
||||
"""
|
||||
|
||||
def __init__(self, message):
|
||||
def __init__(self, message: str) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class FunctionCallNotExistsError(Exception):
|
||||
"""Exception raised when an LLM call a tool that is not registered."""
|
||||
|
||||
def __init__(self, message):
|
||||
def __init__(self, message: str) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
@@ -183,15 +193,17 @@ class AgentRuntimeNotFoundError(AgentRuntimeUnavailableError):
|
||||
|
||||
|
||||
class BrowserInitException(Exception):
|
||||
def __init__(self, message='Failed to initialize browser environment'):
|
||||
def __init__(
|
||||
self, message: str = 'Failed to initialize browser environment'
|
||||
) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class BrowserUnavailableException(Exception):
|
||||
def __init__(
|
||||
self,
|
||||
message='Browser environment is not available, please check if has been initialized',
|
||||
):
|
||||
message: str = 'Browser environment is not available, please check if has been initialized',
|
||||
) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
@@ -209,5 +221,5 @@ class MicroAgentError(Exception):
|
||||
class MicroAgentValidationError(MicroAgentError):
|
||||
"""Raised when there's a validation error in microagent metadata."""
|
||||
|
||||
def __init__(self, message='Micro agent validation failed'):
|
||||
def __init__(self, message: str = 'Micro agent validation failed') -> None:
|
||||
super().__init__(message)
|
||||
|
||||
+44
-29
@@ -74,10 +74,11 @@ LOG_COLORS: Mapping[str, ColorType] = {
|
||||
|
||||
|
||||
class StackInfoFilter(logging.Filter):
|
||||
def filter(self, record):
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
if record.levelno >= logging.ERROR:
|
||||
record.stack_info = True
|
||||
record.exc_info = True
|
||||
# LogRecord attributes are dynamically typed
|
||||
setattr(record, 'stack_info', True)
|
||||
setattr(record, 'exc_info', sys.exc_info())
|
||||
return True
|
||||
|
||||
|
||||
@@ -107,9 +108,9 @@ def strip_ansi(s: str) -> str:
|
||||
|
||||
|
||||
class ColoredFormatter(logging.Formatter):
|
||||
def format(self, record):
|
||||
msg_type = record.__dict__.get('msg_type')
|
||||
event_source = record.__dict__.get('event_source')
|
||||
def format(self, record: logging.LogRecord) -> str:
|
||||
msg_type = record.__dict__.get('msg_type', '')
|
||||
event_source = record.__dict__.get('event_source', '')
|
||||
if event_source:
|
||||
new_msg_type = f'{event_source.upper()}_{msg_type}'
|
||||
if new_msg_type in LOG_COLORS:
|
||||
@@ -136,12 +137,13 @@ class ColoredFormatter(logging.Formatter):
|
||||
return super().format(new_record)
|
||||
|
||||
|
||||
def _fix_record(record: logging.LogRecord):
|
||||
def _fix_record(record: logging.LogRecord) -> logging.LogRecord:
|
||||
new_record = copy.copy(record)
|
||||
# The formatter expects non boolean values, and will raise an exception if there is a boolean - so we fix these
|
||||
if new_record.exc_info is True and not new_record.exc_text: # type: ignore
|
||||
new_record.exc_info = sys.exc_info() # type: ignore
|
||||
new_record.stack_info = None # type: ignore
|
||||
# LogRecord attributes are dynamically typed
|
||||
if getattr(new_record, 'exc_info', None) is True:
|
||||
setattr(new_record, 'exc_info', sys.exc_info())
|
||||
setattr(new_record, 'stack_info', None)
|
||||
return new_record
|
||||
|
||||
|
||||
@@ -158,32 +160,32 @@ class RollingLogger:
|
||||
log_lines: list[str]
|
||||
all_lines: str
|
||||
|
||||
def __init__(self, max_lines=10, char_limit=80):
|
||||
def __init__(self, max_lines: int = 10, char_limit: int = 80) -> None:
|
||||
self.max_lines = max_lines
|
||||
self.char_limit = char_limit
|
||||
self.log_lines = [''] * self.max_lines
|
||||
self.all_lines = ''
|
||||
|
||||
def is_enabled(self):
|
||||
def is_enabled(self) -> bool:
|
||||
return DEBUG and sys.stdout.isatty()
|
||||
|
||||
def start(self, message=''):
|
||||
def start(self, message: str = '') -> None:
|
||||
if message:
|
||||
print(message)
|
||||
self._write('\n' * self.max_lines)
|
||||
self._flush()
|
||||
|
||||
def add_line(self, line):
|
||||
def add_line(self, line: str) -> None:
|
||||
self.log_lines.pop(0)
|
||||
self.log_lines.append(line[: self.char_limit])
|
||||
self.print_lines()
|
||||
self.all_lines += line + '\n'
|
||||
|
||||
def write_immediately(self, line):
|
||||
def write_immediately(self, line: str) -> None:
|
||||
self._write(line)
|
||||
self._flush()
|
||||
|
||||
def print_lines(self):
|
||||
def print_lines(self) -> None:
|
||||
"""Display the last n log_lines in the console (not for file logging).
|
||||
|
||||
This will create the effect of a rolling display in the console.
|
||||
@@ -192,37 +194,39 @@ class RollingLogger:
|
||||
for line in self.log_lines:
|
||||
self.replace_current_line(line)
|
||||
|
||||
def move_back(self, amount=-1):
|
||||
def move_back(self, amount: int = -1) -> None:
|
||||
r"""'\033[F' moves the cursor up one line."""
|
||||
if amount == -1:
|
||||
amount = self.max_lines
|
||||
self._write('\033[F' * (self.max_lines))
|
||||
self._flush()
|
||||
|
||||
def replace_current_line(self, line=''):
|
||||
def replace_current_line(self, line: str = '') -> None:
|
||||
r"""'\033[2K\r' clears the line and moves the cursor to the beginning of the line."""
|
||||
self._write('\033[2K' + line + '\n')
|
||||
self._flush()
|
||||
|
||||
def _write(self, line):
|
||||
def _write(self, line: str) -> None:
|
||||
if not self.is_enabled():
|
||||
return
|
||||
sys.stdout.write(line)
|
||||
|
||||
def _flush(self):
|
||||
def _flush(self) -> None:
|
||||
if not self.is_enabled():
|
||||
return
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
class SensitiveDataFilter(logging.Filter):
|
||||
def filter(self, record):
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
# Gather sensitive values which should not ever appear in the logs.
|
||||
sensitive_values = []
|
||||
for key, value in os.environ.items():
|
||||
key_upper = key.upper()
|
||||
if len(value) > 2 and any(
|
||||
s in key_upper for s in ('SECRET', 'KEY', 'CODE', 'TOKEN')
|
||||
if (
|
||||
len(value) > 2
|
||||
and value != 'default'
|
||||
and any(s in key_upper for s in ('SECRET', 'KEY', 'CODE', 'TOKEN'))
|
||||
):
|
||||
sensitive_values.append(value)
|
||||
|
||||
@@ -243,6 +247,7 @@ class SensitiveDataFilter(logging.Filter):
|
||||
'modal_api_token_secret',
|
||||
'llm_api_key',
|
||||
'sandbox_env_github_token',
|
||||
'daytona_api_key',
|
||||
]
|
||||
|
||||
# add env var names
|
||||
@@ -260,7 +265,9 @@ class SensitiveDataFilter(logging.Filter):
|
||||
return True
|
||||
|
||||
|
||||
def get_console_handler(log_level: int = logging.INFO, extra_info: str | None = None):
|
||||
def get_console_handler(
|
||||
log_level: int = logging.INFO, extra_info: str | None = None
|
||||
) -> logging.StreamHandler:
|
||||
"""Returns a console handler for logging."""
|
||||
console_handler = logging.StreamHandler()
|
||||
console_handler.setLevel(log_level)
|
||||
@@ -271,7 +278,9 @@ def get_console_handler(log_level: int = logging.INFO, extra_info: str | None =
|
||||
return console_handler
|
||||
|
||||
|
||||
def get_file_handler(log_dir: str, log_level: int = logging.INFO):
|
||||
def get_file_handler(
|
||||
log_dir: str, log_level: int = logging.INFO
|
||||
) -> logging.FileHandler:
|
||||
"""Returns a file handler for logging."""
|
||||
os.makedirs(log_dir, exist_ok=True)
|
||||
timestamp = datetime.now().strftime('%Y-%m-%d')
|
||||
@@ -345,7 +354,13 @@ logging.getLogger('LiteLLM Proxy').disabled = True
|
||||
class LlmFileHandler(logging.FileHandler):
|
||||
"""LLM prompt and response logging."""
|
||||
|
||||
def __init__(self, filename, mode='a', encoding='utf-8', delay=False):
|
||||
def __init__(
|
||||
self,
|
||||
filename: str,
|
||||
mode: str = 'a',
|
||||
encoding: str = 'utf-8',
|
||||
delay: bool = False,
|
||||
) -> None:
|
||||
"""Initializes an instance of LlmFileHandler.
|
||||
|
||||
Args:
|
||||
@@ -376,7 +391,7 @@ class LlmFileHandler(logging.FileHandler):
|
||||
self.baseFilename = os.path.join(self.log_directory, filename)
|
||||
super().__init__(self.baseFilename, mode, encoding, delay)
|
||||
|
||||
def emit(self, record):
|
||||
def emit(self, record: logging.LogRecord) -> None:
|
||||
"""Emits a log record.
|
||||
|
||||
Args:
|
||||
@@ -391,7 +406,7 @@ class LlmFileHandler(logging.FileHandler):
|
||||
self.message_counter += 1
|
||||
|
||||
|
||||
def _get_llm_file_handler(name: str, log_level: int):
|
||||
def _get_llm_file_handler(name: str, log_level: int) -> LlmFileHandler:
|
||||
# The 'delay' parameter, when set to True, postpones the opening of the log file
|
||||
# until the first log message is emitted.
|
||||
llm_file_handler = LlmFileHandler(name, delay=True)
|
||||
@@ -400,7 +415,7 @@ def _get_llm_file_handler(name: str, log_level: int):
|
||||
return llm_file_handler
|
||||
|
||||
|
||||
def _setup_llm_logger(name: str, log_level: int):
|
||||
def _setup_llm_logger(name: str, log_level: int) -> logging.Logger:
|
||||
logger = logging.getLogger(name)
|
||||
logger.propagate = False
|
||||
logger.setLevel(log_level)
|
||||
|
||||
@@ -27,7 +27,6 @@ from openhands.events.action.action import Action
|
||||
from openhands.events.event import Event
|
||||
from openhands.events.observation import AgentStateChangedObservation
|
||||
from openhands.events.serialization import event_from_dict
|
||||
from openhands.events.serialization.event import event_to_trajectory
|
||||
from openhands.io import read_input, read_task
|
||||
from openhands.runtime.base import Runtime
|
||||
|
||||
@@ -167,6 +166,8 @@ async def run_controller(
|
||||
# NOTE: the saved state does not include delegates events
|
||||
end_state.save_to_session(event_stream.sid, event_stream.file_store)
|
||||
|
||||
await controller.close(set_stop_state=False)
|
||||
|
||||
state = controller.get_state()
|
||||
|
||||
# save trajectories if applicable
|
||||
@@ -177,7 +178,7 @@ async def run_controller(
|
||||
else:
|
||||
file_path = config.save_trajectory_path
|
||||
os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
||||
histories = [event_to_trajectory(event) for event in state.history]
|
||||
histories = controller.get_trajectory()
|
||||
with open(file_path, 'w') as f:
|
||||
json.dump(histories, f)
|
||||
|
||||
|
||||
@@ -15,7 +15,9 @@ class Content(BaseModel):
|
||||
cache_prompt: bool = False
|
||||
|
||||
@model_serializer
|
||||
def serialize_model(self):
|
||||
def serialize_model(
|
||||
self,
|
||||
) -> dict[str, str | dict[str, str]] | list[dict[str, str | dict[str, str]]]:
|
||||
raise NotImplementedError('Subclasses should implement this method.')
|
||||
|
||||
|
||||
@@ -24,7 +26,7 @@ class TextContent(Content):
|
||||
text: str
|
||||
|
||||
@model_serializer
|
||||
def serialize_model(self):
|
||||
def serialize_model(self) -> dict[str, str | dict[str, str]]:
|
||||
data: dict[str, str | dict[str, str]] = {
|
||||
'type': self.type,
|
||||
'text': self.text,
|
||||
@@ -39,7 +41,7 @@ class ImageContent(Content):
|
||||
image_urls: list[str]
|
||||
|
||||
@model_serializer
|
||||
def serialize_model(self):
|
||||
def serialize_model(self) -> list[dict[str, str | dict[str, str]]]:
|
||||
images: list[dict[str, str | dict[str, str]]] = []
|
||||
for url in self.image_urls:
|
||||
images.append({'type': self.type, 'image_url': {'url': url}})
|
||||
@@ -101,15 +103,22 @@ class Message(BaseModel):
|
||||
# See discussion here for details: https://github.com/BerriAI/litellm/issues/6422#issuecomment-2438765472
|
||||
if self.role == 'tool' and item.cache_prompt:
|
||||
role_tool_with_prompt_caching = True
|
||||
if isinstance(d, dict):
|
||||
d.pop('cache_control')
|
||||
elif isinstance(d, list):
|
||||
for d_item in d:
|
||||
d_item.pop('cache_control')
|
||||
if isinstance(item, TextContent):
|
||||
d.pop('cache_control', None)
|
||||
elif isinstance(item, ImageContent):
|
||||
# ImageContent.model_dump() always returns a list
|
||||
# We know d is a list of dicts for ImageContent
|
||||
if hasattr(d, '__iter__'):
|
||||
for d_item in d:
|
||||
if hasattr(d_item, 'pop'):
|
||||
d_item.pop('cache_control', None)
|
||||
|
||||
if isinstance(item, TextContent):
|
||||
content.append(d)
|
||||
elif isinstance(item, ImageContent) and self.vision_enabled:
|
||||
content.extend(d)
|
||||
# ImageContent.model_dump() always returns a list
|
||||
# We know d is a list for ImageContent
|
||||
content.extend([d] if isinstance(d, dict) else d)
|
||||
|
||||
message_dict: dict = {'content': content, 'role': self.role}
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@ from openhands.events.observation import (
|
||||
from openhands.events.observation.error import ErrorObservation
|
||||
from openhands.events.observation.observation import Observation
|
||||
from openhands.events.serialization.event import truncate_content
|
||||
from openhands.llm.metrics import Metrics, TokenUsage
|
||||
|
||||
|
||||
def events_to_messages(
|
||||
@@ -159,7 +160,7 @@ def get_action_message(
|
||||
)
|
||||
|
||||
llm_response: ModelResponse = tool_metadata.model_response
|
||||
assistant_msg = llm_response.choices[0].message
|
||||
assistant_msg = getattr(llm_response.choices[0], 'message')
|
||||
|
||||
# Add the LLM message (assistant) that initiated the tool calls
|
||||
# (overwrites any previous message with the same response_id)
|
||||
@@ -167,7 +168,7 @@ def get_action_message(
|
||||
f'Tool calls type: {type(assistant_msg.tool_calls)}, value: {assistant_msg.tool_calls}'
|
||||
)
|
||||
pending_tool_call_action_messages[llm_response.id] = Message(
|
||||
role=assistant_msg.role,
|
||||
role=getattr(assistant_msg, 'role', 'assistant'),
|
||||
# tool call content SHOULD BE a string
|
||||
content=[TextContent(text=assistant_msg.content or '')]
|
||||
if assistant_msg.content is not None
|
||||
@@ -184,7 +185,7 @@ def get_action_message(
|
||||
tool_metadata = action.tool_call_metadata
|
||||
if tool_metadata is not None:
|
||||
# take the response message from the tool call
|
||||
assistant_msg = tool_metadata.model_response.choices[0].message
|
||||
assistant_msg = getattr(tool_metadata.model_response.choices[0], 'message')
|
||||
content = assistant_msg.content or ''
|
||||
|
||||
# save content if any, to thought
|
||||
@@ -196,9 +197,11 @@ def get_action_message(
|
||||
|
||||
# remove the tool call metadata
|
||||
action.tool_call_metadata = None
|
||||
if role not in ('user', 'system', 'assistant', 'tool'):
|
||||
raise ValueError(f'Invalid role: {role}')
|
||||
return [
|
||||
Message(
|
||||
role=role,
|
||||
role=role, # type: ignore[arg-type]
|
||||
content=[TextContent(text=action.thought)],
|
||||
)
|
||||
]
|
||||
@@ -207,9 +210,11 @@ def get_action_message(
|
||||
content = [TextContent(text=action.content or '')]
|
||||
if vision_is_active and action.image_urls:
|
||||
content.append(ImageContent(image_urls=action.image_urls))
|
||||
if role not in ('user', 'system', 'assistant', 'tool'):
|
||||
raise ValueError(f'Invalid role: {role}')
|
||||
return [
|
||||
Message(
|
||||
role=role,
|
||||
role=role, # type: ignore[arg-type]
|
||||
content=content,
|
||||
)
|
||||
]
|
||||
@@ -217,7 +222,7 @@ def get_action_message(
|
||||
content = [TextContent(text=f'User executed the command:\n{action.command}')]
|
||||
return [
|
||||
Message(
|
||||
role='user',
|
||||
role='user', # Always user for CmdRunAction
|
||||
content=content,
|
||||
)
|
||||
]
|
||||
@@ -351,17 +356,58 @@ def get_observation_message(
|
||||
|
||||
|
||||
def apply_prompt_caching(messages: list[Message]) -> None:
|
||||
"""Applies caching breakpoints to the messages."""
|
||||
"""Applies caching breakpoints to the messages.
|
||||
|
||||
For the new Anthropic API, we only need to mark the last user or tool message as cacheable.
|
||||
"""
|
||||
# NOTE: this is only needed for anthropic
|
||||
# following logic here:
|
||||
# https://github.com/anthropics/anthropic-quickstarts/blob/8f734fd08c425c6ec91ddd613af04ff87d70c5a0/computer-use-demo/computer_use_demo/loop.py#L241-L262
|
||||
breakpoints_remaining = 3 # remaining 1 for system/tool
|
||||
for message in reversed(messages):
|
||||
if message.role in ('user', 'tool'):
|
||||
if breakpoints_remaining > 0:
|
||||
message.content[
|
||||
-1
|
||||
].cache_prompt = True # Last item inside the message content
|
||||
breakpoints_remaining -= 1
|
||||
else:
|
||||
break
|
||||
message.content[
|
||||
-1
|
||||
].cache_prompt = True # Last item inside the message content
|
||||
break
|
||||
|
||||
|
||||
def get_token_usage_for_event(event: Event, metrics: Metrics) -> TokenUsage | None:
|
||||
"""
|
||||
Returns at most one token usage record for the `model_response.id` in this event's
|
||||
`tool_call_metadata`.
|
||||
|
||||
If no response_id is found, or none match in metrics.token_usages, returns None.
|
||||
"""
|
||||
if event.tool_call_metadata and event.tool_call_metadata.model_response:
|
||||
response_id = event.tool_call_metadata.model_response.get('id')
|
||||
if response_id:
|
||||
return next(
|
||||
(
|
||||
usage
|
||||
for usage in metrics.token_usages
|
||||
if usage.response_id == response_id
|
||||
),
|
||||
None,
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def get_token_usage_for_event_id(
|
||||
events: list[Event], event_id: int, metrics: Metrics
|
||||
) -> TokenUsage | None:
|
||||
"""
|
||||
Starting from the event with .id == event_id and moving backwards in `events`,
|
||||
find the first TokenUsage record (if any) associated with a response_id from
|
||||
tool_call_metadata.model_response.id.
|
||||
|
||||
Returns the first match found, or None if none is found.
|
||||
"""
|
||||
# find the index of the event with the given id
|
||||
idx = next((i for i, e in enumerate(events) if e.id == event_id), None)
|
||||
if idx is None:
|
||||
return None
|
||||
|
||||
# search backward from idx down to 0
|
||||
for i in range(idx, -1, -1):
|
||||
usage = get_token_usage_for_event(events[i], metrics)
|
||||
if usage is not None:
|
||||
return usage
|
||||
return None
|
||||
|
||||
+22
-9
@@ -42,6 +42,7 @@ LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (RateLimitError,)
|
||||
# cache prompt supporting models
|
||||
# remove this when gemini and deepseek are supported
|
||||
CACHE_PROMPT_SUPPORTED_MODELS = [
|
||||
'claude-3-7-sonnet-20250219',
|
||||
'claude-3-5-sonnet-20241022',
|
||||
'claude-3-5-sonnet-20240620',
|
||||
'claude-3-5-haiku-20241022',
|
||||
@@ -51,6 +52,7 @@ CACHE_PROMPT_SUPPORTED_MODELS = [
|
||||
|
||||
# function calling supporting models
|
||||
FUNCTION_CALLING_SUPPORTED_MODELS = [
|
||||
'claude-3-7-sonnet-20250219',
|
||||
'claude-3-5-sonnet',
|
||||
'claude-3-5-sonnet-20240620',
|
||||
'claude-3-5-sonnet-20241022',
|
||||
@@ -497,20 +499,21 @@ class LLM(RetryMixin, DebugMixin):
|
||||
stats += 'Response Latency: %.3f seconds\n' % latest_latency.latency
|
||||
|
||||
usage: Usage | None = response.get('usage')
|
||||
response_id = response.get('id', 'unknown')
|
||||
|
||||
if usage:
|
||||
# keep track of the input and output tokens
|
||||
input_tokens = usage.get('prompt_tokens')
|
||||
output_tokens = usage.get('completion_tokens')
|
||||
prompt_tokens = usage.get('prompt_tokens', 0)
|
||||
completion_tokens = usage.get('completion_tokens', 0)
|
||||
|
||||
if input_tokens:
|
||||
stats += 'Input tokens: ' + str(input_tokens)
|
||||
if prompt_tokens:
|
||||
stats += 'Input tokens: ' + str(prompt_tokens)
|
||||
|
||||
if output_tokens:
|
||||
if completion_tokens:
|
||||
stats += (
|
||||
(' | ' if input_tokens else '')
|
||||
(' | ' if prompt_tokens else '')
|
||||
+ 'Output tokens: '
|
||||
+ str(output_tokens)
|
||||
+ str(completion_tokens)
|
||||
+ '\n'
|
||||
)
|
||||
|
||||
@@ -519,7 +522,7 @@ class LLM(RetryMixin, DebugMixin):
|
||||
'prompt_tokens_details'
|
||||
)
|
||||
cache_hit_tokens = (
|
||||
prompt_tokens_details.cached_tokens if prompt_tokens_details else None
|
||||
prompt_tokens_details.cached_tokens if prompt_tokens_details else 0
|
||||
)
|
||||
if cache_hit_tokens:
|
||||
stats += 'Input tokens (cache hit): ' + str(cache_hit_tokens) + '\n'
|
||||
@@ -528,10 +531,20 @@ class LLM(RetryMixin, DebugMixin):
|
||||
# but litellm doesn't separate them in the usage stats
|
||||
# so we can read it from the provider-specific extra field
|
||||
model_extra = usage.get('model_extra', {})
|
||||
cache_write_tokens = model_extra.get('cache_creation_input_tokens')
|
||||
cache_write_tokens = model_extra.get('cache_creation_input_tokens', 0)
|
||||
if cache_write_tokens:
|
||||
stats += 'Input tokens (cache write): ' + str(cache_write_tokens) + '\n'
|
||||
|
||||
# Record in metrics
|
||||
# We'll treat cache_hit_tokens as "cache read" and cache_write_tokens as "cache write"
|
||||
self.metrics.add_token_usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
cache_read_tokens=cache_hit_tokens,
|
||||
cache_write_tokens=cache_write_tokens,
|
||||
response_id=response_id,
|
||||
)
|
||||
|
||||
# log the stats
|
||||
if stats:
|
||||
logger.debug(stats)
|
||||
|
||||
@@ -17,11 +17,23 @@ class ResponseLatency(BaseModel):
|
||||
response_id: str
|
||||
|
||||
|
||||
class TokenUsage(BaseModel):
|
||||
"""Metric tracking detailed token usage per completion call."""
|
||||
|
||||
model: str
|
||||
prompt_tokens: int
|
||||
completion_tokens: int
|
||||
cache_read_tokens: int
|
||||
cache_write_tokens: int
|
||||
response_id: str
|
||||
|
||||
|
||||
class Metrics:
|
||||
"""Metrics class can record various metrics during running and evaluation.
|
||||
Currently, we define the following metrics:
|
||||
accumulated_cost: the total cost (USD $) of the current LLM.
|
||||
response_latency: the time taken for each LLM completion call.
|
||||
We track:
|
||||
- accumulated_cost and costs
|
||||
- A list of ResponseLatency
|
||||
- A list of TokenUsage (one per call).
|
||||
"""
|
||||
|
||||
def __init__(self, model_name: str = 'default') -> None:
|
||||
@@ -29,6 +41,7 @@ class Metrics:
|
||||
self._costs: list[Cost] = []
|
||||
self._response_latencies: list[ResponseLatency] = []
|
||||
self.model_name = model_name
|
||||
self._token_usages: list[TokenUsage] = []
|
||||
|
||||
@property
|
||||
def accumulated_cost(self) -> float:
|
||||
@@ -54,6 +67,16 @@ class Metrics:
|
||||
def response_latencies(self, value: list[ResponseLatency]) -> None:
|
||||
self._response_latencies = value
|
||||
|
||||
@property
|
||||
def token_usages(self) -> list[TokenUsage]:
|
||||
if not hasattr(self, '_token_usages'):
|
||||
self._token_usages = []
|
||||
return self._token_usages
|
||||
|
||||
@token_usages.setter
|
||||
def token_usages(self, value: list[TokenUsage]) -> None:
|
||||
self._token_usages = value
|
||||
|
||||
def add_cost(self, value: float) -> None:
|
||||
if value < 0:
|
||||
raise ValueError('Added cost cannot be negative.')
|
||||
@@ -67,10 +90,33 @@ class Metrics:
|
||||
)
|
||||
)
|
||||
|
||||
def add_token_usage(
|
||||
self,
|
||||
prompt_tokens: int,
|
||||
completion_tokens: int,
|
||||
cache_read_tokens: int,
|
||||
cache_write_tokens: int,
|
||||
response_id: str,
|
||||
) -> None:
|
||||
"""Add a single usage record."""
|
||||
self._token_usages.append(
|
||||
TokenUsage(
|
||||
model=self.model_name,
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
cache_read_tokens=cache_read_tokens,
|
||||
cache_write_tokens=cache_write_tokens,
|
||||
response_id=response_id,
|
||||
)
|
||||
)
|
||||
|
||||
def merge(self, other: 'Metrics') -> None:
|
||||
"""Merge 'other' metrics into this one."""
|
||||
self._accumulated_cost += other.accumulated_cost
|
||||
self._costs += other._costs
|
||||
self._response_latencies += other._response_latencies
|
||||
# use the property so older pickled objects that lack the field won't crash
|
||||
self.token_usages += other.token_usages
|
||||
self.response_latencies += other.response_latencies
|
||||
|
||||
def get(self) -> dict:
|
||||
"""Return the metrics in a dictionary."""
|
||||
@@ -80,12 +126,14 @@ class Metrics:
|
||||
'response_latencies': [
|
||||
latency.model_dump() for latency in self._response_latencies
|
||||
],
|
||||
'token_usages': [usage.model_dump() for usage in self._token_usages],
|
||||
}
|
||||
|
||||
def reset(self):
|
||||
self._accumulated_cost = 0.0
|
||||
self._costs = []
|
||||
self._response_latencies = []
|
||||
self._token_usages = []
|
||||
|
||||
def log(self):
|
||||
"""Log the metrics."""
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from openhands.memory.condenser import Condenser
|
||||
from openhands.memory.memory import LongTermMemory
|
||||
from openhands.memory.long_term_memory import LongTermMemory
|
||||
|
||||
__all__ = ['LongTermMemory', 'Condenser']
|
||||
|
||||
@@ -18,7 +18,7 @@ class ImportantEventSelection(BaseModel):
|
||||
class LLMAttentionCondenser(RollingCondenser):
|
||||
"""Rolling condenser strategy that uses an LLM to select the most important events when condensing the history."""
|
||||
|
||||
def __init__(self, llm: LLM, max_size: int = 100, keep_first: int = 0):
|
||||
def __init__(self, llm: LLM, max_size: int = 100, keep_first: int = 1):
|
||||
if keep_first >= max_size // 2:
|
||||
raise ValueError(
|
||||
f'keep_first ({keep_first}) must be less than half of max_size ({max_size})'
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user