mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
11 Commits
0.36.0
...
self-hoste
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2399174e89 | ||
|
|
7c3f4891f8 | ||
|
|
49bb7bbaba | ||
|
|
10c1252cfe | ||
|
|
911867492c | ||
|
|
85a1b47c8d | ||
|
|
d6011829a3 | ||
|
|
9200e1dbd8 | ||
|
|
d1343539ba | ||
|
|
8bc206833a | ||
|
|
7cf61d8c0e |
@@ -118,7 +118,7 @@ poetry run pytest ./tests/unit/test_*.py
|
||||
To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker container image by
|
||||
setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
|
||||
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.36-nikolaik`
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.34-nikolaik`
|
||||
|
||||
## Develop inside Docker container
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
<div align="center">
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/graphs/contributors"><img src="https://img.shields.io/github/contributors/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Contributors"></a>
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/stargazers"><img src="https://img.shields.io/github/stars/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Stargazers"></a>
|
||||
<a href="https://codecov.io/github/All-Hands-AI/OpenHands?branch=main"><img alt="CodeCov" src="https://img.shields.io/codecov/c/github/All-Hands-AI/OpenHands?style=for-the-badge&color=blue"></a>
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE"><img src="https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="MIT License"></a>
|
||||
<br/>
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2ngejmfw6-9gW4APWOC9XUp1n~SiQ6iw"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community"></a>
|
||||
@@ -51,23 +52,23 @@ system requirements and more information.
|
||||
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.36
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34
|
||||
```
|
||||
|
||||
You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!
|
||||
|
||||
When you open the application, you'll be asked to choose an LLM provider and add an API key.
|
||||
[Anthropic's Claude 3.7 Sonnet](https://www.anthropic.com/api) (`anthropic/claude-3-7-sonnet-20250219`)
|
||||
[Anthropic's Claude 3.5 Sonnet](https://www.anthropic.com/api) (`anthropic/claude-3-5-sonnet-20241022`)
|
||||
works best, but you have [many options](https://docs.all-hands.dev/modules/usage/llms).
|
||||
|
||||
## 💡 Other ways to run OpenHands
|
||||
|
||||
@@ -391,7 +391,7 @@ type = "noop"
|
||||
#[llm.condenser]
|
||||
#model = "gpt-4o"
|
||||
#temperature = 0.1
|
||||
#max_input_tokens = 1024
|
||||
#max_tokens = 1024
|
||||
|
||||
#################################### Eval ####################################
|
||||
# Configuration for the evaluation, please refer to the specific evaluation
|
||||
|
||||
@@ -11,7 +11,7 @@ services:
|
||||
- BACKEND_HOST=${BACKEND_HOST:-"0.0.0.0"}
|
||||
- SANDBOX_API_HOSTNAME=host.docker.internal
|
||||
#
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.36-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.34-nikolaik}
|
||||
- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
@@ -7,7 +7,7 @@ services:
|
||||
image: openhands:latest
|
||||
container_name: openhands-app-${DATE:-}
|
||||
environment:
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik}
|
||||
#- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234} # enable this only if you want a specific non-root sandbox user but you will have to manually adjust permissions of openhands-state for this user
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
@@ -36,14 +36,7 @@ const config: Config = {
|
||||
mermaid: true,
|
||||
},
|
||||
themes: ['@docusaurus/theme-mermaid'],
|
||||
plugins: [
|
||||
[
|
||||
require.resolve('docusaurus-lunr-search'),
|
||||
{
|
||||
languages: ['en', 'zh', 'fr', 'ja', 'pt']
|
||||
}
|
||||
]
|
||||
],
|
||||
plugins: [],
|
||||
presets: [
|
||||
[
|
||||
'classic',
|
||||
@@ -92,10 +85,6 @@ const config: Config = {
|
||||
type: 'localeDropdown',
|
||||
position: 'left',
|
||||
},
|
||||
{
|
||||
type: 'search',
|
||||
position: 'left',
|
||||
},
|
||||
{
|
||||
href: 'https://all-hands.dev',
|
||||
label: 'Company',
|
||||
|
||||
@@ -52,7 +52,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -61,7 +61,7 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.36 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -56,6 +56,6 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.36 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
|
||||
```
|
||||
|
||||
@@ -13,16 +13,16 @@
|
||||
La façon la plus simple d'exécuter OpenHands est avec Docker.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.36
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34
|
||||
```
|
||||
|
||||
Vous pouvez également exécuter OpenHands en mode [headless scriptable](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), en tant que [CLI interactive](https://docs.all-hands.dev/modules/usage/how-to/cli-mode), ou en utilisant l'[Action GitHub OpenHands](https://docs.all-hands.dev/modules/usage/how-to/github-action).
|
||||
|
||||
@@ -13,7 +13,7 @@ C'est le Runtime par défaut qui est utilisé lorsque vous démarrez OpenHands.
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -34,7 +34,7 @@ Docker で OpenHands を CLI モードで実行するには:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -44,7 +44,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.36 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ DockerでOpenHandsをヘッドレスモードで実行するには:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -42,7 +42,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.36 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi"
|
||||
```
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ OpenHandsがリポジトリで動作する際:
|
||||
|
||||
1. リポジトリに`.openhands/microagents/`が存在する場合、そこからリポジトリ固有の指示を読み込みます。
|
||||
2. 会話のキーワードによってトリガーされる一般的なガイドラインを読み込みます。
|
||||
現在の[パブリックMicroagents](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents)を参照してください。
|
||||
現在の[パブリックMicroagents](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge)を参照してください。
|
||||
|
||||
## Microagentのフォーマット
|
||||
|
||||
|
||||
@@ -88,4 +88,4 @@ triggers:
|
||||
- ビルド時間とイメージサイズを最適化
|
||||
```
|
||||
|
||||
より多くの例については、[現在のパブリックマイクロエージェント](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents)をご覧ください。
|
||||
より多くの例については、[現在のパブリックマイクロエージェント](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge)をご覧ください。
|
||||
|
||||
@@ -25,7 +25,7 @@ nikolaik の `SANDBOX_RUNTIME_CONTAINER_IMAGE` は、ランタイムサーバー
|
||||
|
||||
```bash
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-v $WORKSPACE_BASE:/opt/workspace_base \
|
||||
@@ -82,5 +82,5 @@ docker network create openhands-network
|
||||
# 分離されたネットワークで OpenHands を実行
|
||||
docker run # ... \
|
||||
--network openhands-network \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.36
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34
|
||||
```
|
||||
|
||||
@@ -35,7 +35,7 @@ Para executar o OpenHands no modo CLI com Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -45,7 +45,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.36 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ Para executar o OpenHands no modo Headless com Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -43,7 +43,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.36 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.main -t "escreva um script bash que imprima oi"
|
||||
```
|
||||
|
||||
|
||||
@@ -58,17 +58,17 @@
|
||||
A maneira mais fácil de executar o OpenHands é no Docker.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.36
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34
|
||||
```
|
||||
|
||||
Você encontrará o OpenHands em execução em http://localhost:3000!
|
||||
|
||||
@@ -13,7 +13,7 @@ Quando o OpenHands trabalha com um repositório, ele:
|
||||
|
||||
1. Carrega instruções específicas do repositório de `.openhands/microagents/`, se presentes no repositório.
|
||||
2. Carrega diretrizes gerais acionadas por palavras-chave nas conversas.
|
||||
Veja os [Microagentes Públicos](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents) atuais.
|
||||
Veja os [Microagentes Públicos](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge) atuais.
|
||||
|
||||
## Formato do Microagente
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
Microagentes públicos são diretrizes especializadas acionadas por palavras-chave para todos os usuários do OpenHands.
|
||||
Eles são definidos em arquivos markdown no diretório
|
||||
[`microagents/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents).
|
||||
[`microagents/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge).
|
||||
|
||||
Microagentes públicos:
|
||||
- Monitoram comandos recebidos em busca de suas palavras-chave de acionamento.
|
||||
@@ -149,5 +149,5 @@ Lembre-se de:
|
||||
- Otimizar para tempo de build e tamanho da imagem
|
||||
```
|
||||
|
||||
Veja os [microagentes públicos atuais](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents) para
|
||||
Veja os [microagentes públicos atuais](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge) para
|
||||
mais exemplos.
|
||||
|
||||
@@ -13,7 +13,7 @@ Este é o Runtime padrão que é usado quando você inicia o OpenHands. Você po
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -50,7 +50,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -59,7 +59,7 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.36 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -57,6 +57,6 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.36 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
|
||||
```
|
||||
|
||||
@@ -11,16 +11,16 @@
|
||||
在 Docker 中运行 OpenHands 是最简单的方式。
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.36
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34
|
||||
```
|
||||
|
||||
你也可以在可脚本化的[无头模式](https://docs.all-hands.dev/modules/usage/how-to/headless-mode)下运行 OpenHands,作为[交互式 CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode),或使用 [OpenHands GitHub Action](https://docs.all-hands.dev/modules/usage/how-to/github-action)。
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -35,7 +35,7 @@ To run OpenHands in CLI mode with Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -45,7 +45,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.36 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -136,6 +136,7 @@ OpenHands automatically exports a `GITLAB_TOKEN` to the shell environment if pro
|
||||
## Tips for Effective Use
|
||||
|
||||
- Be specific in your requests to get the most accurate and helpful responses, as described in the [prompting best practices](../prompting/prompting-best-practices).
|
||||
- Use the workspace panel to explore your project structure.
|
||||
- Use one of the recommended models, as described in the [LLMs section](usage/llms/llms.md).
|
||||
|
||||
Remember, the GUI mode of OpenHands is designed to make your interaction with the AI assistant as smooth and intuitive
|
||||
|
||||
@@ -32,7 +32,7 @@ To run OpenHands in Headless mode with Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -43,7 +43,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.36 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi"
|
||||
```
|
||||
|
||||
|
||||
@@ -58,17 +58,17 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to
|
||||
The easiest way to run OpenHands is in Docker.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.36-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.36
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34
|
||||
```
|
||||
|
||||
You'll find OpenHands running at http://localhost:3000!
|
||||
|
||||
@@ -9,9 +9,11 @@
|
||||
### Changes
|
||||
- Shows the file changes performed by OpenHands.
|
||||
|
||||
### VS Code
|
||||
- Embedded VS Code for browsing and modifying files.
|
||||
- Can also be used to upload and download files.
|
||||
### Workspace
|
||||
- Browse project files and directories.
|
||||
- Use the `Open in VS Code` option to:
|
||||
* Modify files
|
||||
* Upload and download files
|
||||
|
||||
### Terminal
|
||||
- A space for OpenHands and users to run terminal commands.
|
||||
|
||||
@@ -5,9 +5,10 @@
|
||||
Keyword-triggered microagents provide OpenHands with specific instructions that are activated when certain keywords
|
||||
appear in the prompt. This is useful for tailoring behavior based on particular tools, languages, or frameworks.
|
||||
|
||||
## Usage
|
||||
## Microagent File
|
||||
|
||||
These microagents are only loaded when a prompt includes one of the trigger words.
|
||||
Create a keyword-triggered microagent (example: `.openhands/microagents/trigger-keyword.md`) to include instructions
|
||||
that activate only for prompts with specific keywords.
|
||||
|
||||
## Frontmatter Syntax
|
||||
|
||||
@@ -18,21 +19,31 @@ Enclose the frontmatter in triple dashes (---) and include the following fields:
|
||||
|
||||
| Field | Description | Required | Default |
|
||||
|------------|--------------------------------------------------|----------|------------------|
|
||||
| `name` | A unique identifier for the microagent. | Yes | 'default' |
|
||||
| `type` | Type of microagent. Must be set to `knowledge`. | Yes | 'repo' |
|
||||
| `triggers` | A list of keywords that activate the microagent. | Yes | None |
|
||||
| `agent` | The agent this microagent applies to. | No | 'CodeActAgent' |
|
||||
|
||||
|
||||
## Example
|
||||
|
||||
Keyword-triggered microagent file example located at `.openhands/microagents/yummy.md`:
|
||||
```
|
||||
---
|
||||
name: magic_word
|
||||
type: knowledge
|
||||
triggers:
|
||||
- yummyhappy
|
||||
- happyyummy
|
||||
agent: CodeActAgent
|
||||
---
|
||||
|
||||
The user has said the magic word. Respond with "That was delicious!"
|
||||
```
|
||||
|
||||
[See examples of microagents triggered by keywords in the official OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents)
|
||||
Keyword-triggered microagents:
|
||||
- Monitor incoming prompts for specified trigger words.
|
||||
- Activate when relevant triggers are detected.
|
||||
- Apply their specialized knowledge and capabilities.
|
||||
- Follow defined guidelines and restrictions.
|
||||
|
||||
[See examples of microagents triggered by keywords in the official OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge)
|
||||
|
||||
@@ -34,7 +34,7 @@ some-repository/
|
||||
Each microagent file may include frontmatter that provides additional information. In some cases, this frontmatter
|
||||
is required:
|
||||
|
||||
| Microagent Type | Required |
|
||||
|----------------------------------|----------|
|
||||
| `General Repository Microagents` | No |
|
||||
| `Keyword-Triggered Microagents` | Yes |
|
||||
| Microagent Type | Frontmatter Requirement |
|
||||
|----------------------------------|-------------------------------------------------------|
|
||||
| `General Repository Microagents` | Required only if more than one of this type exists. |
|
||||
| `Keyword-Triggered Microagents` | Required. |
|
||||
|
||||
@@ -2,8 +2,7 @@
|
||||
|
||||
## Overview
|
||||
|
||||
Global microagents are [keyword-triggered microagents](./microagents-keyword) that apply to all OpenHands users. A list of the current
|
||||
global microagents can be found [in the OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents).
|
||||
Global microagents are [keyword-triggered microagents](./microagents-keyword) that apply to all OpenHands users.
|
||||
|
||||
## Contributing a Global Microagent
|
||||
|
||||
|
||||
@@ -4,24 +4,31 @@
|
||||
|
||||
General guidelines for OpenHands to work more effectively with the repository.
|
||||
|
||||
## Usage
|
||||
## Microagent File
|
||||
|
||||
These microagents are always loaded as part of the context.
|
||||
Create a general repository microagent (example: `.openhands/microagents/repo.md`) to include
|
||||
project-specific instructions, team practices, coding standards, and architectural guidelines that are relevant for
|
||||
**all** prompts in that repository.
|
||||
|
||||
## Frontmatter Syntax
|
||||
|
||||
The frontmatter for this type of microagent is optional.
|
||||
The frontmatter for this type of microagent is optional, unless you plan to include more than one general
|
||||
repository microagent.
|
||||
|
||||
Frontmatter should be enclosed in triple dashes (---) and may include the following fields:
|
||||
|
||||
| Field | Description | Required | Default |
|
||||
|-----------|-----------------------------------------|----------|----------------|
|
||||
| `agent` | The agent this microagent applies to | No | 'CodeActAgent' |
|
||||
| Field | Description | Required | Default |
|
||||
|-----------|-----------------------------------------|--------------------------------------------------------------------|----------------|
|
||||
| `name` | A unique identifier for the microagent | Required only if using more than one general repository microagent | 'default' |
|
||||
| `agent` | The agent this microagent applies to | No | 'CodeActAgent' |
|
||||
|
||||
## Example
|
||||
|
||||
General repository microagent file example located at `.openhands/microagents/repo.md`:
|
||||
```
|
||||
---
|
||||
name: repo
|
||||
---
|
||||
|
||||
This project is a TODO application that allows users to track TODO items.
|
||||
|
||||
To set it up, you can run `npm run build`.
|
||||
|
||||
1120
docs/package-lock.json
generated
1120
docs/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -22,9 +22,7 @@
|
||||
"@docusaurus/preset-classic": "^3.7.0",
|
||||
"@docusaurus/theme-mermaid": "^3.7.0",
|
||||
"@mdx-js/react": "^3.1.0",
|
||||
"@node-rs/jieba": "^2.0.1",
|
||||
"clsx": "^2.0.0",
|
||||
"docusaurus-lunr-search": "^3.6.0",
|
||||
"prism-react-renderer": "^2.4.1",
|
||||
"react": "^19.1.0",
|
||||
"react-dom": "^19.1.0",
|
||||
@@ -54,5 +52,5 @@
|
||||
"engines": {
|
||||
"node": ">=18.0"
|
||||
},
|
||||
"packageManager": "npm@10.5.0"
|
||||
}
|
||||
"packageManager": "yarn@1.22.22+sha512.a6b2f7906b721bba3d67d4aff083df04dad64c399707841b7acf00f6b133b7ac24255f2652fa22ae3534329dc6180534e98d17432037ff6fd140556e2bb3137e"
|
||||
}
|
||||
|
||||
@@ -45,6 +45,7 @@ export function HomepageHeader() {
|
||||
<div align="center" className="header-links">
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/graphs/contributors"><img src="https://img.shields.io/github/contributors/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Contributors" /></a>
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/stargazers"><img src="https://img.shields.io/github/stars/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Stargazers" /></a>
|
||||
<a href="https://codecov.io/github/All-Hands-AI/OpenHands?branch=main"><img alt="CodeCov" src="https://img.shields.io/codecov/c/github/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" /></a>
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE"><img src="https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="MIT License" /></a>
|
||||
<br/>
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2ngejmfw6-9gW4APWOC9XUp1n~SiQ6iw"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community" /></a>
|
||||
@@ -52,7 +53,7 @@ export function HomepageHeader() {
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/CREDITS.md"><img src="https://img.shields.io/badge/Project-Credits-blue?style=for-the-badge&color=FFE165&logo=github&logoColor=white" alt="Credits" /></a>
|
||||
<br/>
|
||||
<a href="https://arxiv.org/abs/2407.16741"><img src="https://img.shields.io/badge/Paper%20on%20Arxiv-000?logoColor=FFE165&logo=arxiv&style=for-the-badge" alt="Paper on Arxiv" /></a>
|
||||
<a href="https://docs.google.com/spreadsheets/d/1wOUdFCMyY6Nt0AIqF705KN4JKOWgeI4wUGUP60krXXs/edit?gid=0#gid=0"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score" /></a>
|
||||
<a href="https://huggingface.co/spaces/OpenHands/evaluation"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score" /></a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
BIN
docs/static/img/oh-features.png
vendored
BIN
docs/static/img/oh-features.png
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 144 KiB After Width: | Height: | Size: 120 KiB |
10103
docs/yarn.lock
Normal file
10103
docs/yarn.lock
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,65 +0,0 @@
|
||||
# Multi-swe-bench Evaluation with OpenHands
|
||||
|
||||
## LLM Setup
|
||||
|
||||
Please follow [here](../../README.md#setup).
|
||||
|
||||
## Dataset Preparation
|
||||
|
||||
Please download the [**Multi-SWE-Bench** dataset](https://huggingface.co/datasets/bytedance-research/Multi-SWE-Bench).
|
||||
Then convert the dataset using this [script](scripts/data/data_change.py).
|
||||
|
||||
```bash
|
||||
python evaluation/benchmarks/multi_swe_bench/scripts/data/data_change.py
|
||||
```
|
||||
|
||||
## Docker image download
|
||||
|
||||
Please download the multi-swe-bench Docker images from [here](https://github.com/multi-swe-bench/multi-swe-bench?tab=readme-ov-file#run-evaluation).
|
||||
|
||||
## Generate patch
|
||||
|
||||
Please edit the [script](infer.sh) and run it.
|
||||
|
||||
```bash
|
||||
bash evaluation/benchmarks/multi_swe_bench/infer.sh
|
||||
```
|
||||
|
||||
Script variable explanation:
|
||||
|
||||
- `models`, e.g. `llm.eval_gpt4_1106_preview`, is the config group name for your
|
||||
LLM settings, as defined in your `config.toml`.
|
||||
- `git-version`, e.g. `HEAD`, is the git commit hash of the OpenHands version you would
|
||||
like to evaluate. It could also be a release tag like `0.6.2`.
|
||||
- `agent`, e.g. `CodeActAgent`, is the name of the agent for benchmarks, defaulting to `CodeActAgent`.
|
||||
- `eval_limit`, e.g. `10`, limits the evaluation to the first `eval_limit` instances. By
|
||||
default, the script evaluates up to 500 issues, capped at the number of instances in the dataset.
|
||||
- `max_iter`, e.g. `20`, is the maximum number of iterations for the agent to run. By
|
||||
default, it is set to 50.
|
||||
- `num_workers`, e.g. `3`, is the number of parallel workers to run the evaluation. By
|
||||
default, it is set to 1.
|
||||
- `language`, the language of your evaluating dataset.
|
||||
- `dataset`, the absolute path to the dataset JSONL file.
|
||||
|
||||
The results will be written to `evaluation/evaluation_outputs/outputs/XXX/CodeActAgent/YYY/output.jsonl`; see the [example](examples/output.jsonl) for the expected format.
|
||||
|
||||
## Running evaluation
|
||||
|
||||
First, install [multi-swe-bench](https://github.com/multi-swe-bench/multi-swe-bench).
|
||||
|
||||
```bash
|
||||
pip install multi-swe-bench
|
||||
```
|
||||
|
||||
Second, convert `output.jsonl` to `patch.jsonl` with this [script](scripts/eval/convert.py); see the [example](examples/patch.jsonl) for the expected format.
|
||||
|
||||
```bash
|
||||
python evaluation/benchmarks/multi_swe_bench/scripts/eval/convert.py
|
||||
```
|
||||
|
||||
Finally, evaluate with multi-swe-bench.
|
||||
For the config file `config.json`, refer to the [example](examples/config.json) or the [GitHub documentation](https://github.com/multi-swe-bench/multi-swe-bench/tree/main?tab=readme-ov-file#configuration-file-example).
|
||||
|
||||
```bash
|
||||
python -m multi_swe_bench.harness.run_evaluation --config config.json
|
||||
```
|
||||
@@ -1,456 +0,0 @@
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
import time
|
||||
from functools import partial
|
||||
|
||||
import pandas as pd
|
||||
from swebench.harness.grading import get_eval_report
|
||||
from swebench.harness.run_evaluation import (
|
||||
APPLY_PATCH_FAIL,
|
||||
APPLY_PATCH_PASS,
|
||||
)
|
||||
from swebench.harness.test_spec import SWEbenchInstance, TestSpec, make_test_spec
|
||||
from swebench.harness.utils import load_swebench_dataset
|
||||
from tqdm import tqdm
|
||||
|
||||
from evaluation.benchmarks.swe_bench.resource.mapping import (
|
||||
get_instance_resource_factor,
|
||||
)
|
||||
from evaluation.benchmarks.swe_bench.run_infer import get_instance_docker_image
|
||||
from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_default_sandbox_config_for_eval,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
)
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
LLMConfig,
|
||||
get_parser,
|
||||
)
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.main import create_runtime
|
||||
from openhands.events.action import CmdRunAction
|
||||
from openhands.events.observation import CmdOutputObservation
|
||||
from openhands.utils.async_utils import call_async_from_sync
|
||||
|
||||
# TODO: migrate all swe-bench docker to ghcr.io/openhands
# Docker image prefix; can be overridden through the EVAL_DOCKER_IMAGE_PREFIX
# environment variable. The chosen value is logged once at import time.
DOCKER_IMAGE_PREFIX = os.environ.get('EVAL_DOCKER_IMAGE_PREFIX', 'docker.io/xingyaoww/')
logger.info(f'Using docker image prefix: {DOCKER_IMAGE_PREFIX}')
|
||||
|
||||
|
||||
def process_git_patch(patch):
    """Normalize a model-generated git patch so it can be applied.

    Args:
        patch: The raw patch text. Any non-string value is treated as
            "no patch".

    Returns:
        An empty string when ``patch`` is not a string or contains only
        whitespace. Otherwise the patch with CRLF line endings converted to
        LF, everything before the first ``diff --git`` line removed (when
        such a line exists), and exactly one trailing newline.
    """
    if not isinstance(patch, str):
        return ''

    if not patch.strip():
        # skip empty patches
        return ''

    patch = patch.replace('\r\n', '\n')
    # There might be some weird characters at the beginning of the patch
    # due to some OpenHands inference command outputs, e.g. the echoed
    # command followed by terminal cursor-movement residue:
    #
    #   git diff --no-color --cached 895f28f9cbed817c00ab68770433170d83132d90
    #   [A[C[C[C ... [C[K0
    #   diff --git a/django/db/models/sql/.backup.query.py b/django/db/models/sql/.backup.query.py
    #   new file mode 100644
    #   index 0000000000..fc13db5948
    #
    # We "find" the first line that starts with "diff" and then we remove
    # the lines before it. If no such line exists the text is kept as is.
    lines = patch.split('\n')
    for i, line in enumerate(lines):
        if line.startswith('diff --git'):
            patch = '\n'.join(lines[i:])
            break

    # Make sure the last line ends with exactly one newline. Only newlines
    # are trimmed: in a unified diff a trailing line holding a single space
    # is a blank context line, and trailing spaces on a '+' line are part of
    # the added content, so stripping all whitespace would alter the patch.
    patch = patch.rstrip('\n') + '\n'
    return patch
|
||||
|
||||
|
||||
def get_config(metadata: EvalMetadata, instance: pd.Series) -> AppConfig:
    """Build the application config used to evaluate one instance.

    Args:
        metadata: Evaluation metadata; its ``dataset`` attribute is used to
            look up the per-instance resource factor.
        instance: Row describing the instance; ``instance['instance_id']``
            selects both the container image and the resource factor.

    Returns:
        An ``AppConfig`` whose sandbox uses the instance-specific container
        image, with no workspace mounted.
    """
    # We use a different instance image for the each instance of swe-bench eval
    base_container_image = get_instance_docker_image(instance['instance_id'])
    logger.info(
        f'Using instance container image: {base_container_image}. '
        'Please make sure this image exists. '
        'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
    )
    sandbox_config = get_default_sandbox_config_for_eval()
    sandbox_config.base_container_image = base_container_image
    sandbox_config.remote_runtime_resource_factor = get_instance_resource_factor(
        dataset_name=metadata.dataset,
        instance_id=instance['instance_id'],
    )
    config = AppConfig(
        run_as_openhands=False,
        # The runtime backend comes from the RUNTIME env var, 'docker' by default.
        runtime=os.environ.get('RUNTIME', 'docker'),
        sandbox=sandbox_config,
        # do not mount workspace
        workspace_base=None,
        workspace_mount_path=None,
    )
    return config
|
||||
|
||||
|
||||
def process_instance(
    instance: pd.Series,
    metadata: EvalMetadata,
    reset_logger: bool = True,
    log_dir: str | None = None,
    runtime_failure_count: int = 0,
) -> EvalOutput:
    """
    Evaluate agent performance on a SWE-bench problem instance.

    The model patch and the instance's eval script are copied into a fresh
    runtime, the patch is applied under ``/testbed`` (``git apply`` first,
    then ``patch`` as a fallback), the eval script is run in the background
    and polled until it exits or 30 minutes elapse, and its log is graded
    with ``get_eval_report``. Outcomes are recorded as boolean flags in
    ``instance['test_result']['report']``.

    Note that this signature differs from the expected input to `run_evaluation`. Use
    `functools.partial` to provide optional arguments before passing to the evaluation harness.

    Args:
        instance: Row with ``instance_id``, ``model_patch`` and ``test_spec``
            entries; ``test_result`` is created when missing and is mutated.
        metadata: Evaluation metadata, echoed back in the returned output.
        reset_logger: Whether to redirect logging to a per-instance log file.
        log_dir (str | None, default=None): Path to directory where log files will be written. Must
            be provided if `reset_logger` is set.
        runtime_failure_count: Number of earlier runtime failures for this
            instance; each one doubles the resource factor, capped at 8.

    Returns:
        An ``EvalOutput`` carrying the instance id, the updated
        ``test_result`` and ``metadata``.

    Raises:
        AssertionError: if the `reset_logger` flag is set without a provided log directory.
        RuntimeError: if the patch-application output contains neither the
            pass marker nor the fail marker.
    """
    # Setup the logger properly, so you can run multi-processing to parallelize the evaluation
    if reset_logger:
        assert (
            log_dir is not None
        ), "Can't reset logger without a provided log directory."
        os.makedirs(log_dir, exist_ok=True)
        reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
    else:
        logger.info(f'Starting evaluation for instance {instance.instance_id}.')

    config = get_config(metadata, instance)
    instance_id = instance.instance_id
    model_patch = instance['model_patch']
    test_spec: TestSpec = instance['test_spec']
    logger.info(f'Starting evaluation for instance {instance_id}.')

    if 'test_result' not in instance.keys():
        instance['test_result'] = {}
    instance['test_result']['report'] = {
        'empty_generation': False,
        'resolved': False,
        'failed_apply_patch': False,
        'error_eval': False,
        'test_timeout': False,
    }

    if model_patch == '':
        # Nothing to apply: report it without starting a runtime.
        instance['test_result']['report']['empty_generation'] = True
        return EvalOutput(
            instance_id=instance_id,
            test_result=instance['test_result'],
            metadata=metadata,
        )

    # Increase resource_factor with increasing attempt_id
    if runtime_failure_count > 0:
        config.sandbox.remote_runtime_resource_factor = min(
            config.sandbox.remote_runtime_resource_factor * (2**runtime_failure_count),
            8,
        )
        logger.warning(
            f'This is the {runtime_failure_count + 1}th attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
        )

    # Create the runtime before entering the try block: if creation itself
    # fails there is nothing to close, and the original error must propagate
    # instead of being masked by an unbound `runtime` in the finally clause.
    runtime = create_runtime(config)
    try:
        call_async_from_sync(runtime.connect)
        # Get patch and save it to /tmp/patch.diff
        with tempfile.TemporaryDirectory() as temp_dir:
            # Patch file
            patch_file_path = os.path.join(temp_dir, 'patch.diff')
            with open(patch_file_path, 'w') as f:
                f.write(model_patch)
            runtime.copy_to(patch_file_path, '/tmp')
            # Eval script
            eval_script_path = os.path.join(temp_dir, 'eval.sh')
            with open(eval_script_path, 'w') as f:
                f.write(test_spec.eval_script)
            runtime.copy_to(eval_script_path, '/tmp')

        # Set +x
        action = CmdRunAction(command='chmod +x /tmp/eval.sh')
        action.set_hard_timeout(600)
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert obs.exit_code == 0

        # Apply patch
        exec_command = (
            'cd /testbed && '
            "(git apply -v /tmp/patch.diff && echo 'APPLY_PATCH_PASS' || "
            "(echo 'Failed to apply patch with git apply, trying with patch command...' && "
            "(patch --batch --fuzz=5 -p1 -i /tmp/patch.diff && echo 'APPLY_PATCH_PASS' || "
            "echo 'APPLY_PATCH_FAIL')))"
        )
        action = CmdRunAction(command=exec_command)
        action.set_hard_timeout(600)
        obs = runtime.run_action(action)
        assert isinstance(obs, CmdOutputObservation)
        apply_patch_output = obs.content
        assert isinstance(apply_patch_output, str)
        instance['test_result']['apply_patch_output'] = apply_patch_output

        if 'APPLY_PATCH_FAIL' in apply_patch_output:
            logger.info(f'[{instance_id}] {APPLY_PATCH_FAIL}:\n{apply_patch_output}')
            instance['test_result']['report']['failed_apply_patch'] = True

            return EvalOutput(
                instance_id=instance_id,
                test_result=instance['test_result'],
                metadata=metadata,
            )
        elif 'APPLY_PATCH_PASS' in apply_patch_output:
            logger.info(f'[{instance_id}] {APPLY_PATCH_PASS}:\n{apply_patch_output}')

            # Run eval script in background and save output to log file
            log_file = '/tmp/eval_output.log'
            action = CmdRunAction(command=f'/tmp/eval.sh > {log_file} 2>&1 & echo $!')
            action.set_hard_timeout(300)  # Short timeout just to get the process ID
            obs = runtime.run_action(action)

            if isinstance(obs, CmdOutputObservation) and obs.exit_code == 0:
                # `echo $!` prints the background PID as the last token.
                pid = obs.content.split()[-1].strip()
                logger.info(
                    f'[{instance_id}] Evaluation process started with PID: {pid}'
                )

                # Poll for completion
                start_time = time.time()
                timeout = 1800  # 30 minutes
                while True:
                    seconds_elapsed = time.time() - start_time
                    if seconds_elapsed > timeout:
                        logger.info(
                            f'[{instance_id}] Evaluation timed out after {timeout} seconds'
                        )
                        instance['test_result']['report']['test_timeout'] = True
                        break
                    # `ps -p` exits with 1 once the process is gone.
                    check_action = CmdRunAction(
                        command=f'ps -p {pid} > /dev/null; echo $?'
                    )
                    check_action.set_hard_timeout(300)
                    check_obs = runtime.run_action(check_action)
                    if (
                        isinstance(check_obs, CmdOutputObservation)
                        and check_obs.content.split()[-1].strip() == '1'
                    ):
                        logger.info(
                            f'[{instance_id}] Evaluation process completed after {seconds_elapsed} seconds'
                        )
                        break
                    logger.info(
                        f'[{instance_id}] [{seconds_elapsed:.0f}s] Evaluation still running, waiting...'
                    )
                    time.sleep(30)  # Wait for 30 seconds before checking again

                # Read the log file
                cat_action = CmdRunAction(command=f'cat {log_file}')
                cat_action.set_hard_timeout(300)
                cat_obs = runtime.run_action(cat_action)

                # Grade answer
                if isinstance(cat_obs, CmdOutputObservation) and cat_obs.exit_code == 0:
                    test_output = cat_obs.content
                    assert isinstance(test_output, str)
                    instance['test_result']['test_output'] = test_output

                    # Get report from test output
                    logger.info(f'[{instance_id}] Grading answer...')
                    with tempfile.TemporaryDirectory() as temp_dir:
                        # Create a directory structure that matches the expected format
                        # NOTE: this is a hack to make the eval report format consistent
                        # with the original SWE-Bench eval script
                        report_log_dir = os.path.join(
                            temp_dir, 'logs', instance_id.lower()
                        )
                        os.makedirs(report_log_dir, exist_ok=True)
                        test_output_path = os.path.join(
                            report_log_dir, 'test_output.txt'
                        )
                        with open(test_output_path, 'w') as f:
                            f.write(test_output)
                        try:
                            _report = get_eval_report(
                                test_spec=test_spec,
                                prediction={
                                    'model_patch': model_patch,
                                    'instance_id': instance_id,
                                },
                                log_path=test_output_path,
                                include_tests_status=True,
                            )
                            report = _report[instance_id]
                            logger.info(
                                f"[{instance_id}] report: {report}\nResult for {instance_id}: resolved: {report['resolved']}"
                            )
                            instance['test_result']['report']['resolved'] = report[
                                'resolved'
                            ]
                        except Exception as e:
                            # Grading is best-effort: record the failure in
                            # the report instead of aborting the instance.
                            logger.error(
                                f'[{instance_id}] Error when getting eval report: {e}'
                            )
                            instance['test_result']['report']['resolved'] = False
                            instance['test_result']['report']['error_eval'] = True
            else:
                logger.info(f'[{instance_id}] Error when starting eval:\n{obs.content}')
                instance['test_result']['report']['error_eval'] = True

            return EvalOutput(
                instance_id=instance_id,
                test_result=instance['test_result'],
                metadata=metadata,
            )
        else:
            logger.info(
                f'[{instance_id}] Unexpected output when applying patch:\n{apply_patch_output}'
            )
            raise RuntimeError(
                instance_id,
                f'Unexpected output when applying patch:\n{apply_patch_output}',
                logger,
            )
    finally:
        runtime.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: load a predictions JSONL file, evaluate every
    # prediction with `process_instance`, and log aggregate report counts.
    parser = get_parser()
    parser.add_argument(
        '--input-file',
        type=str,
        help='Path to input predictions file',
        required=True,
    )
    parser.add_argument(
        '--dataset',
        type=str,
        default='princeton-nlp/SWE-bench',
        help='data set to evaluate on, either full-test or lite-test',
    )
    parser.add_argument(
        '--split',
        type=str,
        default='test',
        help='split to evaluate on',
    )
    args, _ = parser.parse_known_args()

    # Load SWE-Bench dataset
    full_dataset: list[SWEbenchInstance] = load_swebench_dataset(
        args.dataset, args.split
    )
    instance_id_to_instance = {
        instance['instance_id']: instance for instance in full_dataset
    }
    logger.info(
        f'Loaded dataset {args.dataset} with split {args.split} to run inference on.'
    )

    # Load predictions
    assert args.input_file.endswith('.jsonl'), 'Input file must be a jsonl file.'
    required_fields = ['instance_id', 'model_patch', 'test_result']
    with open(args.input_file) as f:
        predictions = pd.DataFrame.from_records(
            [
                {k: v for k, v in json.loads(line).items() if k in required_fields}
                for line in tqdm(f, desc='Loading predictions')
            ]
        )
    assert (
        'instance_id' in predictions.columns
    ), 'Input file must contain instance_id column.'

    # A patch must be available either as a top-level `model_patch` column or
    # inside `test_result` (read from its `git_patch` field below). Fail with
    # a clear message when neither column exists instead of hitting a
    # KeyError on `predictions['test_result']` later on.
    if (
        'model_patch' not in predictions.columns
        and 'test_result' not in predictions.columns
    ):
        raise ValueError(
            'Input file must contain model_patch column OR test_result column with git_patch field.'
        )
    assert len(predictions['instance_id'].unique()) == len(
        predictions
    ), 'instance_id column must be unique.'

    if 'model_patch' not in predictions.columns:
        predictions['model_patch'] = predictions['test_result'].apply(
            lambda x: x.get('git_patch', '')
        )
    assert {'instance_id', 'model_patch'}.issubset(
        set(predictions.columns)
    ), 'Input file must contain instance_id and model_patch columns.'

    # Process model_patch
    predictions['model_patch'] = predictions['model_patch'].apply(process_git_patch)

    # Merge predictions with dataset
    predictions['instance'] = predictions['instance_id'].apply(
        lambda x: instance_id_to_instance[x]
    )
    predictions['test_spec'] = predictions['instance'].apply(make_test_spec)

    # Prepare dataset
    output_file = args.input_file.replace('.jsonl', '.swebench_eval.jsonl')
    instances = prepare_dataset(predictions, output_file, args.eval_n_limit)

    # If possible, load the relevant metadata to avoid issues with `run_evaluation`.
    metadata: EvalMetadata | None = None
    metadata_filepath = os.path.join(os.path.dirname(args.input_file), 'metadata.json')
    if os.path.exists(metadata_filepath):
        with open(metadata_filepath, 'r') as metadata_file:
            data = metadata_file.read()
            metadata = EvalMetadata.model_validate_json(data)
    else:
        # Initialize with a dummy metadata when file doesn't exist
        metadata = EvalMetadata(
            agent_class='dummy_agent',  # Placeholder agent class
            llm_config=LLMConfig(model='dummy_model'),  # Minimal LLM config
            max_iterations=1,  # Minimal iterations
            eval_output_dir=os.path.dirname(
                args.input_file
            ),  # Use input file dir as output dir
            start_time=time.strftime('%Y-%m-%d %H:%M:%S'),  # Current time
            git_commit=subprocess.check_output(['git', 'rev-parse', 'HEAD'])
            .decode('utf-8')
            .strip(),  # Current commit
            dataset=args.dataset,  # Dataset name from args
        )

    # The evaluation harness constrains the signature of `process_instance_func` but we need to
    # pass extra information. Build a new function object to avoid issues with multiprocessing.
    process_instance_func = partial(
        process_instance, log_dir=output_file.replace('.jsonl', '.logs')
    )

    run_evaluation(
        instances,
        metadata=metadata,
        output_file=output_file,
        num_workers=args.eval_num_workers,
        process_instance_func=process_instance_func,
    )

    # Load evaluated predictions & print number of resolved predictions
    evaluated_predictions = pd.read_json(output_file, lines=True)
    fields = ['resolved', 'failed_apply_patch', 'error_eval', 'empty_generation']

    def count_report_field(row, field):
        """Return the boolean report flag ``field`` for one evaluated row."""
        return row['test_result']['report'][field]

    report = {}
    for field in fields:
        count = evaluated_predictions.apply(
            count_report_field, args=(field,), axis=1
        ).sum()
        report[field] = count
        logger.info(
            f'# {field}: {count} / {len(evaluated_predictions)}. ({count / len(evaluated_predictions):.2%})'
        )
|
||||
@@ -1,24 +0,0 @@
|
||||
{
|
||||
"mode": "evaluation",
|
||||
"workdir": "./data/workdir",
|
||||
"patch_files": [
|
||||
"./data/patches/<your_patch_file>.jsonl"
|
||||
],
|
||||
"dataset_files": [
|
||||
"./data/patches/<to_evaluate_dataset_file>.jsonl"
|
||||
],
|
||||
"force_build": false,
|
||||
"output_dir": "./data/dataset",
|
||||
"specifics": [],
|
||||
"skips": [],
|
||||
"repo_dir": "./data/repos",
|
||||
"need_clone": false,
|
||||
"global_env": [],
|
||||
"clear_env": true,
|
||||
"stop_on_error": true,
|
||||
"max_workers": 8,
|
||||
"max_workers_build_image": 8,
|
||||
"max_workers_run_instance": 8,
|
||||
"log_dir": "./data/logs",
|
||||
"log_level": "DEBUG"
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -1,3 +0,0 @@
|
||||
{"org": "ponylang", "repo": "ponyc", "number": "4595", "fix_patch": "diff --git a/src/libponyc/ast/parser.c b/src/libponyc/ast/parser.c\nindex 9852922f..2c37d6b8 100644\n--- a/src/libponyc/ast/parser.c\n+++ b/src/libponyc/ast/parser.c\n@@ -693,6 +693,7 @@ DEF(idseqsingle);\n AST_NODE(TK_LET);\n TOKEN(\"variable name\", TK_ID);\n AST_NODE(TK_NONE); // Type\n+ SET_FLAG(AST_FLAG_IN_PARENS);\n DONE();\n \n // idseq"}
|
||||
{"org": "ponylang", "repo": "ponyc", "number": "4593", "fix_patch": "diff --git a/packages/cli/command_parser.pony b/packages/cli/command_parser.pony\nindex a5acce8e..fa97808b 100644\n--- a/packages/cli/command_parser.pony\n+++ b/packages/cli/command_parser.pony\n@@ -100,6 +100,7 @@ class CommandParser\n | let cs: CommandSpec box =>\n return CommandParser._sub(cs, this).\n _parse_command(tokens, options, args, envsmap, opt_stop)\n+// Correctly handle parent default options\n end\n else\n return SyntaxError(token, \"unknown command\")"}
|
||||
{"org": "ponylang", "repo": "ponyc", "number": "4588", "fix_patch": "diff --git a/src/libponyc/expr/match.c b/src/libponyc/expr/match.c\nindex 7d16066f..c2ec7056 100644\n--- a/src/libponyc/expr/match.c\n+++ b/src/libponyc/expr/match.c\n@@ -314,8 +314,10 @@ static ast_t* make_pattern_type(pass_opt_t* opt, ast_t* pattern)\n case TK_DONTCAREREF:\n case TK_MATCH_CAPTURE:\n case TK_MATCH_DONTCARE:\n+ if (ast_id(pattern_type) == TK_ISO) pattern_type = set_cap_and_ephemeral(pattern_type, TK_TRN, TK_EPHEMERAL);\n return pattern_type;\n \n+\n case TK_TUPLE:\n {\n ast_t* pattern_child = ast_child(pattern);"}
|
||||
@@ -1,32 +0,0 @@
|
||||
#!/bin/bash
# Run the Multi-SWE-bench inference script once for every model config group
# listed in MODELS. Edit the placeholder values below before running.

BASE_SCRIPT="./evaluation/benchmarks/multi_swe_bench/scripts/run_infer.sh"

# Placeholder LLM config group names; replace with your own.
MODELS=("aaa" "bbb" "ccc" "ddd" "fff")
GIT_VERSION="HEAD"
AGENT_NAME="CodeActAgent"
EVAL_LIMIT="500"
MAX_ITER="50"
NUM_WORKERS="1"
# Placeholders; replace with the dataset language and the dataset path.
LANGUAGE="XXX"
DATASET="XXX"


for MODEL in "${MODELS[@]}"; do
    echo "=============================="
    echo "Running benchmark for MODEL: $MODEL"
    echo "=============================="

    # Quote the script path so it survives spaces, and report a failed run
    # instead of unconditionally claiming completion. The loop still moves
    # on to the remaining models after a failure.
    if "$BASE_SCRIPT" \
        "$MODEL" \
        "$GIT_VERSION" \
        "$AGENT_NAME" \
        "$EVAL_LIMIT" \
        "$MAX_ITER" \
        "$NUM_WORKERS" \
        "$DATASET" \
        "$LANGUAGE"; then
        echo "Completed $MODEL"
    else
        echo "Benchmark for $MODEL failed with exit code $?" >&2
    fi
done
|
||||
@@ -1,39 +0,0 @@
|
||||
"""Mapping instance_id to resource_factor.
|
||||
|
||||
Different instances may have different resource requirements.
|
||||
e.g., some instances may require more memory/CPU to run inference.
|
||||
This file tracks the resource requirements of different instances.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
# Directory containing this module; the per-dataset `<dataset_name>.json`
# mapping files are looked up here.
CUR_DIR = os.path.dirname(os.path.abspath(__file__))
# Resource factor used when an instance has no explicit entry; can be
# overridden through the DEFAULT_RUNTIME_RESOURCE_FACTOR environment variable.
DEFAULT_RUNTIME_RESOURCE_FACTOR = int(
    os.environ.get('DEFAULT_RUNTIME_RESOURCE_FACTOR', 1)
)

# dataset to resource mapping (in-process cache, filled lazily per dataset)
_global_resource_mapping: dict[str, dict[str, float]] = {}
|
||||
|
||||
|
||||
def get_resource_mapping(dataset_name: str) -> dict[str, float] | None:
    """Return the instance-id to resource-factor mapping for a dataset.

    The mapping is read from ``<dataset_name>.json`` next to this module the
    first time it is requested and cached in ``_global_resource_mapping``.

    Args:
        dataset_name: Name used both as the cache key and as the JSON file
            stem.

    Returns:
        The loaded mapping, or ``None`` (after logging a warning) when no
        mapping file exists for ``dataset_name``. A missing file is not
        cached, so it is looked up again on the next call.
    """
    if dataset_name not in _global_resource_mapping:
        file_path = os.path.join(CUR_DIR, f'{dataset_name}.json')
        if not os.path.exists(file_path):
            logger.warning(f'Resource mapping for {dataset_name} not found.')
            return None

        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(file_path, 'r', encoding='utf-8') as f:
            _global_resource_mapping[dataset_name] = json.load(f)
        logger.info(f'Loaded resource mapping for {dataset_name}')
    return _global_resource_mapping[dataset_name]
|
||||
|
||||
|
||||
def get_instance_resource_factor(dataset_name: str, instance_id: str) -> int:
    """Return the runtime resource factor for one instance of a dataset.

    Args:
        dataset_name: Dataset whose resource mapping should be consulted.
        instance_id: Instance to look up in that mapping.

    Returns:
        The mapped value converted with ``int()``, or
        ``DEFAULT_RUNTIME_RESOURCE_FACTOR`` when the dataset has no mapping
        or the instance has no entry in it.
    """
    resource_mapping = get_resource_mapping(dataset_name)
    if resource_mapping is None:
        return DEFAULT_RUNTIME_RESOURCE_FACTOR
    return int(resource_mapping.get(instance_id, DEFAULT_RUNTIME_RESOURCE_FACTOR))
|
||||
@@ -1,853 +0,0 @@
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
from typing import Any
|
||||
|
||||
import pandas as pd
|
||||
import toml
|
||||
from datasets import load_dataset
|
||||
|
||||
import openhands.agenthub
|
||||
from evaluation.benchmarks.swe_bench.resource.mapping import (
|
||||
get_instance_resource_factor,
|
||||
)
|
||||
from evaluation.utils.shared import (
|
||||
EvalException,
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
assert_and_raise,
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_llm_config_for_completions_logging,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AgentConfig,
|
||||
AppConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.main import create_runtime, run_controller
|
||||
from openhands.events.action import CmdRunAction, MessageAction, FileReadAction
|
||||
from openhands.events.observation import CmdOutputObservation, ErrorObservation
|
||||
from openhands.events.serialization.event import event_to_dict
|
||||
from openhands.runtime.base import Runtime
|
||||
from openhands.utils.async_utils import call_async_from_sync
|
||||
from openhands.utils.shutdown_listener import sleep_if_should_continue
|
||||
import pdb
|
||||
|
||||
# Boolean feature flags read from the environment; only the
# (case-insensitive) value 'true' enables a flag.
USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
USE_INSTANCE_IMAGE = os.environ.get('USE_INSTANCE_IMAGE', 'true').lower() == 'true'
RUN_WITH_BROWSING = os.environ.get('RUN_WITH_BROWSING', 'false').lower() == 'true'

# TODO: migrate all swe-bench docker to ghcr.io/openhands
# TODO: adapt to all languages
DOCKER_IMAGE_PREFIX = os.environ.get('EVAL_DOCKER_IMAGE_PREFIX', '')
# Language selector taken from the LANGUAGE environment variable
# (defaults to 'python').
LANGUAGE = os.environ.get('LANGUAGE', 'python')
logger.info(f'Using docker image prefix: {DOCKER_IMAGE_PREFIX}')


# Maps an agent class name to the function that produces its fake user response.
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': codeact_user_response,
}
|
||||
|
||||
|
||||
def _get_swebench_workspace_dir_name(instance: pd.Series) -> str:
    """Return the workspace directory name for an instance.

    The name is ``<repo>__<version>`` with every ``/`` replaced by ``__``,
    so the result never contains a path separator.
    """
    return f'{instance.repo}__{instance.version}'.replace('/', '__')
|
||||
|
||||
|
||||
# Prompt fragments shared by every language unless a language overrides them.
_DEFAULT_PROMPT_STEPS = {
    'rerun_step': '4. Rerun your reproduce script and confirm that the error is fixed!\n',
    'edge_case_step': '5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well.\n',
    'changed_bullet': ' - The functions you changed\n',
}

# Per-language wording. ``repo_label`` is used in "I've uploaded a ... code
# repository", ``env_label`` in "the development ... environment", and
# ``reproduce_step`` is step 2 of the checklist. Any key of
# _DEFAULT_PROMPT_STEPS may be overridden here.
_LANGUAGE_PROMPT_SPECS = {
    'python': {
        'repo_label': 'python',
        'env_label': 'Python',
        'reproduce_step': '2. Create a script to reproduce the error and execute it with `python <filename.py>` using the BashTool, to confirm the error.\n',
    },
    'java': {
        'repo_label': 'Java',
        'env_label': 'Java',
        'reproduce_step': '2. Create a Java class to reproduce the error and execute it by first compiling with `javac <classname>.java` and then running with `java <classname>` using the BashTool, to confirm the error\n',
        'rerun_step': '4. Rerun your reproduce script or class and confirm that the error is fixed!\n',
        'edge_case_step': '5. Think about edgecases, add comprehensive tests for them in your reproduce class or script, and run them to make sure your fix handles these cases as well.\n',
        'changed_bullet': ' - The functions or classes you changed\n',
    },
    'go': {
        'repo_label': 'Go',
        'env_label': 'Go',
        'reproduce_step': '2. Create a script or a function to reproduce the error and execute it with `go run <filename.go>` using the BashTool, to confirm the error.\n',
    },
    'c': {
        'repo_label': 'C',
        'env_label': 'C',
        'reproduce_step': '2. Create a script to reproduce the error by compiling your C code (for example, using `gcc <filename.c> -o <executable>`) and then running the executable using the BashTool, to confirm the error.\n',
    },
    'cpp': {
        'repo_label': 'C++',
        'env_label': 'C++',
        'reproduce_step': '2. Create or adapt a small executable (e.g., a main file or a test driver) to reproduce the issue. Build and run it (for example, by using `g++ -o reproduce reproduce.cpp && ./reproduce` via the BashTool) to confirm the error.\n',
    },
    'javascript': {
        'repo_label': 'Javascript',
        'env_label': 'Javascript',
        'reproduce_step': '2. Create a script to reproduce the error and execute it with `node <filename.js>` using the BashTool, to confirm the error.\n',
    },
    'typescript': {
        'repo_label': 'Typescript',
        'env_label': 'Typescript',
        'reproduce_step': '2. Create a script to reproduce the error and execute it with `ts-node <filename.ts>` using the BashTool, to confirm the error.\n',
    },
    'rust': {
        'repo_label': 'Rust',
        'env_label': 'Rust',
        'reproduce_step': '2. Create a reproduction script (or binary) that triggers the error and execute it with `cargo run --bin <filename>` using the BashTool, to confirm the error.\n',
    },
}


def get_instruction(instance: pd.Series, metadata: EvalMetadata):
    """Build the initial task prompt for one benchmark instance.

    The wording is selected by the module-level ``LANGUAGE`` setting
    (compared case-insensitively). The prompt embeds the instance's workspace
    directory, its ``problem_statement`` and its ``base_commit``.

    Args:
        instance: Benchmark row; ``repo``, ``version``, ``problem_statement``
            and ``base_commit`` are read from it.
        metadata: Evaluation metadata. Currently unused; kept for interface
            compatibility.

    Returns:
        The prompt string, or ``None`` when ``LANGUAGE`` is not one of the
        supported languages (an error is logged in that case).
    """
    language_spec = _LANGUAGE_PROMPT_SPECS.get(LANGUAGE.lower())
    if language_spec is None:
        logger.error(
            f'Unsupported LANGUAGE {LANGUAGE!r}; no instruction available. '
            f'Supported languages: {sorted(_LANGUAGE_PROMPT_SPECS)}'
        )
        return None

    spec = {**_DEFAULT_PROMPT_STEPS, **language_spec}
    repo_label = spec['repo_label']
    env_label = spec['env_label']
    reproduce_step = spec['reproduce_step']
    rerun_step = spec['rerun_step']
    edge_case_step = spec['edge_case_step']
    changed_bullet = spec['changed_bullet']

    workspace_dir_name = _get_swebench_workspace_dir_name(instance)
    problem_statement = instance.problem_statement
    base_commit = instance['base_commit']

    # Instruction based on Anthropic's official trajectory
    # https://github.com/eschluntz/swe-bench-experiments/tree/main/evaluation/verified/20241022_tools_claude-3-5-sonnet-updated/trajs
    #
    # Dynamic values are interpolated with f-strings only (never str.format on
    # the assembled text), so braces inside the problem statement are safe.
    instruction = (
        '<uploaded_files>\n'
        f'/workspace/{workspace_dir_name}\n'
        '</uploaded_files>\n'
        f"I've uploaded a {repo_label} code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
        '<issue_description>\n'
        f'{problem_statement}\n'
        '</issue_description>\n\n'
        'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n'
        "I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
        f"Also the development {env_label} environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
        'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n'
        'Follow these steps to resolve the issue:\n'
        '1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
        f'{reproduce_step}'
        '3. Edit the sourcecode of the repo to resolve the issue.\n'
        f'{rerun_step}'
        f'{edge_case_step}'
        f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {base_commit}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n'
        ' - The issue you are fixing\n'
        ' - The files you modified\n'
        f'{changed_bullet}'
        ' Make sure all these tests pass with your changes.\n'
        "Your thinking should be thorough and so it's fine if it's very long.\n"
    )

    if RUN_WITH_BROWSING:
        instruction += (
            '<IMPORTANT!>\n'
            'You SHOULD NEVER attempt to browse the web. '
            '</IMPORTANT!>\n'
        )
    return instruction
|
||||
|
||||
|
||||
|
||||
# TODO: support all languages
def get_instance_docker_image(instance: pd.Series):
    """Return the container image reference to use for one benchmark instance.

    For Python (``LANGUAGE`` compared case-insensitively, matching
    ``get_instruction``) the image is
    ``<DOCKER_IMAGE_PREFIX>/sweb.eval.x86_64.<instance_id>`` with ``__``
    replaced by ``_s_``, lower-cased. When the prefix is empty the bare image
    name is returned, without a leading ``/``.

    For every other language the image is
    ``mswebench/<repo with '/' -> '_m_'>:pr-<suffix>``, where ``<suffix>`` is
    the part of ``instance_id`` after its last ``-`` (empty when the instance
    has no ``instance_id``).

    Args:
        instance: Benchmark row providing ``instance_id`` and, for non-Python
            languages, ``repo``.

    Returns:
        The image reference as a string.
    """
    if LANGUAGE.lower() == 'python':
        image_name = 'sweb.eval.x86_64.' + instance['instance_id']
        # to comply with docker image naming convention
        image_name = image_name.replace('__', '_s_')
        prefix = DOCKER_IMAGE_PREFIX.rstrip('/')
        # An empty prefix must not yield a reference starting with '/'.
        full_name = f'{prefix}/{image_name}' if prefix else image_name
        return full_name.lower()

    container_name = instance.get('repo', '').lower().replace('/', '_m_')
    instance_id = instance.get('instance_id', '')
    tag_suffix = instance_id.split('-')[-1] if instance_id else ''
    return f'mswebench/{container_name}:pr-{tag_suffix}'
|
||||
|
||||
|
||||
|
||||
def get_config(
    instance: pd.Series,
    metadata: EvalMetadata,
) -> AppConfig:
    """Assemble the ``AppConfig`` used to run the agent on one instance.

    Picks the sandbox base image (per-instance when ``USE_INSTANCE_IMAGE`` is
    set, otherwise a fixed swe-bench image), configures the sandbox, and
    attaches the LLM and agent configuration derived from ``metadata``.

    Args:
        instance: Benchmark row; ``instance_id`` is read from it.
        metadata: Evaluation metadata supplying the agent class, iteration
            limit, dataset name, LLM config, output directory and condenser
            config.

    Returns:
        The fully populated application config.
    """
    if not USE_INSTANCE_IMAGE:
        # Fixed image used when per-instance images are disabled.
        base_container_image = 'ghcr.io/opendevin/eval-swe-bench:full-v1.2.1'
        logger.info(f'Using swe-bench container image: {base_container_image}')
    else:
        # We use a different instance image for each instance of swe-bench eval
        base_container_image = get_instance_docker_image(instance)
        logger.info(
            f'Using instance container image: {base_container_image}. '
            'Please make sure this image exists. '
            'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
        )

    sandbox = get_default_sandbox_config_for_eval()
    sandbox.base_container_image = base_container_image
    sandbox.enable_auto_lint = True
    sandbox.use_host_network = False
    # Add platform to the sandbox config to solve issue 4401
    sandbox.platform = 'linux/amd64'
    resource_factor = get_instance_resource_factor(
        dataset_name=metadata.dataset,
        instance_id=instance['instance_id'],
    )
    sandbox.remote_runtime_resource_factor = resource_factor

    app_config = AppConfig(
        default_agent=metadata.agent_class,
        run_as_openhands=False,
        max_iterations=metadata.max_iterations,
        runtime=os.environ.get('RUNTIME', 'docker'),
        sandbox=sandbox,
        # do not mount workspace
        workspace_base=None,
        workspace_mount_path=None,
    )

    llm_config = update_llm_config_for_completions_logging(
        metadata.llm_config, metadata.eval_output_dir, instance['instance_id']
    )
    app_config.set_llm_config(llm_config)

    app_config.set_agent_config(
        AgentConfig(
            enable_jupyter=False,
            enable_browsing=RUN_WITH_BROWSING,
            enable_llm_editor=False,
            condenser=metadata.condenser_config,
            enable_prompt_extensions=False,
        )
    )
    return app_config
|
||||
|
||||
|
||||
def _run_setup_command(
    runtime: Runtime,
    command: str,
    failure_msg: str,
    timeout: int = 600,
):
    """Run one shell command in the sandbox and raise unless it succeeds.

    Args:
        runtime: Runtime in which the command is executed.
        command: Shell command line to run.
        failure_msg: Message prefix used when the command does not succeed.
        timeout: Hard timeout passed to the action (the unit is whatever
            ``CmdRunAction.set_hard_timeout`` expects).

    Returns:
        The observation produced by the runtime.

    Raises:
        Whatever ``assert_and_raise`` raises when the result is not a
        ``CmdOutputObservation`` with exit code 0.
    """
    action = CmdRunAction(command=command)
    action.set_hard_timeout(timeout)
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    if isinstance(obs, ErrorObservation):
        logger.error(f'{failure_msg}: {str(obs)}')
    # Check the type before reading exit_code: the runtime may hand back an
    # ErrorObservation instead of a command result.
    assert_and_raise(
        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
        f'{failure_msg}: {str(obs)}',
    )
    return obs


def initialize_runtime(
    runtime: Runtime,
    instance: pd.Series,  # supplies repo, version and instance_id
):
    """Initialize the runtime for the agent.

    This function is called before the runtime is used to run the agent.
    It exports instance-specific variables through ``~/.bashrc``, runs the
    benchmark entry script, changes into the instance workspace, hard-resets
    the git checkout and removes all git remotes.

    Args:
        runtime: Runtime (sandbox) to prepare.
        instance: Benchmark row (a ``pd.Series`` or a plain ``dict``).

    Raises:
        Whatever ``assert_and_raise`` raises when any setup command fails.
    """
    logger.info('-' * 30)
    logger.info('BEGIN Runtime Initialization Fn')
    logger.info('-' * 30)
    workspace_dir_name = _get_swebench_workspace_dir_name(instance)

    repo_name = instance['repo'].split('/')[-1]
    instance_id = instance['instance_id']

    # Set instance id (plus pip cache dir, a pager-less git alias and the
    # repository name) for every later shell that sources ~/.bashrc.
    _run_setup_command(
        runtime,
        f"""echo 'export SWE_INSTANCE_ID={instance_id}' >> ~/.bashrc && echo 'export PIP_CACHE_DIR=~/.cache/pip' >> ~/.bashrc && echo "alias git='git --no-pager'" >> ~/.bashrc && echo 'export REPO_NAME={repo_name}' >> ~/.bashrc""",
        'Failed to export SWE_INSTANCE_ID',
    )
    _run_setup_command(
        runtime,
        """export USER=$(whoami); echo USER=${USER} """,
        'Failed to export USER',
    )

    if USE_INSTANCE_IMAGE:
        # inject the init script
        script_dir = os.path.dirname(__file__)

        # inject the instance info
        _run_setup_command(
            runtime,
            'mkdir -p /swe_util/eval_data/instances',
            'Failed to create /swe_util/eval_data/instances',
        )

        swe_instance_json_name = 'swe-bench-instance.json'
        with tempfile.TemporaryDirectory() as temp_dir:
            # The file must carry this exact name, so it is created inside a
            # temporary directory rather than as an anonymous temp file.
            temp_file_path = os.path.join(temp_dir, swe_instance_json_name)
            payload = instance if isinstance(instance, dict) else instance.to_dict()
            with open(temp_file_path, 'w') as f:
                json.dump([payload], f)

            # Copy the file to the desired location (while it still exists).
            runtime.copy_to(temp_file_path, '/swe_util/eval_data/instances/')

        # inject the instance swe entry
        runtime.copy_to(
            str(os.path.join(script_dir, 'scripts/setup/instance_swe_entry.sh')),
            '/swe_util/',
        )
        _run_setup_command(runtime, 'cat ~/.bashrc', 'Failed to cat ~/.bashrc')
        _run_setup_command(
            runtime, 'source ~/.bashrc', 'Failed to source ~/.bashrc'
        )
        _run_setup_command(
            runtime,
            'source /swe_util/instance_swe_entry.sh',
            'Failed to source /swe_util/instance_swe_entry.sh',
        )
    else:
        _run_setup_command(
            runtime,
            'source /swe_util/swe_entry.sh',
            'Failed to source /swe_util/swe_entry.sh',
            timeout=1800,
        )

    _run_setup_command(
        runtime,
        f'cd /workspace/{workspace_dir_name}',
        f'Failed to cd to /workspace/{workspace_dir_name}',
    )
    _run_setup_command(runtime, 'git reset --hard', 'Failed to git reset --hard')
    _run_setup_command(
        runtime,
        'for remote_name in $(git remote); do git remote remove "${remote_name}"; done',
        'Failed to remove git remotes',
    )

    # TODO: decide whether the toolchain of non-Python languages should be
    # verified here. The former Python-only check (`which python` must point
    # into the 'testbed' environment) is disabled.

    logger.info('-' * 30)
    logger.info('END Runtime Initialization Fn')
    logger.info('-' * 30)
|
||||
|
||||
|
||||
def _run_logged_command(runtime: Runtime, command: str, hard_timeout: int = 600) -> Any:
    """Run ``command`` in the sandbox and log both the action and its observation.

    Args:
        runtime: Sandbox runtime used to execute the command.
        command: Shell command line to run.
        hard_timeout: Value passed to ``action.set_hard_timeout``.

    Returns:
        The observation returned by ``runtime.run_action``.
    """
    action = CmdRunAction(command=command)
    action.set_hard_timeout(hard_timeout)
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    return obs


def complete_runtime(
    runtime: Runtime,
    instance: pd.Series,  # this argument is not required, but it is used to get the workspace_dir_name
) -> dict[str, Any]:
    """Collect the agent's edits from the sandbox as a git patch.

    This function is called after the agent has run. It stages every change
    in the workspace, removes changed files that look like binaries or
    executables, writes the staged diff against ``instance["base_commit"]``
    to ``patch.diff`` and reads that file back.

    If you need to do something in the sandbox to get the correctness metric
    after the agent has run, modify this function.

    Args:
        runtime: Sandbox runtime the agent ran in.
        instance: Dataset row; provides the workspace directory name and
            ``base_commit``.

    Returns:
        ``{'git_patch': <contents of patch.diff>}``.

    Raises:
        Whatever ``assert_and_raise`` raises when a step fails (cd, git
        config, staging, binary removal, diff generation or reading the
        patch back).
    """
    logger.info('-' * 30)
    logger.info('BEGIN Runtime Completion Fn')
    logger.info('-' * 30)
    obs: CmdOutputObservation
    workspace_dir_name = _get_swebench_workspace_dir_name(instance)
    cd_command = f'cd /workspace/{workspace_dir_name}'

    obs = _run_logged_command(runtime, cd_command)

    if obs.exit_code == -1:
        # The previous command is still running
        # We need to kill previous command
        logger.info('The previous command is still running, trying to kill it...')
        action = CmdRunAction(command='C-c')
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

        # Then run the command again
        obs = _run_logged_command(runtime, cd_command)

    assert_and_raise(
        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
        f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
    )

    # Disable the pager so git output is never held behind an interactive prompt.
    obs = _run_logged_command(runtime, 'git config --global core.pager ""')
    assert_and_raise(
        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
        f'Failed to git config --global core.pager "": {str(obs)}',
    )

    # Stage everything (including untracked files) so `git diff --cached` sees it.
    obs = _run_logged_command(runtime, 'git add -A')
    assert_and_raise(
        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
        f'Failed to git add -A: {str(obs)}',
    )

    # Remove binary files: any changed file that `file` reports as executable
    # or that git attributes mark as binary is dropped from the index/worktree.
    remove_binaries_command = '''
for file in $(git status --porcelain | grep -E "^(M| M|\\?\\?|A| A)" | cut -c4-); do
    if [ -f "$file" ] && (file "$file" | grep -q "executable" || git check-attr binary "$file" | grep -q "binary: set"); then
        git rm -f "$file" 2>/dev/null || rm -f "$file"
        echo "Removed: $file"
    fi
done
'''
    obs = _run_logged_command(runtime, remove_binaries_command)
    assert_and_raise(
        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
        f'Failed to remove binary files: {str(obs)}',
    )

    n_retries = 0
    diff_written = False
    while n_retries < 5:
        # The diff is redirected to a file and read back afterwards instead of
        # being taken from the command output.
        obs = _run_logged_command(
            runtime,
            f'git diff --no-color --cached {instance["base_commit"]} > patch.diff',
            hard_timeout=max(300 + 100 * n_retries, 600),
        )
        n_retries += 1
        if isinstance(obs, CmdOutputObservation):
            if obs.exit_code == 0:
                diff_written = True
                break
            else:
                logger.info('Failed to get git diff, retrying...')
                sleep_if_should_continue(10)
        elif isinstance(obs, ErrorObservation):
            logger.error(f'Error occurred: {obs.content}. Retrying...')
            sleep_if_should_continue(10)
        else:
            assert_and_raise(False, f'Unexpected observation type: {str(obs)}')

    # Do not fall through to reading a stale or partial patch.diff when every
    # attempt to produce it failed.
    assert_and_raise(diff_written, 'Failed to get git diff (None)')

    action = FileReadAction(path='patch.diff')
    action.set_hard_timeout(max(300 + 100 * n_retries, 600))
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    # An error observation carries an error message in `content`; it must not
    # be mistaken for the patch itself.
    assert_and_raise(
        not isinstance(obs, ErrorObservation),
        f'Failed to read patch.diff: {str(obs)}',
    )
    git_patch = obs.content

    assert_and_raise(git_patch is not None, 'Failed to get git diff (None)')

    logger.info('-' * 30)
    logger.info('END Runtime Completion Fn')
    logger.info('-' * 30)
    return {'git_patch': git_patch}
|
||||
|
||||
|
||||
def process_instance(
    instance: pd.Series,
    metadata: EvalMetadata,
    reset_logger: bool = True,
    runtime_failure_count: int = 0,
) -> EvalOutput:
    """Run the agent on one benchmark instance and package the result.

    Args:
        instance: Dataset row describing the task.
        metadata: Evaluation metadata (agent class, output directory, ...).
        reset_logger: When true, redirect logging to a per-instance log file
            so that instances can be processed in parallel.
        runtime_failure_count: Number of earlier runtime failures for this
            instance; used to scale up the remote runtime resource factor.

    Returns:
        An ``EvalOutput`` holding the instruction, the git patch produced by
        the agent (with binary file diffs removed), the event history and the
        collected metrics.

    Raises:
        ValueError: If the controller returns no state.
        EvalException: If the final state carries a fatal evaluation error.
    """
    config = get_config(instance, metadata)

    # Setup the logger properly, so you can run multi-processing to parallelize the evaluation
    if reset_logger:
        log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
        reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
    else:
        logger.info(f'Starting evaluation for instance {instance.instance_id}.')

    # Increase resource_factor with increasing attempt_id
    if runtime_failure_count > 0:
        config.sandbox.remote_runtime_resource_factor = min(
            config.sandbox.remote_runtime_resource_factor * (2**runtime_failure_count),
            8,
        )
        logger.warning(
            f'This is the {runtime_failure_count + 1}th attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
        )
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)

    try:
        initialize_runtime(runtime, instance)

        instruction = get_instruction(instance, metadata)

        # Here's how you can run the agent (similar to the `main` function) and get the final task state
        state: State | None = asyncio.run(
            run_controller(
                config=config,
                initial_user_action=MessageAction(content=instruction),
                runtime=runtime,
                fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
                    metadata.agent_class
                ],
            )
        )

        # Check for a missing state before touching any of its attributes.
        if state is None:
            raise ValueError('State should not be None.')

        # if fatal error, throw EvalError to trigger re-run
        if is_fatal_evaluation_error(state.last_error):
            raise EvalException('Fatal error detected: ' + state.last_error)

        # ======= THIS IS SWE-Bench specific =======
        # Get git patch
        return_val = complete_runtime(runtime, instance)
        git_patch = return_val['git_patch']
        logger.info(
            f'Got git diff for instance {instance.instance_id}:\n--------\n{git_patch}\n--------'
        )
    finally:
        runtime.close()
    # ==========================================

    # ======= Attempt to evaluate the agent's edits =======
    # we use eval_infer.sh to evaluate the agent's edits, not here
    # because the agent may alter the environment / testcases

    def remove_binary_diffs(patch_text):
        """Return ``patch_text`` without the per-file sections of binary files.

        A section starts at a ``diff --git`` header. It is dropped when it
        contains a git binary marker line (``Binary files ... differ`` or
        ``GIT binary patch``). Kept sections are reproduced verbatim,
        including line endings and the trailing newline.
        """
        kept_lines = []
        block = []
        is_binary_block = False

        # keepends=True lets the kept lines be re-joined losslessly.
        for line in patch_text.splitlines(keepends=True):
            if line.startswith('diff --git '):
                if block and not is_binary_block:
                    kept_lines.extend(block)
                block = [line]
                is_binary_block = False
                continue
            # Only a line that *starts* with the marker is a git binary
            # notice; hunk lines are prefixed with ' ', '+' or '-'.
            if line.startswith(('Binary files ', 'GIT binary patch')):
                is_binary_block = True
            block.append(line)

        if block and not is_binary_block:
            kept_lines.extend(block)
        return ''.join(kept_lines)

    git_patch = remove_binary_diffs(git_patch)
    test_result = {
        'git_patch': git_patch,
    }

    # If you are working on some simpler benchmark that only evaluates the final model output (e.g., in a MessageAction)
    # You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.

    # NOTE: this is NO LONGER the event stream, but an agent history that includes delegate agent's events
    histories = [event_to_dict(event) for event in state.history]
    metrics = get_metrics(state)

    # Save the output
    output = EvalOutput(
        instance_id=instance.instance_id,
        instruction=instruction,
        instance=instance.to_dict(),  # SWE Bench specific
        test_result=test_result,
        metadata=metadata,
        history=histories,
        metrics=metrics,
        error=state.last_error if state.last_error else None,
    )
    return output
|
||||
|
||||
|
||||
def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
    """Restrict ``dataset`` to the configured subset of instances.

    If a ``config.toml`` next to this file defines ``selected_ids``, only the
    rows whose ``filter_column`` value is in that list are kept. Otherwise,
    rows whose ``filter_column`` value appears in the comma-separated
    ``SKIP_IDS`` environment variable are removed.

    Args:
        dataset: Full set of task instances.
        filter_column: Name of the column holding the instance identifier.

    Returns:
        The filtered frame, or ``dataset`` itself when no filter applies.
    """
    file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.toml')
    if os.path.exists(file_path):
        with open(file_path, 'r') as file:
            data = toml.load(file)
        if 'selected_ids' in data:
            selected_ids = data['selected_ids']
            logger.info(
                f'Filtering {len(selected_ids)} tasks from "selected_ids"...'
            )
            subset = dataset[dataset[filter_column].isin(selected_ids)]
            logger.info(f'Retained {subset.shape[0]} tasks after filtering')
            return subset

    # ''.split(',') yields [''], so empty entries must be dropped; otherwise
    # the skip branch runs even when SKIP_IDS is unset.
    raw_skip_ids = os.environ.get('SKIP_IDS', '').split(',')
    skip_ids = [entry.strip() for entry in raw_skip_ids if entry.strip()]
    if skip_ids:
        logger.info(f'Filtering {len(skip_ids)} tasks from "SKIP_IDS"...')
        return dataset[~dataset[filter_column].isin(skip_ids)]
    return dataset
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Entry point: parse CLI arguments, load the dataset file, build the
    # evaluation metadata and run `process_instance` over every instance.
    parser = get_parser()
    parser.add_argument(
        '--dataset',
        type=str,
        default='princeton-nlp/SWE-bench',
        help='path to the JSON/JSONL dataset file to evaluate on',
    )
    parser.add_argument(
        '--split',
        type=str,
        default='test',
        help='split to evaluate on',
    )
    args, _ = parser.parse_known_args()

    # The dataset is read with the generic "json" loader, so --dataset is
    # used as a data file path, not as a Hugging Face hub dataset name.
    # NOTE(review): the defaults for --dataset and --split predate this and
    # may not match what the json loader produces; confirm against callers.
    dataset = load_dataset('json', data_files=args.dataset)
    dataset = dataset[args.split]
    swe_bench_tests = filter_dataset(dataset.to_pandas(), 'instance_id')
    logger.info(
        f'Loaded dataset {args.dataset} with split {args.split}: {len(swe_bench_tests)} tasks'
    )

    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)

    # Validate before configuring, so a missing config reports this error
    # instead of failing on attribute assignment to None.
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

    llm_config.log_completions = True
    # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
    llm_config.modify_params = False

    details = {}
    # Looked up for its side effect only: the result is unused.
    # NOTE(review): presumably this fails fast on an unknown agent class.
    _agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)

    dataset_description = (
        args.dataset.replace('/', '__') + '-' + args.split.replace('/', '__')
    )
    metadata = make_metadata(
        llm_config,
        dataset_description,
        args.agent_cls,
        args.max_iterations,
        args.eval_note,
        args.eval_output_dir,
        details=details,
    )

    output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
    print(f'### OUTPUT FILE: {output_file} ###')
    instances = prepare_dataset(swe_bench_tests, output_file, args.eval_n_limit)

    # Stringify the test lists when the first value is not already a string.
    if len(instances) > 0 and not isinstance(instances['FAIL_TO_PASS'].iloc[0], str):
        for col in ['PASS_TO_PASS', 'FAIL_TO_PASS']:
            instances[col] = instances[col].apply(str)
    # TODO: adapt this for multi-language datasets (e.g. for java the
    # 'issue_numbers' and 'created_at' columns may need the same conversion).

    run_evaluation(
        instances,
        metadata,
        output_file,
        args.eval_num_workers,
        process_instance,
        timeout_seconds=120 * 60,  # 2 hour PER instance should be more than enough
        max_retries=5,
    )
|
||||
@@ -1,30 +0,0 @@
|
||||
import json
|
||||
|
||||
input_file = 'XXX.jsonl'
output_file = 'YYY.jsonl'


def convert_item(item):
    """Convert one raw dataset record into the SWE-bench-style layout.

    Args:
        item: Parsed JSON object with ``org``, ``repo``, ``number``,
            ``resolved_issues`` (non-empty list) and ``base`` (with ``sha``).

    Returns:
        A dict with ``repo``, ``instance_id``, ``problem_statement``,
        ``FAIL_TO_PASS``, ``PASS_TO_PASS``, ``base_commit`` and ``version``.

    Raises:
        KeyError, IndexError: If ``resolved_issues`` or ``base`` is missing
            or ``resolved_issues`` is empty.
    """
    # Extract the original data.
    org = item.get("org", "")
    repo = item.get("repo", "")
    number = str(item.get("number", ""))

    first_issue = item["resolved_issues"][0]
    # An issue may carry an explicit null title/body; treat it as empty text
    # rather than failing on `str + None`.
    title = first_issue.get("title") or ""
    body = first_issue.get("body") or ""

    return {
        "repo": f"{org}/{repo}",
        "instance_id": f"{org}__{repo}-{number}",
        "problem_statement": title + "\n" + body,
        "FAIL_TO_PASS": [],
        "PASS_TO_PASS": [],
        "base_commit": item['base'].get("sha", ""),
        "version": "0.1",  # depends
    }


def main(in_path=input_file, out_path=output_file):
    """Convert every non-blank JSONL line of ``in_path`` and write it to ``out_path``."""
    with open(in_path, 'r', encoding='utf-8') as fin, open(out_path, 'w', encoding='utf-8') as fout:
        for line in fin:
            line = line.strip()
            if not line:
                continue
            fout.write(json.dumps(convert_item(json.loads(line)), ensure_ascii=False) + "\n")


if __name__ == '__main__':
    main()
|
||||
@@ -1,24 +0,0 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
IN_FILE = 'output.jsonl'
OUT_FILE = 'patch.jsonl'


def main(in_file=IN_FILE, out_file=OUT_FILE):
    """Extract the generated patches from an evaluation output file.

    Each non-blank JSONL line of ``in_file`` must hold an ``instance_id`` of
    the form ``<org>__<repo>-<number>`` and a ``test_result.git_patch``. One
    JSON object with ``org``, ``repo``, ``number`` and ``fix_patch`` is
    written to ``out_file`` per input line.

    Args:
        in_file: Path of the evaluation output (JSONL).
        out_file: Path of the patch file to write (JSONL).

    Raises:
        ValueError: If an ``instance_id`` does not match the expected form.
    """
    with open(in_file, 'r', encoding='utf-8') as fin:
        with open(out_file, 'w', encoding='utf-8') as fout:
            for line in fin:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                if not line.strip():
                    continue
                data = json.loads(line)
                groups = re.match(r'(.*)__(.*)-(.*)', data['instance_id'])
                if groups is None:
                    raise ValueError(
                        f'Unexpected instance_id format: {data["instance_id"]!r}'
                    )
                patch = {
                    'org': groups.group(1),
                    'repo': groups.group(2),
                    'number': groups.group(3),
                    'fix_patch': data['test_result']['git_patch'],
                }
                fout.write(json.dumps(patch) + '\n')


if __name__ == '__main__':
    main()
|
||||
@@ -1,155 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -eo pipefail

source "evaluation/utils/version_control.sh"

# Positional arguments. SPLIT and N_RUNS are not positional; they are read
# from the environment further below.
MODEL_CONFIG=$1
COMMIT_HASH=$2
AGENT=$3
EVAL_LIMIT=$4
MAX_ITER=$5
NUM_WORKERS=$6
DATASET=$7
LANGUAGE=$8

if [ -z "$NUM_WORKERS" ]; then
  NUM_WORKERS=1
  echo "Number of workers not specified, use default $NUM_WORKERS"
fi
checkout_eval_branch

if [ -z "$AGENT" ]; then
  echo "Agent not specified, use default CodeActAgent"
  AGENT="CodeActAgent"
fi

if [ -z "$MAX_ITER" ]; then
  echo "MAX_ITER not specified, use default 100"
  MAX_ITER=100
fi

if [ -z "$USE_INSTANCE_IMAGE" ]; then
  echo "USE_INSTANCE_IMAGE not specified, use default true"
  USE_INSTANCE_IMAGE=true
fi

if [ -z "$RUN_WITH_BROWSING" ]; then
  echo "RUN_WITH_BROWSING not specified, use default false"
  RUN_WITH_BROWSING=false
fi

if [ -z "$DATASET" ]; then
  echo "DATASET not specified, use default princeton-nlp/SWE-bench_Lite"
  DATASET="princeton-nlp/SWE-bench_Lite"
fi

if [ -z "$LANGUAGE" ]; then
  echo "LANGUAGE not specified, use default python"
  LANGUAGE="python"
fi

if [ -z "$SPLIT" ]; then
  echo "SPLIT not specified, use default train"
  SPLIT="train"
fi

# TODO: adapt the default SPLIT per language for the multi-language version
# (e.g. "test" for python, "java_verified" for java).

if [ -z "$EVAL_DOCKER_IMAGE_PREFIX" ]; then
  if [ "$LANGUAGE" = "python" ]; then
    echo "EVAL_DOCKER_IMAGE_PREFIX is docker.io/xingyaoww/ as default as LANGUAGE is python"
    EVAL_DOCKER_IMAGE_PREFIX="docker.io/xingyaoww/"
  elif [ "$LANGUAGE" = "java" ]; then
    echo "EVAL_DOCKER_IMAGE_PREFIX is empty as default as LANGUAGE is java"
    EVAL_DOCKER_IMAGE_PREFIX=""
  fi
fi

# Keep the `export VAR="$VAR"` form: it exports the variable as an empty
# string even when it was never set, which a bare `export VAR` would not.
export EVAL_DOCKER_IMAGE_PREFIX="$EVAL_DOCKER_IMAGE_PREFIX"
echo "EVAL_DOCKER_IMAGE_PREFIX: $EVAL_DOCKER_IMAGE_PREFIX"
export USE_INSTANCE_IMAGE="$USE_INSTANCE_IMAGE"
echo "USE_INSTANCE_IMAGE: $USE_INSTANCE_IMAGE"
export RUN_WITH_BROWSING="$RUN_WITH_BROWSING"
echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"
export LANGUAGE="$LANGUAGE"
echo "LANGUAGE: $LANGUAGE"

get_openhands_version

echo "AGENT: $AGENT"
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
echo "MODEL_CONFIG: $MODEL_CONFIG"
echo "DATASET: $DATASET"
echo "SPLIT: $SPLIT"

# Default to NOT use Hint
if [ -z "$USE_HINT_TEXT" ]; then
  export USE_HINT_TEXT=false
fi
echo "USE_HINT_TEXT: $USE_HINT_TEXT"
EVAL_NOTE="$OPENHANDS_VERSION"
# if not using Hint, add -no-hint to the eval note
if [ "$USE_HINT_TEXT" = false ]; then
  EVAL_NOTE="$EVAL_NOTE-no-hint"
fi

if [ "$RUN_WITH_BROWSING" = true ]; then
  EVAL_NOTE="$EVAL_NOTE-with-browsing"
fi

if [ -n "$EXP_NAME" ]; then
  EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
fi
|
||||
|
||||
# Run one inference pass of the multi_swe_bench benchmark.
#   $1 - evaluation note appended to the output directory name
# Reads AGENT, MODEL_CONFIG, MAX_ITER, NUM_WORKERS, DATASET, SPLIT and the
# optional EVAL_LIMIT from the enclosing script.
function run_eval() {
  local eval_note="$1"
  # Build the command as an array so that values containing spaces or shell
  # metacharacters reach the program as single arguments and are never
  # re-evaluated by the shell.
  local -a cmd=(
    poetry run python evaluation/benchmarks/multi_swe_bench/run_infer.py
    --agent-cls "$AGENT"
    --llm-config "$MODEL_CONFIG"
    --max-iterations "$MAX_ITER"
    --eval-num-workers "$NUM_WORKERS"
    --eval-note "$eval_note"
    --dataset "$DATASET"
    --split "$SPLIT"
  )

  if [ -n "$EVAL_LIMIT" ]; then
    echo "EVAL_LIMIT: $EVAL_LIMIT"
    cmd+=(--eval-n-limit "$EVAL_LIMIT")
  fi

  # Run the command
  "${cmd[@]}"
}
|
||||
|
||||
unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push
if [ -z "$N_RUNS" ]; then
  N_RUNS=1
  echo "N_RUNS not specified, use default $N_RUNS"
fi

# Skip runs if the run number is in the SKIP_RUNS list
# read from env variable SKIP_RUNS as a comma separated list of run numbers
SKIP_RUNS=(${SKIP_RUNS//,/ })
for i in $(seq 1 "$N_RUNS"); do
  # [*] joins the array into one space-separated string, which is what the
  # padded substring match below needs.
  if [[ " ${SKIP_RUNS[*]} " =~ " $i " ]]; then
    echo "Skipping run $i"
    continue
  fi
  current_eval_note="$EVAL_NOTE-run_$i"
  echo "EVAL_NOTE: $current_eval_note"
  run_eval "$current_eval_note"
done

checkout_original_branch
|
||||
@@ -1,54 +0,0 @@
|
||||
"""This script compares gold patches with OpenHands-generated patches and check whether
|
||||
OpenHands found the right (set of) files to modify.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
|
||||
|
||||
def extract_modified_files(patch):
    """Return the set of file paths named in the ``diff --git`` headers of ``patch``.

    The path is taken from the ``a/`` side of each header line.
    """
    header = re.compile(r'^diff --git a/(.*?) b/')
    candidates = (header.match(row) for row in patch.split('\n'))
    return {found.group(1) for found in candidates if found}
|
||||
|
||||
|
||||
def process_report(oh_output_file):
    """Report how often the generated patch touches the gold patch's file.

    Each line of ``oh_output_file`` is a JSON object with ``instance_id``,
    ``swe_instance.patch`` (gold patch) and ``git_patch`` (generated patch).
    A line is printed for every instance whose generated patch does not
    modify all files of the gold patch, followed by a summary line.

    Args:
        oh_output_file: Path to the OpenHands output file (JSONL).

    Raises:
        AssertionError: If a gold patch does not modify exactly one file.
    """
    succ = 0
    fail = 0
    # Use a context manager so the report file is closed deterministically.
    with open(oh_output_file) as report:
        for raw_line in report:
            record = json.loads(raw_line)
            instance_id = record['instance_id']
            gold_patch = record['swe_instance']['patch']
            generated_patch = record['git_patch']
            gold_modified_files = extract_modified_files(gold_patch)
            # swe-bench lite only: a gold patch always contains exactly one file
            assert len(gold_modified_files) == 1
            generated_modified_files = extract_modified_files(generated_patch)

            # Check if all files in gold_patch are also in generated_patch
            if gold_modified_files.issubset(generated_modified_files):
                succ += 1
            else:
                fail += 1
                print(
                    f'{instance_id}: file mismatch, gold = {gold_modified_files}, generated = {generated_modified_files}'
                )

    total = succ + fail
    # An empty report has no meaningful rate; report 0.0 rather than
    # dividing by zero.
    success_rate = succ / float(total) if total else 0.0
    print(
        f'\nSUMMARY: {succ} out of {total} instances found correct files to edit, success rate = {success_rate}'
    )
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: read the report path and summarize it.
    cli = argparse.ArgumentParser()
    cli.add_argument('--oh_output_file', help='Path to the OH output file')
    cli_args = cli.parse_args()
    process_report(cli_args.oh_output_file)
|
||||
@@ -1,45 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
source ~/.bashrc
SWEUTIL_DIR=/swe_util

# FIXME: Cannot read SWE_INSTANCE_ID from the environment variable
# SWE_INSTANCE_ID=django__django-11099
if [ -z "$SWE_INSTANCE_ID" ]; then
  echo "Error: SWE_INSTANCE_ID is not set." >&2
  exit 1
fi

if [ -z "$REPO_NAME" ]; then
  echo "Error: REPO_NAME is not set." >&2
  exit 1
fi

# Read swe-bench-instance.json and extract the item matching the instance id
item=$(jq --arg INSTANCE_ID "$SWE_INSTANCE_ID" '.[] | select(.instance_id == $INSTANCE_ID)' "$SWEUTIL_DIR/eval_data/instances/swe-bench-instance.json")

if [[ -z "$item" ]]; then
  echo "No item found for the provided instance ID."
  exit 1
fi

# Workspace directory name: "<repo>__<version>" with every "/" replaced by "__"
WORKSPACE_NAME=$(echo "$item" | jq -r '(.repo | tostring) + "__" + (.version | tostring) | gsub("/"; "__")')

echo "WORKSPACE_NAME: $WORKSPACE_NAME"

# Clear the workspace
if [ -d /workspace ]; then
  rm -rf /workspace/*
else
  mkdir /workspace
fi
# Copy repo to workspace
if [ -d "/workspace/$WORKSPACE_NAME" ]; then
  rm -rf "/workspace/$WORKSPACE_NAME"
fi
mkdir -p /workspace
# Quoted so that names containing spaces or glob characters stay one path.
cp -r "/home/$REPO_NAME" "/workspace/$WORKSPACE_NAME"

# Activate instance-specific environment (currently disabled)
# . /opt/miniconda3/etc/profile.d/conda.sh
# conda activate testbed
|
||||
@@ -1,27 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
EVAL_WORKSPACE="evaluation/benchmarks/swe_bench/eval_workspace"
mkdir -p "$EVAL_WORKSPACE"

# 1. Prepare REPO
echo "==== Prepare SWE-bench repo ===="
OH_SWE_BENCH_REPO_PATH="https://github.com/All-Hands-AI/SWE-bench.git"
OH_SWE_BENCH_REPO_BRANCH="eval"
git clone -b "$OH_SWE_BENCH_REPO_BRANCH" "$OH_SWE_BENCH_REPO_PATH" "$EVAL_WORKSPACE/OH-SWE-bench"

# 2. Prepare DATA
echo "==== Prepare SWE-bench data ===="
EVAL_IMAGE=ghcr.io/all-hands-ai/eval-swe-bench:builder_with_conda
# Resolve to an absolute path: it is used as a docker bind-mount source below.
# Quoted throughout because the checkout location may contain spaces.
EVAL_WORKSPACE=$(realpath "$EVAL_WORKSPACE")
chmod +x "$EVAL_WORKSPACE/OH-SWE-bench/swebench/harness/prepare_data.sh"
if [ -d "$EVAL_WORKSPACE/eval_data" ]; then
  rm -r "$EVAL_WORKSPACE/eval_data"
fi
docker run \
  -v "$EVAL_WORKSPACE:/workspace" \
  -w /workspace \
  -u "$(id -u):$(id -g)" \
  -e HF_DATASETS_CACHE="/tmp" \
  --rm -it "$EVAL_IMAGE" \
  bash -c "cd OH-SWE-bench/swebench/harness && /swe_util/miniforge3/bin/conda run -n swe-bench-eval ./prepare_data.sh && mv eval_data /workspace/"
|
||||
@@ -1,96 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e

# assert user name is `root`
if [ "$USER" != "root" ]; then
  echo "Error: This script is intended to be run by the 'root' user only." >&2
  exit 1
fi

source ~/.bashrc

SWEUTIL_DIR=/swe_util

# Create logs directory
LOG_DIR=/openhands/logs
mkdir -p "$LOG_DIR" && chmod 777 "$LOG_DIR"

# FIXME: Cannot read SWE_INSTANCE_ID from the environment variable
# SWE_INSTANCE_ID=django__django-11099
if [ -z "$SWE_INSTANCE_ID" ]; then
  echo "Error: SWE_INSTANCE_ID is not set." >&2
  exit 1
fi

# Read the swe-bench-test-lite.json file and extract the required item based on instance_id
item=$(jq --arg INSTANCE_ID "$SWE_INSTANCE_ID" '.[] | select(.instance_id == $INSTANCE_ID)' "$SWEUTIL_DIR/eval_data/instances/swe-bench-test-lite.json")

if [[ -z "$item" ]]; then
  echo "No item found for the provided instance ID."
  exit 1
fi

# Environment name: "<repo>__<version>" with every "/" replaced by "__"
CONDA_ENV_NAME=$(echo "$item" | jq -r '.repo + "__" + .version | gsub("/"; "__")')

echo "CONDA_ENV_NAME: $CONDA_ENV_NAME"

SWE_TASK_DIR=/openhands/swe_tasks
mkdir -p "$SWE_TASK_DIR"
# Dump test_patch to $SWE_TASK_DIR/test.patch
echo "$item" | jq -r '.test_patch' > "$SWE_TASK_DIR/test.patch"
# Dump patch to $SWE_TASK_DIR/gold.patch
echo "$item" | jq -r '.patch' > "$SWE_TASK_DIR/gold.patch"
# Dump the item to $SWE_TASK_DIR/instance.json except for the "test_patch" and "patch" fields
echo "$item" | jq 'del(.test_patch, .patch)' > "$SWE_TASK_DIR/instance.json"

# Clear the workspace
rm -rf /workspace/*
# Copy repo to workspace
if [ -d "/workspace/$CONDA_ENV_NAME" ]; then
  rm -rf "/workspace/$CONDA_ENV_NAME"
fi
cp -r "$SWEUTIL_DIR/eval_data/testbeds/$CONDA_ENV_NAME" /workspace

# Reset swe-bench testbed and install the repo
. "$SWEUTIL_DIR/miniforge3/etc/profile.d/conda.sh"
conda config --set changeps1 False
conda config --append channels conda-forge
conda activate swe-bench-eval

mkdir -p "$SWE_TASK_DIR/reset_testbed_temp"
mkdir -p "$SWE_TASK_DIR/reset_testbed_log_dir"
SWE_BENCH_DIR=/swe_util/OH-SWE-bench
# Runs in a subshell so the PYTHONPATH export and the cd do not leak out.
# Note that this step reads swe-bench-test.json, not the lite file above.
output=$(
  export PYTHONPATH="$SWE_BENCH_DIR" && \
  cd "$SWE_BENCH_DIR" && \
  python swebench/harness/reset_swe_env.py \
    --swe_bench_tasks "$SWEUTIL_DIR/eval_data/instances/swe-bench-test.json" \
    --temp_dir "$SWE_TASK_DIR/reset_testbed_temp" \
    --testbed /workspace \
    --conda_path "$SWEUTIL_DIR/miniforge3" \
    --instance_id "$SWE_INSTANCE_ID" \
    --log_dir "$SWE_TASK_DIR/reset_testbed_log_dir" \
    --timeout 900 \
    --verbose
)

# The reset script reports "repo_path: ..." and "test_cmd: ..." lines.
REPO_PATH=$(echo "$output" | awk -F': ' '/repo_path:/ {print $2}')
TEST_CMD=$(echo "$output" | awk -F': ' '/test_cmd:/ {print $2}')
echo "Repo Path: $REPO_PATH"
echo "Test Command: $TEST_CMD"

echo "export SWE_BENCH_DIR=\"$SWE_BENCH_DIR\"" >> ~/.bashrc
echo "export REPO_PATH=\"$REPO_PATH\"" >> ~/.bashrc
echo "export TEST_CMD=\"$TEST_CMD\"" >> ~/.bashrc

if [[ "$REPO_PATH" == "None" ]]; then
  echo "Error: Failed to retrieve repository path. Tests may not have passed or output was not as expected." >&2
  exit 1
fi

# Activate instance-specific environment
. "$SWEUTIL_DIR/miniforge3/etc/profile.d/conda.sh"
conda activate "$CONDA_ENV_NAME"

# Turn errexit back off at the end.
# NOTE(review): presumably because this script is sourced into a shell that keeps running; confirm.
set +e
|
||||
@@ -1,16 +1,12 @@
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import { it, describe, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { it, describe, expect, vi, beforeAll, afterAll } from "vitest";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { AuthModal } from "#/components/features/waitlist/auth-modal";
|
||||
import * as CaptureConsent from "#/utils/handle-capture-consent";
|
||||
import * as AuthHook from "#/context/auth-context";
|
||||
|
||||
// Mock the useAuthUrl hook
|
||||
vi.mock("#/hooks/use-auth-url", () => ({
|
||||
useAuthUrl: () => "https://gitlab.com/oauth/authorize"
|
||||
}));
|
||||
|
||||
describe("AuthModal", () => {
|
||||
beforeEach(() => {
|
||||
beforeAll(() => {
|
||||
vi.stubGlobal("location", { href: "" });
|
||||
vi.spyOn(AuthHook, "useAuth").mockReturnValue({
|
||||
providersAreSet: false,
|
||||
@@ -20,29 +16,50 @@ describe("AuthModal", () => {
|
||||
});
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
afterAll(() => {
|
||||
vi.unstubAllGlobals();
|
||||
vi.resetAllMocks();
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
it("should render the GitHub and GitLab buttons", () => {
|
||||
render(<AuthModal githubAuthUrl="mock-url" appMode="saas" />);
|
||||
|
||||
it("should render a tos checkbox that is unchecked by default", () => {
|
||||
render(<AuthModal githubAuthUrl={null} appMode="saas" />);
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
|
||||
expect(checkbox).not.toBeChecked();
|
||||
});
|
||||
|
||||
it("should only enable the identity provider buttons if the tos checkbox is checked", async () => {
|
||||
const user = userEvent.setup();
|
||||
render(<AuthModal githubAuthUrl={null} appMode="saas" />);
|
||||
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
const githubButton = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
|
||||
const gitlabButton = screen.getByRole("button", { name: "GITLAB$CONNECT_TO_GITLAB" });
|
||||
|
||||
expect(githubButton).toBeInTheDocument();
|
||||
expect(gitlabButton).toBeInTheDocument();
|
||||
expect(githubButton).toBeDisabled();
|
||||
expect(gitlabButton).toBeDisabled();
|
||||
|
||||
await user.click(checkbox);
|
||||
|
||||
expect(githubButton).not.toBeDisabled();
|
||||
expect(gitlabButton).not.toBeDisabled();
|
||||
});
|
||||
|
||||
it("should redirect to GitHub auth URL when GitHub button is clicked", async () => {
|
||||
it("should set user analytics consent to true when the user checks the tos checkbox", async () => {
|
||||
const handleCaptureConsentSpy = vi.spyOn(
|
||||
CaptureConsent,
|
||||
"handleCaptureConsent",
|
||||
);
|
||||
|
||||
const user = userEvent.setup();
|
||||
const mockUrl = "https://github.com/login/oauth/authorize";
|
||||
render(<AuthModal githubAuthUrl={mockUrl} appMode="saas" />);
|
||||
render(<AuthModal githubAuthUrl="mock-url" appMode="saas" />);
|
||||
|
||||
const githubButton = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
|
||||
await user.click(githubButton);
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
await user.click(checkbox);
|
||||
|
||||
expect(window.location.href).toBe(mockUrl);
|
||||
const button = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
|
||||
await user.click(button);
|
||||
|
||||
expect(handleCaptureConsentSpy).toHaveBeenCalledWith(true);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,136 +0,0 @@
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import { it, describe, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import AcceptTOS from "#/routes/accept-tos";
|
||||
import * as CaptureConsent from "#/utils/handle-capture-consent";
|
||||
import * as ToastHandlers from "#/utils/custom-toast-handlers";
|
||||
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
|
||||
import { openHands } from "#/api/open-hands-axios";
|
||||
|
||||
// Mock the react-router hooks
|
||||
vi.mock("react-router", () => ({
|
||||
useNavigate: () => vi.fn(),
|
||||
useSearchParams: () => [
|
||||
{
|
||||
get: (param: string) => {
|
||||
if (param === "redirect_url") {
|
||||
return "/dashboard";
|
||||
}
|
||||
return null;
|
||||
},
|
||||
},
|
||||
],
|
||||
}));
|
||||
|
||||
// Mock the axios instance
|
||||
vi.mock("#/api/open-hands-axios", () => ({
|
||||
openHands: {
|
||||
post: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
// Mock the toast handlers
|
||||
vi.mock("#/utils/custom-toast-handlers", () => ({
|
||||
displayErrorToast: vi.fn(),
|
||||
}));
|
||||
|
||||
// Create a wrapper with QueryClientProvider
|
||||
const createWrapper = () => {
|
||||
const queryClient = new QueryClient({
|
||||
defaultOptions: {
|
||||
queries: {
|
||||
retry: false,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
return ({ children }: { children: React.ReactNode }) => (
|
||||
<QueryClientProvider client={queryClient}>{children}</QueryClientProvider>
|
||||
);
|
||||
};
|
||||
|
||||
describe("AcceptTOS", () => {
|
||||
beforeEach(() => {
|
||||
vi.stubGlobal("location", { href: "" });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.unstubAllGlobals();
|
||||
vi.resetAllMocks();
|
||||
});
|
||||
|
||||
it("should render a TOS checkbox that is unchecked by default", () => {
|
||||
render(<AcceptTOS />, { wrapper: createWrapper() });
|
||||
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
const continueButton = screen.getByRole("button", { name: "TOS$CONTINUE" });
|
||||
|
||||
expect(checkbox).not.toBeChecked();
|
||||
expect(continueButton).toBeDisabled();
|
||||
});
|
||||
|
||||
it("should enable the continue button when the TOS checkbox is checked", async () => {
|
||||
const user = userEvent.setup();
|
||||
render(<AcceptTOS />, { wrapper: createWrapper() });
|
||||
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
const continueButton = screen.getByRole("button", { name: "TOS$CONTINUE" });
|
||||
|
||||
expect(continueButton).toBeDisabled();
|
||||
|
||||
await user.click(checkbox);
|
||||
|
||||
expect(continueButton).not.toBeDisabled();
|
||||
});
|
||||
|
||||
it("should set user analytics consent to true when the user accepts TOS", async () => {
|
||||
const handleCaptureConsentSpy = vi.spyOn(
|
||||
CaptureConsent,
|
||||
"handleCaptureConsent",
|
||||
);
|
||||
|
||||
// Mock the API response
|
||||
vi.mocked(openHands.post).mockResolvedValue({
|
||||
data: { redirect_url: "/dashboard" },
|
||||
});
|
||||
|
||||
const user = userEvent.setup();
|
||||
render(<AcceptTOS />, { wrapper: createWrapper() });
|
||||
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
await user.click(checkbox);
|
||||
|
||||
const continueButton = screen.getByRole("button", { name: "TOS$CONTINUE" });
|
||||
await user.click(continueButton);
|
||||
|
||||
// Wait for the mutation to complete
|
||||
await new Promise(process.nextTick);
|
||||
|
||||
expect(handleCaptureConsentSpy).toHaveBeenCalledWith(true);
|
||||
expect(openHands.post).toHaveBeenCalledWith("/api/accept_tos", {
|
||||
redirect_url: "/dashboard",
|
||||
});
|
||||
});
|
||||
|
||||
it("should handle external redirect URLs", async () => {
|
||||
// Mock the API response with an external URL
|
||||
const externalUrl = "https://example.com/callback";
|
||||
vi.mocked(openHands.post).mockResolvedValue({
|
||||
data: { redirect_url: externalUrl },
|
||||
});
|
||||
|
||||
const user = userEvent.setup();
|
||||
render(<AcceptTOS />, { wrapper: createWrapper() });
|
||||
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
await user.click(checkbox);
|
||||
|
||||
const continueButton = screen.getByRole("button", { name: "TOS$CONTINUE" });
|
||||
await user.click(continueButton);
|
||||
|
||||
// Wait for the mutation to complete
|
||||
await new Promise(process.nextTick);
|
||||
|
||||
expect(window.location.href).toBe(externalUrl);
|
||||
});
|
||||
});
|
||||
@@ -91,13 +91,6 @@ describe("HomeScreen", () => {
|
||||
screen.getByTestId("task-suggestions");
|
||||
});
|
||||
|
||||
it("should have responsive layout for mobile and desktop screens", async () => {
|
||||
renderHomeScreen();
|
||||
|
||||
const mainContainer = screen.getByTestId("home-screen").querySelector("main");
|
||||
expect(mainContainer).toHaveClass("flex", "flex-col", "md:flex-row");
|
||||
});
|
||||
|
||||
it("should filter the suggested tasks based on the selected repository", async () => {
|
||||
const retrieveUserGitRepositoriesSpy = vi.spyOn(
|
||||
GitService,
|
||||
|
||||
3623
frontend/package-lock.json
generated
3623
frontend/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -1,22 +1,22 @@
|
||||
{
|
||||
"name": "openhands-frontend",
|
||||
"version": "0.36.0",
|
||||
"version": "0.34.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"engines": {
|
||||
"node": ">=20.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@heroui/react": "2.7.8",
|
||||
"@heroui/react": "2.7.6",
|
||||
"@microlink/react-json-view": "^1.26.1",
|
||||
"@monaco-editor/react": "^4.7.0-rc.0",
|
||||
"@react-router/node": "^7.5.3",
|
||||
"@react-router/serve": "^7.5.3",
|
||||
"@react-router/node": "^7.5.2",
|
||||
"@react-router/serve": "^7.5.2",
|
||||
"@react-types/shared": "^3.29.0",
|
||||
"@reduxjs/toolkit": "^2.7.0",
|
||||
"@stripe/react-stripe-js": "^3.6.0",
|
||||
"@stripe/stripe-js": "^7.2.0",
|
||||
"@tanstack/react-query": "^5.74.9",
|
||||
"@tanstack/react-query": "^5.74.7",
|
||||
"@vitejs/plugin-react": "^4.4.0",
|
||||
"@xterm/addon-fit": "^0.10.0",
|
||||
"@xterm/xterm": "^5.4.0",
|
||||
@@ -24,14 +24,14 @@
|
||||
"clsx": "^2.1.1",
|
||||
"eslint-config-airbnb-typescript": "^18.0.0",
|
||||
"framer-motion": "^12.9.2",
|
||||
"i18next": "^25.0.2",
|
||||
"i18next": "^25.0.1",
|
||||
"i18next-browser-languagedetector": "^8.0.5",
|
||||
"i18next-http-backend": "^3.0.2",
|
||||
"isbot": "^5.1.27",
|
||||
"jose": "^6.0.10",
|
||||
"lucide-react": "^0.503.0",
|
||||
"monaco-editor": "^0.52.2",
|
||||
"posthog-js": "^1.237.0",
|
||||
"posthog-js": "^1.236.7",
|
||||
"react": "^19.1.0",
|
||||
"react-dom": "^19.1.0",
|
||||
"react-highlight": "^0.15.0",
|
||||
@@ -40,7 +40,7 @@
|
||||
"react-icons": "^5.5.0",
|
||||
"react-markdown": "^10.1.0",
|
||||
"react-redux": "^9.2.0",
|
||||
"react-router": "^7.5.3",
|
||||
"react-router": "^7.5.2",
|
||||
"react-syntax-highlighter": "^15.6.1",
|
||||
"react-textarea-autosize": "^8.5.9",
|
||||
"remark-gfm": "^4.0.1",
|
||||
@@ -82,7 +82,7 @@
|
||||
"@babel/types": "^7.27.0",
|
||||
"@mswjs/socket.io-binding": "^0.1.1",
|
||||
"@playwright/test": "^1.52.0",
|
||||
"@react-router/dev": "^7.5.3",
|
||||
"@react-router/dev": "^7.5.2",
|
||||
"@tailwindcss/typography": "^0.5.16",
|
||||
"@tanstack/eslint-plugin-query": "^5.74.7",
|
||||
"@testing-library/dom": "^10.4.0",
|
||||
|
||||
@@ -134,7 +134,7 @@ class OpenHands {
|
||||
|
||||
static async getUserConversations(): Promise<Conversation[]> {
|
||||
const { data } = await openHands.get<ResultSet<Conversation>>(
|
||||
"/api/conversations?limit=20",
|
||||
"/api/conversations?limit=9",
|
||||
);
|
||||
return data.results;
|
||||
}
|
||||
|
||||
@@ -12,21 +12,6 @@ const decodeHtmlEntities = (text: string): string => {
|
||||
return textarea.value;
|
||||
};
|
||||
|
||||
/**
|
||||
* Checks if a path is likely a directory
|
||||
* @param path The full path
|
||||
* @returns True if the path is likely a directory
|
||||
*/
|
||||
const isLikelyDirectory = (path: string): boolean => {
|
||||
if (!path) return false;
|
||||
// Check if path already ends with a slash
|
||||
if (path.endsWith("/") || path.endsWith("\\")) return true;
|
||||
// Check if path has no extension (simple heuristic)
|
||||
const lastPart = path.split(/[/\\]/).pop() || "";
|
||||
// If the last part has no dots or ends with a dot, it's likely a directory
|
||||
return !lastPart.includes(".") || lastPart.endsWith(".");
|
||||
};
|
||||
|
||||
/**
|
||||
* Extracts the filename from a path
|
||||
* @param path The full path
|
||||
@@ -36,14 +21,7 @@ const extractFilename = (path: string): string => {
|
||||
if (!path) return "";
|
||||
// Handle both Unix and Windows paths
|
||||
const parts = path.split(/[/\\]/);
|
||||
const filename = parts[parts.length - 1];
|
||||
|
||||
// Add trailing slash for directories
|
||||
if (isLikelyDirectory(path) && !filename.endsWith("/")) {
|
||||
return `${filename}/`;
|
||||
}
|
||||
|
||||
return filename;
|
||||
return parts[parts.length - 1];
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -7,12 +7,7 @@ interface UploadImageInputProps {
|
||||
|
||||
export function UploadImageInput({ onUpload, label }: UploadImageInputProps) {
|
||||
const handleUpload = (event: React.ChangeEvent<HTMLInputElement>) => {
|
||||
if (event.target.files) {
|
||||
const validFiles = Array.from(event.target.files).filter((file) =>
|
||||
file.type.startsWith("image/"),
|
||||
);
|
||||
onUpload(validFiles);
|
||||
}
|
||||
if (event.target.files) onUpload(Array.from(event.target.files));
|
||||
};
|
||||
|
||||
return (
|
||||
|
||||
@@ -6,38 +6,55 @@ import { KeyStatusIcon } from "../key-status-icon";
|
||||
|
||||
interface GitHubTokenInputProps {
|
||||
onChange: (value: string) => void;
|
||||
onBaseDomainChange?: (value: string) => void;
|
||||
isGitHubTokenSet: boolean;
|
||||
name: string;
|
||||
baseDomainSet?: string | null;
|
||||
isSaas: boolean;
|
||||
}
|
||||
|
||||
export function GitHubTokenInput({
|
||||
onChange,
|
||||
onBaseDomainChange,
|
||||
isGitHubTokenSet,
|
||||
name,
|
||||
baseDomainSet,
|
||||
isSaas,
|
||||
}: GitHubTokenInputProps) {
|
||||
const { t } = useTranslation();
|
||||
|
||||
return (
|
||||
<div className="flex flex-col gap-6">
|
||||
{!isSaas && (
|
||||
<SettingsInput
|
||||
testId={name}
|
||||
name={name}
|
||||
onChange={onChange}
|
||||
label={t(I18nKey.GITHUB$TOKEN_LABEL)}
|
||||
type="password"
|
||||
className="w-[680px]"
|
||||
placeholder={isGitHubTokenSet ? "<hidden>" : ""}
|
||||
startContent={
|
||||
isGitHubTokenSet && (
|
||||
<KeyStatusIcon
|
||||
testId="gh-set-token-indicator"
|
||||
isSet={isGitHubTokenSet}
|
||||
/>
|
||||
)
|
||||
}
|
||||
/>
|
||||
)}
|
||||
|
||||
<SettingsInput
|
||||
testId={name}
|
||||
name={name}
|
||||
onChange={onChange}
|
||||
label={t(I18nKey.GITHUB$TOKEN_LABEL)}
|
||||
type="password"
|
||||
onChange={onBaseDomainChange || (() => {})}
|
||||
label={t(I18nKey.GITHUB$BASE_DOMAIN_LABEL)}
|
||||
type="text"
|
||||
className="w-[680px]"
|
||||
placeholder={isGitHubTokenSet ? "<hidden>" : ""}
|
||||
startContent={
|
||||
isGitHubTokenSet && (
|
||||
<KeyStatusIcon
|
||||
testId="gh-set-token-indicator"
|
||||
isSet={isGitHubTokenSet}
|
||||
/>
|
||||
)
|
||||
}
|
||||
placeholder={"github.com"}
|
||||
defaultValue={baseDomainSet ? baseDomainSet : undefined}
|
||||
/>
|
||||
|
||||
<GitHubTokenHelpAnchor />
|
||||
{!isSaas && <GitHubTokenHelpAnchor />}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -6,38 +6,55 @@ import { KeyStatusIcon } from "../key-status-icon";
|
||||
|
||||
interface GitLabTokenInputProps {
|
||||
onChange: (value: string) => void;
|
||||
onBaseDomainChange?: (value: string) => void;
|
||||
isGitLabTokenSet: boolean;
|
||||
name: string;
|
||||
baseDomainSet?: string | null;
|
||||
isSaas: boolean;
|
||||
}
|
||||
|
||||
export function GitLabTokenInput({
|
||||
onChange,
|
||||
onBaseDomainChange,
|
||||
isGitLabTokenSet,
|
||||
name,
|
||||
baseDomainSet,
|
||||
isSaas,
|
||||
}: GitLabTokenInputProps) {
|
||||
const { t } = useTranslation();
|
||||
|
||||
return (
|
||||
<div className="flex flex-col gap-6">
|
||||
{!isSaas && (
|
||||
<SettingsInput
|
||||
testId={name}
|
||||
name={name}
|
||||
onChange={onChange}
|
||||
label={t(I18nKey.GITLAB$TOKEN_LABEL)}
|
||||
type="password"
|
||||
className="w-[680px]"
|
||||
placeholder={isGitLabTokenSet ? "<hidden>" : ""}
|
||||
startContent={
|
||||
isGitLabTokenSet && (
|
||||
<KeyStatusIcon
|
||||
testId="gl-set-token-indicator"
|
||||
isSet={isGitLabTokenSet}
|
||||
/>
|
||||
)
|
||||
}
|
||||
/>
|
||||
)}
|
||||
|
||||
<SettingsInput
|
||||
testId={name}
|
||||
name={name}
|
||||
onChange={onChange}
|
||||
label={t(I18nKey.GITLAB$TOKEN_LABEL)}
|
||||
type="password"
|
||||
onChange={onBaseDomainChange || (() => {})}
|
||||
label={t(I18nKey.GITLAB$BASE_DOMAIN_LABEL)}
|
||||
type="text"
|
||||
className="w-[680px]"
|
||||
placeholder={isGitLabTokenSet ? "<hidden>" : ""}
|
||||
startContent={
|
||||
isGitLabTokenSet && (
|
||||
<KeyStatusIcon
|
||||
testId="gl-set-token-indicator"
|
||||
isSet={isGitLabTokenSet}
|
||||
/>
|
||||
)
|
||||
}
|
||||
placeholder={"gitlab.com"}
|
||||
defaultValue={baseDomainSet ? baseDomainSet : undefined}
|
||||
/>
|
||||
|
||||
<GitLabTokenHelpAnchor />
|
||||
{!isSaas && <GitLabTokenHelpAnchor />}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -4,6 +4,8 @@ import { I18nKey } from "#/i18n/declaration";
|
||||
import AllHandsLogo from "#/assets/branding/all-hands-logo.svg?react";
|
||||
import { ModalBackdrop } from "#/components/shared/modals/modal-backdrop";
|
||||
import { ModalBody } from "#/components/shared/modals/modal-body";
|
||||
import { TOSCheckbox } from "./tos-checkbox";
|
||||
import { handleCaptureConsent } from "#/utils/handle-capture-consent";
|
||||
import { BrandButton } from "../settings/brand-button";
|
||||
import GitHubLogo from "#/assets/branding/github-logo.svg?react";
|
||||
import GitLabLogo from "#/assets/branding/gitlab-logo.svg?react";
|
||||
@@ -17,6 +19,7 @@ interface AuthModalProps {
|
||||
|
||||
export function AuthModal({ githubAuthUrl, appMode }: AuthModalProps) {
|
||||
const { t } = useTranslation();
|
||||
const [isTosAccepted, setIsTosAccepted] = React.useState(false);
|
||||
|
||||
const gitlabAuthUrl = useAuthUrl({
|
||||
appMode: appMode || null,
|
||||
@@ -25,14 +28,14 @@ export function AuthModal({ githubAuthUrl, appMode }: AuthModalProps) {
|
||||
|
||||
const handleGitHubAuth = () => {
|
||||
if (githubAuthUrl) {
|
||||
// Always start the OIDC flow, let the backend handle TOS check
|
||||
handleCaptureConsent(true);
|
||||
window.location.href = githubAuthUrl;
|
||||
}
|
||||
};
|
||||
|
||||
const handleGitLabAuth = () => {
|
||||
if (gitlabAuthUrl) {
|
||||
// Always start the OIDC flow, let the backend handle TOS check
|
||||
handleCaptureConsent(true);
|
||||
window.location.href = gitlabAuthUrl;
|
||||
}
|
||||
};
|
||||
@@ -47,8 +50,11 @@ export function AuthModal({ githubAuthUrl, appMode }: AuthModalProps) {
|
||||
</h1>
|
||||
</div>
|
||||
|
||||
<TOSCheckbox onChange={() => setIsTosAccepted((prev) => !prev)} />
|
||||
|
||||
<div className="flex flex-col gap-3 w-full">
|
||||
<BrandButton
|
||||
isDisabled={!isTosAccepted}
|
||||
type="button"
|
||||
variant="primary"
|
||||
onClick={handleGitHubAuth}
|
||||
@@ -59,6 +65,7 @@ export function AuthModal({ githubAuthUrl, appMode }: AuthModalProps) {
|
||||
</BrandButton>
|
||||
|
||||
<BrandButton
|
||||
isDisabled={!isTosAccepted}
|
||||
type="button"
|
||||
variant="primary"
|
||||
onClick={handleGitLabAuth}
|
||||
|
||||
@@ -13,35 +13,21 @@ import posthog from "posthog-js";
|
||||
import "./i18n";
|
||||
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
|
||||
import store from "./store";
|
||||
import { useConfig } from "./hooks/query/use-config";
|
||||
import { AuthProvider } from "./context/auth-context";
|
||||
import { queryClientConfig } from "./query-client-config";
|
||||
import OpenHands from "./api/open-hands";
|
||||
import { displayErrorToast } from "./utils/custom-toast-handlers";
|
||||
|
||||
function PosthogInit() {
|
||||
const [posthogClientKey, setPosthogClientKey] = React.useState<string | null>(
|
||||
null,
|
||||
);
|
||||
const { data: config } = useConfig();
|
||||
|
||||
React.useEffect(() => {
|
||||
(async () => {
|
||||
try {
|
||||
const config = await OpenHands.getConfig();
|
||||
setPosthogClientKey(config.POSTHOG_CLIENT_KEY);
|
||||
} catch (error) {
|
||||
displayErrorToast("Error fetching PostHog client key");
|
||||
}
|
||||
})();
|
||||
}, []);
|
||||
|
||||
React.useEffect(() => {
|
||||
if (posthogClientKey) {
|
||||
posthog.init(posthogClientKey, {
|
||||
if (config?.POSTHOG_CLIENT_KEY) {
|
||||
posthog.init(config.POSTHOG_CLIENT_KEY, {
|
||||
api_host: "https://us.i.posthog.com",
|
||||
person_profiles: "identified_only",
|
||||
});
|
||||
}
|
||||
}, [posthogClientKey]);
|
||||
}, [config]);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -1,18 +1,14 @@
|
||||
import { useQuery } from "@tanstack/react-query";
|
||||
import { useConfig } from "./use-config";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
import { useIsOnTosPage } from "#/hooks/use-is-on-tos-page";
|
||||
|
||||
export const useBalance = () => {
|
||||
const { data: config } = useConfig();
|
||||
const isOnTosPage = useIsOnTosPage();
|
||||
|
||||
return useQuery({
|
||||
queryKey: ["user", "balance"],
|
||||
queryFn: OpenHands.getBalance,
|
||||
enabled:
|
||||
!isOnTosPage &&
|
||||
config?.APP_MODE === "saas" &&
|
||||
config?.FEATURE_FLAGS.ENABLE_BILLING,
|
||||
config?.APP_MODE === "saas" && config?.FEATURE_FLAGS.ENABLE_BILLING,
|
||||
});
|
||||
};
|
||||
|
||||
@@ -1,15 +1,10 @@
|
||||
import { useQuery } from "@tanstack/react-query";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
import { useIsOnTosPage } from "#/hooks/use-is-on-tos-page";
|
||||
|
||||
export const useConfig = () => {
|
||||
const isOnTosPage = useIsOnTosPage();
|
||||
|
||||
return useQuery({
|
||||
export const useConfig = () =>
|
||||
useQuery({
|
||||
queryKey: ["config"],
|
||||
queryFn: OpenHands.getConfig,
|
||||
staleTime: 1000 * 60 * 5, // 5 minutes
|
||||
gcTime: 1000 * 60 * 15, // 15 minutes,
|
||||
enabled: !isOnTosPage,
|
||||
gcTime: 1000 * 60 * 15, // 15 minutes
|
||||
});
|
||||
};
|
||||
|
||||
@@ -3,19 +3,17 @@ import React from "react";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
import { useConfig } from "./use-config";
|
||||
import { useAuth } from "#/context/auth-context";
|
||||
import { useIsOnTosPage } from "#/hooks/use-is-on-tos-page";
|
||||
|
||||
export const useIsAuthed = () => {
|
||||
const { providersAreSet } = useAuth();
|
||||
const { data: config } = useConfig();
|
||||
const isOnTosPage = useIsOnTosPage();
|
||||
|
||||
const appMode = React.useMemo(() => config?.APP_MODE, [config]);
|
||||
|
||||
return useQuery({
|
||||
queryKey: ["user", "authenticated", providersAreSet, appMode],
|
||||
queryFn: () => OpenHands.authenticate(appMode!),
|
||||
enabled: !!appMode && !isOnTosPage,
|
||||
enabled: !!appMode,
|
||||
staleTime: 1000 * 60 * 5, // 5 minutes
|
||||
gcTime: 1000 * 60 * 15, // 15 minutes
|
||||
retry: false,
|
||||
|
||||
@@ -4,7 +4,6 @@ import posthog from "posthog-js";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
import { useAuth } from "#/context/auth-context";
|
||||
import { DEFAULT_SETTINGS } from "#/services/settings";
|
||||
import { useIsOnTosPage } from "#/hooks/use-is-on-tos-page";
|
||||
import { Settings } from "#/types/settings";
|
||||
|
||||
const getSettingsQueryFn = async (): Promise<Settings> => {
|
||||
@@ -32,8 +31,6 @@ export const useSettings = () => {
|
||||
const { setProviderTokensSet, providerTokensSet, setProvidersAreSet } =
|
||||
useAuth();
|
||||
|
||||
const isOnTosPage = useIsOnTosPage();
|
||||
|
||||
const query = useQuery({
|
||||
queryKey: ["settings", providerTokensSet],
|
||||
queryFn: getSettingsQueryFn,
|
||||
@@ -43,7 +40,6 @@ export const useSettings = () => {
|
||||
retry: (_, error) => error.status !== 404,
|
||||
staleTime: 1000 * 60 * 5, // 5 minutes
|
||||
gcTime: 1000 * 60 * 15, // 15 minutes
|
||||
enabled: !isOnTosPage,
|
||||
meta: {
|
||||
disableToast: true,
|
||||
},
|
||||
@@ -58,13 +54,11 @@ export const useSettings = () => {
|
||||
React.useEffect(() => {
|
||||
if (query.data?.PROVIDER_TOKENS_SET) {
|
||||
const providers = query.data.PROVIDER_TOKENS_SET;
|
||||
const setProviders = (
|
||||
Object.keys(providers) as Array<keyof typeof providers>
|
||||
).filter((key) => providers[key]);
|
||||
const setProviders = Object.keys(providers) as Array<
|
||||
keyof typeof providers
|
||||
>;
|
||||
setProviderTokensSet(setProviders);
|
||||
const atLeastOneSet = Object.values(query.data.PROVIDER_TOKENS_SET).some(
|
||||
(value) => value,
|
||||
);
|
||||
const atLeastOneSet = setProviders.length > 0;
|
||||
setProvidersAreSet(atLeastOneSet);
|
||||
}
|
||||
}, [query.data?.PROVIDER_TOKENS_SET, query.isFetched]);
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
import { useLocation } from "react-router";
|
||||
|
||||
/**
|
||||
* Hook to check if the current page is the Terms of Service acceptance page.
|
||||
*
|
||||
* @returns {boolean} True if the current page is the TOS acceptance page, false otherwise.
|
||||
*/
|
||||
export const useIsOnTosPage = (): boolean => {
|
||||
const { pathname } = useLocation();
|
||||
return pathname === "/accept-tos";
|
||||
};
|
||||
@@ -104,6 +104,7 @@ export enum I18nKey {
|
||||
EXIT_PROJECT$TITLE = "EXIT_PROJECT$TITLE",
|
||||
LANGUAGE$LABEL = "LANGUAGE$LABEL",
|
||||
GITHUB$TOKEN_LABEL = "GITHUB$TOKEN_LABEL",
|
||||
GITHUB$BASE_DOMAIN_LABEL = "GITHUB$BASE_DOMAIN_LABEL",
|
||||
GITHUB$TOKEN_OPTIONAL = "GITHUB$TOKEN_OPTIONAL",
|
||||
GITHUB$GET_TOKEN = "GITHUB$GET_TOKEN",
|
||||
GITHUB$TOKEN_HELP_TEXT = "GITHUB$TOKEN_HELP_TEXT",
|
||||
@@ -450,6 +451,7 @@ export enum I18nKey {
|
||||
MODEL_SELECTOR$VERIFIED = "MODEL_SELECTOR$VERIFIED",
|
||||
MODEL_SELECTOR$OTHERS = "MODEL_SELECTOR$OTHERS",
|
||||
GITLAB$TOKEN_LABEL = "GITLAB$TOKEN_LABEL",
|
||||
GITLAB$BASE_DOMAIN_LABEL = "GITLAB$BASE_DOMAIN_LABEL",
|
||||
GITLAB$GET_TOKEN = "GITLAB$GET_TOKEN",
|
||||
GITLAB$TOKEN_HELP_TEXT = "GITLAB$TOKEN_HELP_TEXT",
|
||||
GITLAB$TOKEN_LINK_TEXT = "GITLAB$TOKEN_LINK_TEXT",
|
||||
@@ -469,8 +471,4 @@ export enum I18nKey {
|
||||
SYSTEM_MESSAGE_MODAL$TOOLS_TAB = "SYSTEM_MESSAGE_MODAL$TOOLS_TAB",
|
||||
SYSTEM_MESSAGE_MODAL$PARAMETERS = "SYSTEM_MESSAGE_MODAL$PARAMETERS",
|
||||
SYSTEM_MESSAGE_MODAL$NO_TOOLS = "SYSTEM_MESSAGE_MODAL$NO_TOOLS",
|
||||
TOS$ACCEPT_TERMS_OF_SERVICE = "TOS$ACCEPT_TERMS_OF_SERVICE",
|
||||
TOS$ACCEPT_TERMS_DESCRIPTION = "TOS$ACCEPT_TERMS_DESCRIPTION",
|
||||
TOS$CONTINUE = "TOS$CONTINUE",
|
||||
TOS$ERROR_ACCEPTING = "TOS$ERROR_ACCEPTING",
|
||||
}
|
||||
|
||||
@@ -1569,6 +1569,21 @@
|
||||
"tr": "GitHub Jetonu",
|
||||
"de": "GitHub-Token"
|
||||
},
|
||||
"GITHUB$BASE_DOMAIN_LABEL": {
|
||||
"en": "GitHub Base Domain",
|
||||
"ja": "GitHub ベースドメイン",
|
||||
"zh-CN": "GitHub 基础域名",
|
||||
"zh-TW": "GitHub 基礎網域",
|
||||
"ko-KR": "GitHub 기본 도메인",
|
||||
"no": "GitHub Base Domain",
|
||||
"it": "Dominio Base GitHub",
|
||||
"pt": "Domínio Base do GitHub",
|
||||
"es": "Dominio Base de GitHub",
|
||||
"ar": "نطاق GitHub الأساسي",
|
||||
"fr": "Domaine de Base GitHub",
|
||||
"tr": "GitHub Temel Alan Adı",
|
||||
"de": "GitHub Basis-Domain"
|
||||
},
|
||||
"GITHUB$TOKEN_OPTIONAL": {
|
||||
"en": "GitHub Token (Optional)",
|
||||
"ja": "GitHubトークン(任意)",
|
||||
@@ -6469,6 +6484,21 @@
|
||||
"tr": "GitLab Jetonu",
|
||||
"de": "GitLab-Token"
|
||||
},
|
||||
"GITLAB$BASE_DOMAIN_LABEL": {
|
||||
"en": "GitLab Base Domain",
|
||||
"ja": "GitLab ベースドメイン",
|
||||
"zh-CN": "GitLab 基础域名",
|
||||
"zh-TW": "GitLab 基礎網域",
|
||||
"ko-KR": "GitLab 기본 도메인",
|
||||
"no": "GitLab Base Domain",
|
||||
"it": "Dominio Base GitLab",
|
||||
"pt": "Domínio Base do GitLab",
|
||||
"es": "Dominio Base de GitLab",
|
||||
"ar": "نطاق GitLab الأساسي",
|
||||
"fr": "Domaine de Base GitLab",
|
||||
"tr": "GitLab Temel Alan Adı",
|
||||
"de": "GitLab Basis-Domain"
|
||||
},
|
||||
"GITLAB$GET_TOKEN": {
|
||||
"en": "Generate a token on",
|
||||
"ja": "トークンを生成する",
|
||||
@@ -6559,21 +6589,6 @@
|
||||
"tr": "belgelendirme",
|
||||
"de": "Dokumentation"
|
||||
},
|
||||
"AGENT_ERROR$ERROR_ACTION_NOT_EXECUTED": {
|
||||
"en": "The action has not been executed. This may have occurred because the user pressed the stop button, or because the runtime system crashed and restarted due to resource constraints. Any previously established system state, dependencies, or environment variables may have been lost.",
|
||||
"ja": "アクションは実行されていません。これはユーザーが停止ボタンを押したか、リソース制約によりランタイムシステムがクラッシュして再起動したことが原因かもしれません。以前に確立されたシステム状態、依存関係、または環境変数は失われている可能性があります。",
|
||||
"zh-CN": "该操作尚未执行。这可能是因为用户按下了停止按钮,或者因为运行时系统由于资源限制而崩溃并重新启动。任何先前建立的系统状态、依赖项或环境变量可能已丢失。",
|
||||
"zh-TW": "該操作尚未執行。這可能是因為用戶按下了停止按鈕,或者因為運行時系統由於資源限制而崩潰並重新啟動。任何先前建立的系統狀態、依賴項或環境變數可能已丟失。",
|
||||
"ko-KR": "작업이 실행되지 않았습니다. 이는 사용자가 중지 버튼을 눌렀거나 리소스 제약으로 인해 런타임 시스템이 충돌하고 재시작되었기 때문일 수 있습니다. 이전에 설정된 시스템 상태, 종속성 또는 환경 변수가 손실되었을 수 있습니다.",
|
||||
"no": "Handlingen har ikke blitt utført. Dette kan ha skjedd fordi brukeren trykket på stoppknappen, eller fordi kjøretidssystemet krasjet og startet på nytt på grunn av ressursbegrensninger. Enhver tidligere etablert systemtilstand, avhengigheter eller miljøvariabler kan ha gått tapt.",
|
||||
"it": "L'azione non è stata eseguita. Ciò potrebbe essere accaduto perché l'utente ha premuto il pulsante di arresto, o perché il sistema di runtime si è arrestato in modo anomalo e riavviato a causa di vincoli di risorse. Qualsiasi stato di sistema, dipendenza o variabile d'ambiente precedentemente stabilito potrebbe essere andato perso.",
|
||||
"pt": "A ação não foi executada. Isso pode ter ocorrido porque o usuário pressionou o botão de parar, ou porque o sistema de tempo de execução travou e reiniciou devido a restrições de recursos. Qualquer estado do sistema, dependências ou variáveis de ambiente estabelecidos anteriormente podem ter sido perdidos.",
|
||||
"es": "La acción no se ha ejecutado. Esto puede haber ocurrido porque el usuario presionó el botón de detener, o porque el sistema de tiempo de ejecución se bloqueó y reinició debido a restricciones de recursos. Cualquier estado del sistema, dependencias o variables de entorno establecidos previamente pueden haberse perdido.",
|
||||
"ar": "لم يتم تنفيذ الإجراء. قد يكون هذا حدث لأن المستخدم ضغط على زر التوقف، أو لأن نظام التشغيل تعطل وأعيد تشغيله بسبب قيود الموارد. قد تكون أي حالة نظام أو تبعيات أو متغيرات بيئية تم إنشاؤها مسبقًا قد فُقدت.",
|
||||
"fr": "L'action n'a pas été exécutée. Cela peut s'être produit parce que l'utilisateur a appuyé sur le bouton d'arrêt, ou parce que le système d'exécution s'est planté et a redémarré en raison de contraintes de ressources. Tout état du système, dépendances ou variables d'environnement précédemment établis peuvent avoir été perdus.",
|
||||
"tr": "Eylem yürütülmedi. Bu, kullanıcının durdurma düğmesine basması veya çalışma zamanı sisteminin kaynak kısıtlamaları nedeniyle çökmesi ve yeniden başlaması nedeniyle olmuş olabilir. Daha önce kurulmuş olan herhangi bir sistem durumu, bağımlılıklar veya ortam değişkenleri kaybolmuş olabilir.",
|
||||
"de": "Die Aktion wurde nicht ausgeführt. Dies kann passiert sein, weil der Benutzer die Stopp-Taste gedrückt hat oder weil das Laufzeitsystem aufgrund von Ressourcenbeschränkungen abgestürzt und neu gestartet wurde. Alle zuvor eingerichteten Systemzustände, Abhängigkeiten oder Umgebungsvariablen sind möglicherweise verloren gegangen."
|
||||
},
|
||||
"DIFF_VIEWER$LOADING": {
|
||||
"en": "Loading...",
|
||||
"ja": "読み込み中...",
|
||||
@@ -6769,53 +6784,4 @@
|
||||
"es": "No hay herramientas disponibles para este agente",
|
||||
"tr": "Bu ajan için kullanılabilir araç yok"
|
||||
}
|
||||
,
|
||||
"TOS$ACCEPT_TERMS_OF_SERVICE": {
|
||||
"en": "Accept Terms of Service",
|
||||
"ja": "利用規約に同意する",
|
||||
"zh-CN": "接受服务条款",
|
||||
"zh-TW": "接受服務條款",
|
||||
"ko-KR": "서비스 약관 동의",
|
||||
"fr": "Accepter les conditions d'utilisation",
|
||||
"es": "Aceptar términos de servicio",
|
||||
"de": "Nutzungsbedingungen akzeptieren",
|
||||
"it": "Accetta i termini di servizio",
|
||||
"pt": "Aceitar termos de serviço"
|
||||
},
|
||||
"TOS$ACCEPT_TERMS_DESCRIPTION": {
|
||||
"en": "Please review and accept our terms of service before continuing",
|
||||
"ja": "続行する前に利用規約を確認して同意してください",
|
||||
"zh-CN": "请在继续之前查看并接受我们的服务条款",
|
||||
"zh-TW": "請在繼續之前查看並接受我們的服務條款",
|
||||
"ko-KR": "계속하기 전에 서비스 약관을 검토하고 동의해 주세요",
|
||||
"fr": "Veuillez examiner et accepter nos conditions d'utilisation avant de continuer",
|
||||
"es": "Por favor, revise y acepte nuestros términos de servicio antes de continuar",
|
||||
"de": "Bitte überprüfen und akzeptieren Sie unsere Nutzungsbedingungen, bevor Sie fortfahren",
|
||||
"it": "Si prega di rivedere e accettare i nostri termini di servizio prima di continuare",
|
||||
"pt": "Por favor, revise e aceite nossos termos de serviço antes de continuar"
|
||||
},
|
||||
"TOS$CONTINUE": {
|
||||
"en": "Continue",
|
||||
"ja": "続行",
|
||||
"zh-CN": "继续",
|
||||
"zh-TW": "繼續",
|
||||
"ko-KR": "계속",
|
||||
"fr": "Continuer",
|
||||
"es": "Continuar",
|
||||
"de": "Fortfahren",
|
||||
"it": "Continua",
|
||||
"pt": "Continuar"
|
||||
},
|
||||
"TOS$ERROR_ACCEPTING": {
|
||||
"en": "Error accepting Terms of Service",
|
||||
"ja": "利用規約の承諾中にエラーが発生しました",
|
||||
"zh-CN": "接受服务条款时出错",
|
||||
"zh-TW": "接受服務條款時出錯",
|
||||
"ko-KR": "서비스 약관 수락 중 오류 발생",
|
||||
"fr": "Erreur lors de l'acceptation des conditions d'utilisation",
|
||||
"es": "Error al aceptar los Términos de Servicio",
|
||||
"de": "Fehler beim Akzeptieren der Nutzungsbedingungen",
|
||||
"it": "Errore nell'accettazione dei Termini di Servizio",
|
||||
"pt": "Erro ao aceitar os Termos de Serviço"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,7 +8,6 @@ import {
|
||||
export default [
|
||||
layout("routes/root-layout.tsx", [
|
||||
index("routes/home.tsx"),
|
||||
route("accept-tos", "routes/accept-tos.tsx"),
|
||||
route("settings", "routes/settings.tsx", [
|
||||
index("routes/llm-settings.tsx"),
|
||||
route("git", "routes/git-settings.tsx"),
|
||||
|
||||
@@ -1,84 +0,0 @@
|
||||
import React from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { useNavigate, useSearchParams } from "react-router";
|
||||
import { useMutation } from "@tanstack/react-query";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import AllHandsLogo from "#/assets/branding/all-hands-logo.svg?react";
|
||||
import { TOSCheckbox } from "#/components/features/waitlist/tos-checkbox";
|
||||
import { BrandButton } from "#/components/features/settings/brand-button";
|
||||
import { handleCaptureConsent } from "#/utils/handle-capture-consent";
|
||||
import { openHands } from "#/api/open-hands-axios";
|
||||
|
||||
export default function AcceptTOS() {
|
||||
const { t } = useTranslation();
|
||||
const navigate = useNavigate();
|
||||
const [searchParams] = useSearchParams();
|
||||
const [isTosAccepted, setIsTosAccepted] = React.useState(false);
|
||||
|
||||
// Get the redirect URL from the query parameters
|
||||
const redirectUrl = searchParams.get("redirect_url") || "/";
|
||||
|
||||
// Use mutation for accepting TOS
|
||||
const { mutate: acceptTOS, isPending: isSubmitting } = useMutation({
|
||||
mutationFn: async () => {
|
||||
// Set consent for analytics
|
||||
handleCaptureConsent(true);
|
||||
|
||||
// Call the API to record TOS acceptance in the database
|
||||
return openHands.post("/api/accept_tos", {
|
||||
redirect_url: redirectUrl,
|
||||
});
|
||||
},
|
||||
onSuccess: (response) => {
|
||||
// Get the redirect URL from the response
|
||||
const finalRedirectUrl = response.data.redirect_url || redirectUrl;
|
||||
|
||||
// Check if the redirect URL is an external URL (starts with http or https)
|
||||
if (
|
||||
finalRedirectUrl.startsWith("http://") ||
|
||||
finalRedirectUrl.startsWith("https://")
|
||||
) {
|
||||
// For external URLs, redirect using window.location
|
||||
window.location.href = finalRedirectUrl;
|
||||
} else {
|
||||
// For internal routes, use navigate
|
||||
navigate(finalRedirectUrl);
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
const handleAcceptTOS = () => {
|
||||
if (isTosAccepted && !isSubmitting) {
|
||||
acceptTOS();
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center h-full">
|
||||
<div className="border border-tertiary p-8 rounded-lg max-w-md w-full flex flex-col gap-6 items-center bg-base-secondary">
|
||||
<AllHandsLogo width={68} height={46} />
|
||||
|
||||
<div className="flex flex-col gap-2 w-full items-center text-center">
|
||||
<h1 className="text-2xl font-bold">
|
||||
{t(I18nKey.TOS$ACCEPT_TERMS_OF_SERVICE)}
|
||||
</h1>
|
||||
<p className="text-sm text-gray-500">
|
||||
{t(I18nKey.TOS$ACCEPT_TERMS_DESCRIPTION)}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<TOSCheckbox onChange={() => setIsTosAccepted((prev) => !prev)} />
|
||||
|
||||
<BrandButton
|
||||
isDisabled={!isTosAccepted || isSubmitting}
|
||||
type="button"
|
||||
variant="primary"
|
||||
onClick={handleAcceptTOS}
|
||||
className="w-full"
|
||||
>
|
||||
{isSubmitting ? t(I18nKey.HOME$LOADING) : t(I18nKey.TOS$CONTINUE)}
|
||||
</BrandButton>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -15,11 +15,13 @@ import {
|
||||
} from "#/utils/custom-toast-handlers";
|
||||
import { retrieveAxiosErrorMessage } from "#/utils/retrieve-axios-error-message";
|
||||
import { GitSettingInputsSkeleton } from "#/components/features/settings/git-settings/github-settings-inputs-skeleton";
|
||||
import { useAuth } from "#/context/auth-context";
|
||||
|
||||
function GitSettingsScreen() {
|
||||
const { t } = useTranslation();
|
||||
|
||||
const { mutate: saveSettings, isPending } = useSaveSettings();
|
||||
const { providerTokensSet } = useAuth();
|
||||
const { mutate: disconnectGitTokens } = useLogout();
|
||||
|
||||
const { data: settings, isLoading } = useSettings();
|
||||
@@ -29,10 +31,17 @@ function GitSettingsScreen() {
|
||||
React.useState(false);
|
||||
const [gitlabTokenInputHasValue, setGitlabTokenInputHasValue] =
|
||||
React.useState(false);
|
||||
const [githubBaseDomainInputHasValue, setGithubBaseDomainInputHasValue] =
|
||||
React.useState(false);
|
||||
const [gitlabBaseDomainInputHasValue, setGitlabBaseDomainInputHasValue] =
|
||||
React.useState(false);
|
||||
|
||||
const isSaas = config?.APP_MODE === "saas";
|
||||
const isGitHubTokenSet = !!settings?.PROVIDER_TOKENS_SET.github;
|
||||
const isGitLabTokenSet = !!settings?.PROVIDER_TOKENS_SET.gitlab;
|
||||
const isGitHubTokenSet = providerTokensSet.includes("github");
|
||||
const isGitLabTokenSet = providerTokensSet.includes("gitlab");
|
||||
|
||||
const existingGithubBaseDomain = settings?.PROVIDER_TOKENS_SET["github"];
|
||||
const existingGitlabBaseDomain = settings?.PROVIDER_TOKENS_SET["gitlab"];
|
||||
|
||||
const formAction = async (formData: FormData) => {
|
||||
const disconnectButtonClicked =
|
||||
@@ -45,12 +54,22 @@ function GitSettingsScreen() {
|
||||
|
||||
const githubToken = formData.get("github-token-input")?.toString() || "";
|
||||
const gitlabToken = formData.get("gitlab-token-input")?.toString() || "";
|
||||
const githubBaseDomain =
|
||||
formData.get("github-base-domain-input")?.toString() || "";
|
||||
const gitlabBaseDomain =
|
||||
formData.get("gitlab-base-domain-input")?.toString() || "";
|
||||
|
||||
saveSettings(
|
||||
{
|
||||
provider_tokens: {
|
||||
github: githubToken,
|
||||
gitlab: gitlabToken,
|
||||
github: {
|
||||
token: githubToken,
|
||||
base_domain: githubBaseDomain || null,
|
||||
},
|
||||
gitlab: {
|
||||
token: gitlabToken,
|
||||
base_domain: gitlabBaseDomain || null,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -64,12 +83,19 @@ function GitSettingsScreen() {
|
||||
onSettled: () => {
|
||||
setGithubTokenInputHasValue(false);
|
||||
setGitlabTokenInputHasValue(false);
|
||||
setGithubBaseDomainInputHasValue(false);
|
||||
setGitlabBaseDomainInputHasValue(false);
|
||||
},
|
||||
},
|
||||
);
|
||||
};
|
||||
|
||||
const formIsClean = !githubTokenInputHasValue && !gitlabTokenInputHasValue;
|
||||
const formIsClean =
|
||||
!githubTokenInputHasValue &&
|
||||
!gitlabTokenInputHasValue &&
|
||||
!githubBaseDomainInputHasValue &&
|
||||
!gitlabBaseDomainInputHasValue;
|
||||
|
||||
const shouldRenderExternalConfigureButtons = isSaas && config.APP_SLUG;
|
||||
|
||||
return (
|
||||
@@ -84,22 +110,32 @@ function GitSettingsScreen() {
|
||||
<ConfigureGitHubRepositoriesAnchor slug={config.APP_SLUG!} />
|
||||
)}
|
||||
|
||||
{!isSaas && !isLoading && (
|
||||
{!isLoading && (
|
||||
<div className="p-9 flex flex-col gap-12">
|
||||
<GitHubTokenInput
|
||||
name="github-token-input"
|
||||
baseDomainSet={existingGithubBaseDomain}
|
||||
isGitHubTokenSet={isGitHubTokenSet}
|
||||
onChange={(value) => {
|
||||
setGithubTokenInputHasValue(!!value);
|
||||
}}
|
||||
onBaseDomainChange={(value) => {
|
||||
setGithubBaseDomainInputHasValue(!!value);
|
||||
}}
|
||||
isSaas={isSaas}
|
||||
/>
|
||||
|
||||
<GitLabTokenInput
|
||||
name="gitlab-token-input"
|
||||
baseDomainSet={existingGitlabBaseDomain}
|
||||
isGitLabTokenSet={isGitLabTokenSet}
|
||||
onChange={(value) => {
|
||||
setGitlabTokenInputHasValue(!!value);
|
||||
}}
|
||||
onBaseDomainChange={(value) => {
|
||||
setGitlabBaseDomainInputHasValue(!!value);
|
||||
}}
|
||||
isSaas={isSaas}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -22,7 +22,7 @@ function HomeScreen() {
|
||||
|
||||
<hr className="border-[#717888]" />
|
||||
|
||||
<main className="flex flex-col md:flex-row justify-between gap-4">
|
||||
<main className="flex justify-between gap-4">
|
||||
<RepoConnector
|
||||
onRepoSelection={(title) => setSelectedRepoTitle(title)}
|
||||
/>
|
||||
|
||||
@@ -21,7 +21,6 @@ import { useMigrateUserConsent } from "#/hooks/use-migrate-user-consent";
|
||||
import { useBalance } from "#/hooks/query/use-balance";
|
||||
import { SetupPaymentModal } from "#/components/features/payment/setup-payment-modal";
|
||||
import { displaySuccessToast } from "#/utils/custom-toast-handlers";
|
||||
import { useIsOnTosPage } from "#/hooks/use-is-on-tos-page";
|
||||
|
||||
export function ErrorBoundary() {
|
||||
const error = useRouteError();
|
||||
@@ -59,7 +58,6 @@ export function ErrorBoundary() {
|
||||
export default function MainApp() {
|
||||
const navigate = useNavigate();
|
||||
const { pathname } = useLocation();
|
||||
const tosPageStatus = useIsOnTosPage();
|
||||
const [searchParams] = useSearchParams();
|
||||
const { data: settings } = useSettings();
|
||||
const { error, isFetching } = useBalance();
|
||||
@@ -73,75 +71,49 @@ export default function MainApp() {
|
||||
isError: authError,
|
||||
} = useIsAuthed();
|
||||
|
||||
// Always call the hook, but we'll only use the result when not on TOS page
|
||||
const gitHubAuthUrl = useGitHubAuthUrl({
|
||||
appMode: config.data?.APP_MODE || null,
|
||||
gitHubClientId: config.data?.GITHUB_CLIENT_ID || null,
|
||||
});
|
||||
|
||||
// When on TOS page, we don't use the GitHub auth URL
|
||||
const effectiveGitHubAuthUrl = tosPageStatus ? null : gitHubAuthUrl;
|
||||
|
||||
const [consentFormIsOpen, setConsentFormIsOpen] = React.useState(false);
|
||||
|
||||
React.useEffect(() => {
|
||||
// Don't change language when on TOS page
|
||||
if (!tosPageStatus && settings?.LANGUAGE) {
|
||||
if (settings?.LANGUAGE) {
|
||||
i18n.changeLanguage(settings.LANGUAGE);
|
||||
}
|
||||
}, [settings?.LANGUAGE, tosPageStatus]);
|
||||
}, [settings?.LANGUAGE]);
|
||||
|
||||
React.useEffect(() => {
|
||||
// Don't show consent form when on TOS page
|
||||
if (!tosPageStatus) {
|
||||
const consentFormModalIsOpen =
|
||||
settings?.USER_CONSENTS_TO_ANALYTICS === null;
|
||||
const consentFormModalIsOpen =
|
||||
settings?.USER_CONSENTS_TO_ANALYTICS === null;
|
||||
|
||||
setConsentFormIsOpen(consentFormModalIsOpen);
|
||||
}
|
||||
}, [settings, tosPageStatus]);
|
||||
setConsentFormIsOpen(consentFormModalIsOpen);
|
||||
}, [settings]);
|
||||
|
||||
React.useEffect(() => {
|
||||
// Don't migrate user consent when on TOS page
|
||||
if (!tosPageStatus) {
|
||||
// Migrate user consent to the server if it was previously stored in localStorage
|
||||
migrateUserConsent({
|
||||
handleAnalyticsWasPresentInLocalStorage: () => {
|
||||
setConsentFormIsOpen(false);
|
||||
},
|
||||
});
|
||||
}
|
||||
}, [tosPageStatus]);
|
||||
// Migrate user consent to the server if it was previously stored in localStorage
|
||||
migrateUserConsent({
|
||||
handleAnalyticsWasPresentInLocalStorage: () => {
|
||||
setConsentFormIsOpen(false);
|
||||
},
|
||||
});
|
||||
}, []);
|
||||
|
||||
React.useEffect(() => {
|
||||
// Don't do any redirects when on TOS page
|
||||
if (!tosPageStatus) {
|
||||
// Don't allow users to use the app if it 402s
|
||||
if (error?.status === 402 && pathname !== "/") {
|
||||
navigate("/");
|
||||
} else if (
|
||||
!isFetching &&
|
||||
searchParams.get("free_credits") === "success"
|
||||
) {
|
||||
displaySuccessToast(t(I18nKey.BILLING$YOURE_IN));
|
||||
searchParams.delete("free_credits");
|
||||
navigate("/");
|
||||
}
|
||||
// Don't allow users to use the app if it 402s
|
||||
if (error?.status === 402 && pathname !== "/") {
|
||||
navigate("/");
|
||||
} else if (!isFetching && searchParams.get("free_credits") === "success") {
|
||||
displaySuccessToast(t(I18nKey.BILLING$YOURE_IN));
|
||||
searchParams.delete("free_credits");
|
||||
navigate("/");
|
||||
}
|
||||
}, [error?.status, pathname, isFetching, tosPageStatus]);
|
||||
}, [error?.status, pathname, isFetching]);
|
||||
|
||||
// When on TOS page, we don't make any API calls, so we need to handle this case
|
||||
const userIsAuthed = tosPageStatus ? false : !!isAuthed && !authError;
|
||||
|
||||
// Only show the auth modal if:
|
||||
// 1. User is not authenticated
|
||||
// 2. We're not currently on the TOS page
|
||||
// 3. We're in SaaS mode
|
||||
const userIsAuthed = !!isAuthed && !authError;
|
||||
const renderAuthModal =
|
||||
!isFetchingAuth &&
|
||||
!userIsAuthed &&
|
||||
!tosPageStatus &&
|
||||
config.data?.APP_MODE === "saas";
|
||||
!isFetchingAuth && !userIsAuthed && config.data?.APP_MODE === "saas";
|
||||
|
||||
return (
|
||||
<div
|
||||
@@ -159,7 +131,7 @@ export default function MainApp() {
|
||||
|
||||
{renderAuthModal && (
|
||||
<AuthModal
|
||||
githubAuthUrl={effectiveGitHubAuthUrl}
|
||||
githubAuthUrl={gitHubAuthUrl}
|
||||
appMode={config.data?.APP_MODE}
|
||||
/>
|
||||
)}
|
||||
|
||||
@@ -11,13 +11,13 @@ export const DEFAULT_SETTINGS: Settings = {
|
||||
CONFIRMATION_MODE: false,
|
||||
SECURITY_ANALYZER: "",
|
||||
REMOTE_RUNTIME_RESOURCE_FACTOR: 1,
|
||||
PROVIDER_TOKENS_SET: { github: false, gitlab: false },
|
||||
PROVIDER_TOKENS_SET: { github: null, gitlab: null },
|
||||
ENABLE_DEFAULT_CONDENSER: true,
|
||||
ENABLE_SOUND_NOTIFICATIONS: false,
|
||||
USER_CONSENTS_TO_ANALYTICS: false,
|
||||
PROVIDER_TOKENS: {
|
||||
github: "",
|
||||
gitlab: "",
|
||||
github: { token: "", base_domain: null },
|
||||
gitlab: { token: "", base_domain: null },
|
||||
},
|
||||
IS_NEW_USER: true,
|
||||
};
|
||||
|
||||
@@ -5,6 +5,11 @@ export const ProviderOptions = {
|
||||
|
||||
export type Provider = keyof typeof ProviderOptions;
|
||||
|
||||
export type ProviderToken = {
|
||||
token: string;
|
||||
base_domain: string | null;
|
||||
};
|
||||
|
||||
export type Settings = {
|
||||
LLM_MODEL: string;
|
||||
LLM_BASE_URL: string;
|
||||
@@ -14,11 +19,11 @@ export type Settings = {
|
||||
CONFIRMATION_MODE: boolean;
|
||||
SECURITY_ANALYZER: string;
|
||||
REMOTE_RUNTIME_RESOURCE_FACTOR: number | null;
|
||||
PROVIDER_TOKENS_SET: Record<Provider, boolean>;
|
||||
PROVIDER_TOKENS_SET: Record<Provider, string | null>;
|
||||
ENABLE_DEFAULT_CONDENSER: boolean;
|
||||
ENABLE_SOUND_NOTIFICATIONS: boolean;
|
||||
USER_CONSENTS_TO_ANALYTICS: boolean | null;
|
||||
PROVIDER_TOKENS: Record<Provider, string>;
|
||||
PROVIDER_TOKENS: Record<Provider, ProviderToken>;
|
||||
IS_NEW_USER?: boolean;
|
||||
};
|
||||
|
||||
@@ -35,17 +40,17 @@ export type ApiSettings = {
|
||||
enable_default_condenser: boolean;
|
||||
enable_sound_notifications: boolean;
|
||||
user_consents_to_analytics: boolean | null;
|
||||
provider_tokens: Record<Provider, string>;
|
||||
provider_tokens_set: Record<Provider, boolean>;
|
||||
provider_tokens: Record<Provider, ProviderToken>;
|
||||
provider_tokens_set: Record<Provider, string | null>;
|
||||
};
|
||||
|
||||
export type PostSettings = Settings & {
|
||||
provider_tokens: Record<Provider, string>;
|
||||
provider_tokens: Record<Provider, ProviderToken>;
|
||||
user_consents_to_analytics: boolean | null;
|
||||
llm_api_key?: string | null;
|
||||
};
|
||||
|
||||
export type PostApiSettings = ApiSettings & {
|
||||
provider_tokens: Record<Provider, string>;
|
||||
provider_tokens: Record<Provider, ProviderToken>;
|
||||
user_consents_to_analytics: boolean | null;
|
||||
};
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
/**
|
||||
* Checks if the current page is the Terms of Service acceptance page.
|
||||
* This function works outside of React Router context by checking window.location directly.
|
||||
*
|
||||
* @param {string} [pathname] - Optional pathname from React Router's useLocation hook
|
||||
* @returns {boolean} True if the current page is the TOS acceptance page, false otherwise.
|
||||
*/
|
||||
export const isOnTosPage = (pathname?: string): boolean => {
|
||||
// If pathname is provided (from React Router), use it
|
||||
if (pathname !== undefined) {
|
||||
return pathname === "/accept-tos";
|
||||
}
|
||||
|
||||
// Otherwise check window.location (works outside React Router context)
|
||||
if (typeof window !== "undefined") {
|
||||
return window.location.pathname === "/accept-tos";
|
||||
}
|
||||
|
||||
return false;
|
||||
};
|
||||
@@ -18,10 +18,6 @@ vi.mock("react-i18next", async (importOriginal) => ({
|
||||
}),
|
||||
}));
|
||||
|
||||
vi.mock("#/hooks/use-is-on-tos-page", () => ({
|
||||
useIsOnTosPage: () => false,
|
||||
}));
|
||||
|
||||
// Mock requests during tests
|
||||
beforeAll(() => server.listen({ onUnhandledRequest: "bypass" }));
|
||||
afterEach(() => {
|
||||
|
||||
64
microagents/add_openhands_repo_instruction.md
Normal file
64
microagents/add_openhands_repo_instruction.md
Normal file
@@ -0,0 +1,64 @@
|
||||
---
|
||||
name: add_openhands_repo_instruction
|
||||
version: 1.0.0
|
||||
author: openhands
|
||||
agent: CodeActAgent
|
||||
inputs:
|
||||
- name: REPO_FOLDER_NAME
|
||||
description: "Name of the repository folder under /workspace"
|
||||
required: false
|
||||
---
|
||||
|
||||
Please browse the current repository under /workspace/{{ REPO_FOLDER_NAME }}, look at the documentation and relevant code, and understand the purpose of this repository.
|
||||
|
||||
Specifically, I want you to create a `.openhands/microagents/repo.md` file. This file should contain succinct information that summarizes (1) the purpose of this repository, (2) the general setup of this repo, and (3) a brief description of the structure of this repo.
|
||||
|
||||
Here's an example:
|
||||
```markdown
|
||||
---
|
||||
name: repo
|
||||
type: repo
|
||||
agent: CodeActAgent
|
||||
---
|
||||
|
||||
This repository contains the code for OpenHands, an automated AI software engineer. It has a Python backend
|
||||
(in the `openhands` directory) and React frontend (in the `frontend` directory).
|
||||
|
||||
## General Setup:
|
||||
To set up the entire repo, including frontend and backend, run `make build`.
|
||||
You don't need to do this unless the user asks you to, or if you're trying to run the entire application.
|
||||
|
||||
Before pushing any changes, you should ensure that any lint errors or simple test errors have been fixed.
|
||||
|
||||
* If you've made changes to the backend, you should run `pre-commit run --all-files --config ./dev_config/python/.pre-commit-config.yaml`
|
||||
* If you've made changes to the frontend, you should run `cd frontend && npm run lint:fix && npm run build ; cd ..`
|
||||
|
||||
If either command fails, it may have automatically fixed some issues. You should fix any issues that weren't automatically fixed,
|
||||
then re-run the command to ensure it passes.
|
||||
|
||||
## Repository Structure
|
||||
Backend:
|
||||
- Located in the `openhands` directory
|
||||
- Testing:
|
||||
- All tests are in `tests/unit/test_*.py`
|
||||
- To test new code, run `poetry run pytest tests/unit/test_xxx.py` where `xxx` is the appropriate file for the current functionality
|
||||
- Write all tests with pytest
|
||||
|
||||
Frontend:
|
||||
- Located in the `frontend` directory
|
||||
- Prerequisites: A recent version of NodeJS / NPM
|
||||
- Setup: Run `npm install` in the frontend directory
|
||||
- Testing:
|
||||
- Run tests: `npm run test`
|
||||
- To run specific tests: `npm run test -- -t "TestName"`
|
||||
- Building:
|
||||
- Build for production: `npm run build`
|
||||
- Environment Variables:
|
||||
- Set in `frontend/.env` or as environment variables
|
||||
- Available variables: VITE_BACKEND_HOST, VITE_USE_TLS, VITE_INSECURE_SKIP_VERIFY, VITE_FRONTEND_PORT
|
||||
- Internationalization:
|
||||
- Generate i18n declaration file: `npm run make-i18n`
|
||||
```
|
||||
|
||||
Now, please write a similar markdown for the current repository.
|
||||
Read all the GitHub workflows under .github/ of the repository (if this folder exists) to understand the CI checks (e.g., linter, pre-commit), and include those in the repo.md file.
|
||||
19
microagents/address_pr_comments.md
Normal file
19
microagents/address_pr_comments.md
Normal file
@@ -0,0 +1,19 @@
|
||||
---
|
||||
name: address_pr_comments
|
||||
version: 1.0.0
|
||||
author: openhands
|
||||
agent: CodeActAgent
|
||||
inputs:
|
||||
- name: PR_URL
|
||||
description: "URL of the pull request"
|
||||
required: true
|
||||
- name: BRANCH_NAME
|
||||
description: "Branch name corresponding to the pull request"
|
||||
required: true
|
||||
---
|
||||
|
||||
First, check the branch {{ BRANCH_NAME }} and read the diff against the main branch to understand the purpose.
|
||||
|
||||
This branch corresponds to this PR {{ PR_URL }}
|
||||
|
||||
Next, you should use the GitHub API to read the reviews and comments on this PR and address them.
|
||||
@@ -1,32 +0,0 @@
|
||||
---
|
||||
name: agent_memory
|
||||
type: knowledge
|
||||
version: 1.0.0
|
||||
agent: CodeActAgent
|
||||
triggers:
|
||||
- /remember
|
||||
---
|
||||
|
||||
* Repository memory: Use .openhands/microagents/repo.md under each repository root to store and access important information.
|
||||
- If this file exists, it will be added to your context automatically.
|
||||
- If missing, you should create it unless the user has explicitly asked you to not do so.
|
||||
|
||||
* Store and maintain **general knowledge** that will be helpful for most future tasks:
|
||||
1. Repository structure
|
||||
2. Common commands (build, lint, test, pre-commit, etc.)
|
||||
3. Code style preferences
|
||||
4. Workflows and best practices
|
||||
5. Any other repository-specific knowledge you learn
|
||||
|
||||
* IMPORTANT: ONLY LOG the information that would be helpful for different future tasks, for example, how to configure the settings, how to set up the repository. Do NOT add issue-specific information (e.g., what specific error you ran into and how you fixed it).
|
||||
|
||||
* When adding new information:
|
||||
- ALWAYS ask for user confirmation first by listing the exact items (numbered 1, 2, 3, etc.) you plan to save to repo.md
|
||||
- Only save the items the user approves (they may ask you to save a subset)
|
||||
- Ensure it integrates nicely with existing knowledge in repo.md
|
||||
- Reorganize the content if needed to maintain clarity and organization
|
||||
- Group related information together under appropriate sections or headings
|
||||
- If you've only explored a portion of the codebase, clearly note this limitation in the repository structure documentation
|
||||
- If you don't know the essential commands for working with the repository, such as lint or typecheck, ask the user and suggest adding them to repo.md for future reference (with permission)
|
||||
|
||||
When you receive this message, please review and summarize your recent actions and observations, then present a list of valuable information that should be saved in repo.md to the user.
|
||||
27
microagents/get_test_to_pass.md
Normal file
27
microagents/get_test_to_pass.md
Normal file
@@ -0,0 +1,27 @@
|
||||
---
|
||||
name: get_test_to_pass
|
||||
version: 1.0.0
|
||||
author: openhands
|
||||
agent: CodeActAgent
|
||||
inputs:
|
||||
- name: BRANCH_NAME
|
||||
description: "Branch for the agent to work on"
|
||||
required: true
|
||||
- name: TEST_COMMAND_TO_RUN
|
||||
description: "The test command you want the agent to work on. For example, `pytest tests/unit/test_bash_parsing.py`"
|
||||
required: true
|
||||
- name: FUNCTION_TO_FIX
|
||||
description: "The name of the function to fix"
|
||||
required: false
|
||||
- name: FILE_FOR_FUNCTION
|
||||
description: "The path of the file that contains the function"
|
||||
required: false
|
||||
---
|
||||
|
||||
Can you check out branch "{{ BRANCH_NAME }}", and run {{ TEST_COMMAND_TO_RUN }}.
|
||||
|
||||
{%- if FUNCTION_TO_FIX and FILE_FOR_FUNCTION %}
|
||||
Help me fix these tests to pass by fixing the {{ FUNCTION_TO_FIX }} function in file {{ FILE_FOR_FUNCTION }}.
|
||||
{%- endif %}
|
||||
|
||||
PLEASE DO NOT modify the tests by yourself -- Let me know if you think some of the tests are incorrect.
|
||||
21
microagents/update_pr_description.md
Normal file
21
microagents/update_pr_description.md
Normal file
@@ -0,0 +1,21 @@
|
||||
---
|
||||
name: update_pr_description
|
||||
version: 1.0.0
|
||||
author: openhands
|
||||
agent: CodeActAgent
|
||||
inputs:
|
||||
- name: PR_URL
|
||||
description: "URL of the pull request"
|
||||
type: string
|
||||
required: true
|
||||
validation:
|
||||
pattern: "^https://github.com/.+/.+/pull/[0-9]+$"
|
||||
- name: BRANCH_NAME
|
||||
description: "Branch name corresponding to the pull request"
|
||||
type: string
|
||||
required: true
|
||||
---
|
||||
|
||||
Please check the branch "{{ BRANCH_NAME }}" and look at the diff against the main branch. This branch belongs to this PR "{{ PR_URL }}".
|
||||
|
||||
Once you understand the purpose of the diff, please use the GitHub API to read the existing PR description, and update it to be more reflective of the changes we've made when necessary.
|
||||
21
microagents/update_test_for_new_implementation.md
Normal file
21
microagents/update_test_for_new_implementation.md
Normal file
@@ -0,0 +1,21 @@
|
||||
---
|
||||
name: update_test_for_new_implementation
|
||||
version: 1.0.0
|
||||
author: openhands
|
||||
agent: CodeActAgent
|
||||
inputs:
|
||||
- name: BRANCH_NAME
|
||||
description: "Branch for the agent to work on"
|
||||
required: true
|
||||
- name: TEST_COMMAND_TO_RUN
|
||||
description: "The test command you want the agent to work on. For example, `pytest tests/unit/test_bash_parsing.py`"
|
||||
required: true
|
||||
---
|
||||
|
||||
Can you check out branch "{{ BRANCH_NAME }}", and run {{ TEST_COMMAND_TO_RUN }}.
|
||||
|
||||
{%- if FUNCTION_TO_FIX and FILE_FOR_FUNCTION %}
|
||||
Help me fix these tests to pass by fixing the {{ FUNCTION_TO_FIX }} function in file {{ FILE_FOR_FUNCTION }}.
|
||||
{%- endif %}
|
||||
|
||||
PLEASE DO NOT modify the tests by yourself -- Let me know if you think some of the tests are incorrect.
|
||||
@@ -20,7 +20,10 @@ from openhands.controller.state.state import State
|
||||
from openhands.core.config import AgentConfig
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.message import Message
|
||||
from openhands.events.action import Action, AgentFinishAction, MessageAction
|
||||
from openhands.events.action import (
|
||||
Action,
|
||||
AgentFinishAction,
|
||||
)
|
||||
from openhands.events.event import Event
|
||||
from openhands.llm.llm import LLM
|
||||
from openhands.memory.condenser import Condenser
|
||||
@@ -170,8 +173,7 @@ class CodeActAgent(Agent):
|
||||
f'Processing {len(condensed_history)} events from a total of {len(state.history)} events'
|
||||
)
|
||||
|
||||
initial_user_message = self._get_initial_user_message(state.history)
|
||||
messages = self._get_messages(condensed_history, initial_user_message)
|
||||
messages = self._get_messages(condensed_history)
|
||||
params: dict = {
|
||||
'messages': self.llm.format_messages_for_llm(messages),
|
||||
}
|
||||
@@ -214,29 +216,7 @@ class CodeActAgent(Agent):
|
||||
self.pending_actions.append(action)
|
||||
return self.pending_actions.popleft()
|
||||
|
||||
def _get_initial_user_message(self, history: list[Event]) -> MessageAction:
|
||||
"""Finds the initial user message action from the full history."""
|
||||
initial_user_message: MessageAction | None = None
|
||||
for event in history:
|
||||
if isinstance(event, MessageAction) and event.source == 'user':
|
||||
initial_user_message = event
|
||||
break
|
||||
|
||||
if initial_user_message is None:
|
||||
# This should not happen in a valid conversation
|
||||
logger.error(
|
||||
f'CRITICAL: Could not find the initial user MessageAction in the full {len(history)} events history.'
|
||||
)
|
||||
# Depending on desired robustness, could raise error or create a dummy action
|
||||
# and log the error
|
||||
raise ValueError(
|
||||
'Initial user message not found in history. Please report this issue.'
|
||||
)
|
||||
return initial_user_message
|
||||
|
||||
def _get_messages(
|
||||
self, events: list[Event], initial_user_message: MessageAction
|
||||
) -> list[Message]:
|
||||
def _get_messages(self, events: list[Event]) -> list[Message]:
|
||||
"""Constructs the message history for the LLM conversation.
|
||||
|
||||
This method builds a structured conversation history by processing events from the state
|
||||
@@ -273,7 +253,6 @@ class CodeActAgent(Agent):
|
||||
# Use ConversationMemory to process events (including SystemMessageAction)
|
||||
messages = self.conversation_memory.process_events(
|
||||
condensed_history=events,
|
||||
initial_user_action=initial_user_message,
|
||||
max_message_chars=self.llm.config.max_message_chars,
|
||||
vision_is_active=self.llm.vision_is_active(),
|
||||
)
|
||||
|
||||
@@ -44,9 +44,8 @@ Your primary role is to assist users by executing commands, modifying code, and
|
||||
* For bug fixes: Create tests to verify issues before implementing fixes
|
||||
* For new features: Consider test-driven development when appropriate
|
||||
* If the repository lacks testing infrastructure and implementing tests would require extensive setup, consult with the user before investing time in building testing infrastructure
|
||||
* If the environment is not set up to run tests, consult with the user first before investing time to install all dependencies
|
||||
4. IMPLEMENTATION: Make focused, minimal changes to address the problem
|
||||
5. VERIFICATION: If the environment is set up to run tests, test your implementation thoroughly, including edge cases. If the environment is not set up to run tests, consult with the user first before investing time to run tests.
|
||||
5. VERIFICATION: Test your implementation thoroughly, including edge cases
|
||||
</PROBLEM_SOLVING_WORKFLOW>
|
||||
|
||||
<SECURITY>
|
||||
|
||||
@@ -76,8 +76,6 @@ from openhands.llm.metrics import Metrics, TokenUsage
|
||||
TRAFFIC_CONTROL_REMINDER = (
|
||||
"Please click on resume button if you'd like to continue, or start a new task."
|
||||
)
|
||||
ERROR_ACTION_NOT_EXECUTED_ID = 'AGENT_ERROR$ERROR_ACTION_NOT_EXECUTED'
|
||||
ERROR_ACTION_NOT_EXECUTED = 'The action has not been executed. This may have occurred because the user pressed the stop button, or because the runtime system crashed and restarted due to resource constraints. Any previously established system state, dependencies, or environment variables may have been lost.'
|
||||
|
||||
|
||||
class AgentController:
|
||||
@@ -192,7 +190,7 @@ class AgentController:
|
||||
logger.debug(f'System message got from agent: {system_message}')
|
||||
if system_message:
|
||||
self.event_stream.add_event(system_message, EventSource.AGENT)
|
||||
logger.info(f'System message added to event stream: {system_message}')
|
||||
logger.debug(f'System message added to event stream: {system_message}')
|
||||
|
||||
async def close(self, set_stop_state: bool = True) -> None:
|
||||
"""Closes the agent controller, canceling any ongoing tasks and unsubscribing from the event stream.
|
||||
@@ -568,10 +566,7 @@ class AgentController:
|
||||
|
||||
# make a new ErrorObservation with the tool call metadata
|
||||
if not found_observation:
|
||||
obs = ErrorObservation(
|
||||
content=ERROR_ACTION_NOT_EXECUTED,
|
||||
error_id=ERROR_ACTION_NOT_EXECUTED_ID,
|
||||
)
|
||||
obs = ErrorObservation(content='The action has not been executed.')
|
||||
obs.tool_call_metadata = self._pending_action.tool_call_metadata
|
||||
obs._cause = self._pending_action.id # type: ignore[attr-defined]
|
||||
self.event_stream.add_event(obs, EventSource.AGENT)
|
||||
@@ -847,8 +842,6 @@ class AgentController:
|
||||
'contextwindowexceedederror' in error_str
|
||||
or 'prompt is too long' in error_str
|
||||
or 'input length and `max_tokens` exceed context limit' in error_str
|
||||
or 'please reduce the length of either one'
|
||||
in error_str # For OpenRouter context window errors
|
||||
or isinstance(e, ContextWindowExceededError)
|
||||
):
|
||||
if self.agent.config.enable_history_truncation:
|
||||
@@ -1027,7 +1020,7 @@ class AgentController:
|
||||
self.state.start_id = 0
|
||||
|
||||
self.log(
|
||||
'info',
|
||||
'debug',
|
||||
f'AgentController {self.id} - created new state. start_id: {self.state.start_id}',
|
||||
)
|
||||
else:
|
||||
@@ -1037,7 +1030,7 @@ class AgentController:
|
||||
self.state.start_id = 0
|
||||
|
||||
self.log(
|
||||
'info',
|
||||
'debug',
|
||||
f'AgentController {self.id} initializing history from event {self.state.start_id}',
|
||||
)
|
||||
|
||||
@@ -1150,169 +1143,70 @@ class AgentController:
|
||||
|
||||
def _handle_long_context_error(self) -> None:
|
||||
# When context window is exceeded, keep roughly half of agent interactions
|
||||
kept_events = self._apply_conversation_window()
|
||||
kept_event_ids = {e.id for e in kept_events}
|
||||
|
||||
self.log(
|
||||
'info',
|
||||
f'Context window exceeded. Keeping events with IDs: {kept_event_ids}',
|
||||
)
|
||||
|
||||
# The events to forget are those that are not in the kept set
|
||||
kept_event_ids = {
|
||||
e.id for e in self._apply_conversation_window(self.state.history)
|
||||
}
|
||||
forgotten_event_ids = {e.id for e in self.state.history} - kept_event_ids
|
||||
|
||||
if len(kept_event_ids) == 0:
|
||||
self.log(
|
||||
'warning',
|
||||
'No events kept after applying conversation window. This should not happen.',
|
||||
)
|
||||
|
||||
# verify that the first event id in kept_event_ids is the same as the start_id
|
||||
if len(kept_event_ids) > 0 and self.state.history[0].id not in kept_event_ids:
|
||||
self.log(
|
||||
'warning',
|
||||
f'First event after applying conversation window was not kept: {self.state.history[0].id} not in {kept_event_ids}',
|
||||
)
|
||||
# Save the ID of the first event in our truncated history for future reloading
|
||||
if self.state.history:
|
||||
self.state.start_id = self.state.history[0].id
|
||||
|
||||
# Add an error event to trigger another step by the agent
|
||||
self.event_stream.add_event(
|
||||
CondensationAction(
|
||||
forgotten_events_start_id=min(forgotten_event_ids)
|
||||
if forgotten_event_ids
|
||||
else 0,
|
||||
forgotten_events_end_id=max(forgotten_event_ids)
|
||||
if forgotten_event_ids
|
||||
else 0,
|
||||
forgotten_events_start_id=min(forgotten_event_ids),
|
||||
forgotten_events_end_id=max(forgotten_event_ids),
|
||||
),
|
||||
EventSource.AGENT,
|
||||
)
|
||||
|
||||
def _apply_conversation_window(self) -> list[Event]:
|
||||
def _apply_conversation_window(self, events: list[Event]) -> list[Event]:
|
||||
"""Cuts history roughly in half when context window is exceeded.
|
||||
|
||||
It preserves action-observation pairs and ensures that the system message,
|
||||
the first user message, and its associated recall observation are always included
|
||||
at the beginning of the context window.
|
||||
It preserves action-observation pairs and ensures that the first user message is always included.
|
||||
|
||||
The algorithm:
|
||||
1. Identify essential initial events: System Message, First User Message, Recall Observation.
|
||||
2. Determine the slice of recent events to potentially keep.
|
||||
3. Validate the start of the recent slice for dangling observations.
|
||||
4. Combine essential events and validated recent events, ensuring essentials come first.
|
||||
1. Cut history in half
|
||||
2. Check first event in new history:
|
||||
- If Observation: find and include its Action
|
||||
- If MessageAction: ensure its related Action-Observation pair isn't split
|
||||
3. Always include the first user message
|
||||
|
||||
Args:
|
||||
events: List of events to filter
|
||||
|
||||
Returns:
|
||||
Filtered list of events keeping newest half while preserving pairs and essential initial events.
|
||||
Filtered list of events keeping newest half while preserving pairs
|
||||
"""
|
||||
if not self.state.history:
|
||||
return []
|
||||
if not events:
|
||||
return events
|
||||
|
||||
history = self.state.history
|
||||
|
||||
# 1. Identify essential initial events
|
||||
system_message: SystemMessageAction | None = None
|
||||
first_user_msg: MessageAction | None = None
|
||||
recall_action: RecallAction | None = None
|
||||
recall_observation: Observation | None = None
|
||||
|
||||
# Find System Message (should be the first event, if it exists)
|
||||
system_message = next(
|
||||
(e for e in history if isinstance(e, SystemMessageAction)), None
|
||||
)
|
||||
assert (
|
||||
system_message is None
|
||||
or isinstance(system_message, SystemMessageAction)
|
||||
and system_message.id == history[0].id
|
||||
# Find first user message - we'll need to ensure it's included
|
||||
first_user_msg = next(
|
||||
(
|
||||
e
|
||||
for e in events
|
||||
if isinstance(e, MessageAction) and e.source == EventSource.USER
|
||||
),
|
||||
None,
|
||||
)
|
||||
|
||||
# Find First User Message, which MUST exist
|
||||
first_user_msg = self._first_user_message()
|
||||
if first_user_msg is None:
|
||||
raise RuntimeError('No first user message found in the event stream.')
|
||||
# cut in half
|
||||
mid_point = max(1, len(events) // 2)
|
||||
kept_events = events[mid_point:]
|
||||
if len(kept_events) > 0 and isinstance(kept_events[0], Observation):
|
||||
kept_events = kept_events[1:]
|
||||
|
||||
first_user_msg_index = -1
|
||||
for i, event in enumerate(history):
|
||||
if isinstance(event, MessageAction) and event.source == EventSource.USER:
|
||||
first_user_msg = event
|
||||
first_user_msg_index = i
|
||||
break
|
||||
# Ensure first user message is included
|
||||
if first_user_msg and first_user_msg not in kept_events:
|
||||
kept_events = [first_user_msg] + kept_events
|
||||
|
||||
# Find Recall Action and Observation related to the First User Message
|
||||
if first_user_msg is not None and first_user_msg_index != -1:
|
||||
# Look for RecallAction after the first user message
|
||||
for i in range(first_user_msg_index + 1, len(history)):
|
||||
event = history[i]
|
||||
if (
|
||||
isinstance(event, RecallAction)
|
||||
and event.query == first_user_msg.content
|
||||
):
|
||||
# Found RecallAction, now look for its Observation
|
||||
recall_action = event
|
||||
for j in range(i + 1, len(history)):
|
||||
obs_event = history[j]
|
||||
# Check for Observation caused by this RecallAction
|
||||
if (
|
||||
isinstance(obs_event, Observation)
|
||||
and obs_event.cause == recall_action.id
|
||||
):
|
||||
recall_observation = obs_event
|
||||
break # Found the observation, stop inner loop
|
||||
break # Found the recall action (and maybe obs), stop outer loop
|
||||
|
||||
essential_events: list[Event] = []
|
||||
if system_message:
|
||||
essential_events.append(system_message)
|
||||
# start_id points to first user message
|
||||
if first_user_msg:
|
||||
essential_events.append(first_user_msg)
|
||||
# Also keep the RecallAction that triggered the essential RecallObservation
|
||||
if recall_action:
|
||||
essential_events.append(recall_action)
|
||||
if recall_observation:
|
||||
essential_events.append(recall_observation)
|
||||
self.state.start_id = first_user_msg.id
|
||||
|
||||
# 2. Determine the slice of recent events to potentially keep
|
||||
num_non_essential_events = len(history) - len(essential_events)
|
||||
# Keep roughly half of the non-essential events, minimum 1
|
||||
num_recent_to_keep = max(1, num_non_essential_events // 2)
|
||||
|
||||
# Calculate the starting index for the recent slice
|
||||
slice_start_index = len(history) - num_recent_to_keep
|
||||
slice_start_index = max(0, slice_start_index) # Ensure index is not negative
|
||||
recent_events_slice = history[slice_start_index:]
|
||||
|
||||
# 3. Validate the start of the recent slice for dangling observations
|
||||
# IMPORTANT: Most observations in history are tool call results, which cannot be without their action, or we get an LLM API error
|
||||
first_valid_event_index = 0
|
||||
for i, event in enumerate(recent_events_slice):
|
||||
if isinstance(event, Observation):
|
||||
first_valid_event_index += 1
|
||||
else:
|
||||
break
|
||||
# If all events in the slice are dangling observations, we need to keep at least one
|
||||
if first_valid_event_index == len(recent_events_slice):
|
||||
self.log(
|
||||
'warning',
|
||||
'All recent events are dangling observations, which we truncate. This means the agent has only the essential first events. This should not happen.',
|
||||
)
|
||||
|
||||
# Adjust the recent_events_slice if dangling observations were found at the start
|
||||
if first_valid_event_index < len(recent_events_slice):
|
||||
validated_recent_events = recent_events_slice[first_valid_event_index:]
|
||||
if first_valid_event_index > 0:
|
||||
self.log(
|
||||
'debug',
|
||||
f'Removed {first_valid_event_index} dangling observation(s) from the start of recent event slice.',
|
||||
)
|
||||
else:
|
||||
validated_recent_events = []
|
||||
|
||||
# 4. Combine essential events and validated recent events
|
||||
events_to_keep: list[Event] = essential_events + validated_recent_events
|
||||
self.log('debug', f'History truncated. Kept {len(events_to_keep)} events.')
|
||||
|
||||
return events_to_keep
|
||||
return kept_events
|
||||
|
||||
def _is_stuck(self) -> bool:
|
||||
"""Checks if the agent or its delegate is stuck in a loop.
|
||||
|
||||
@@ -14,14 +14,12 @@ from openhands.core.cli_commands import (
|
||||
)
|
||||
from openhands.core.cli_tui import (
|
||||
UsageMetrics,
|
||||
display_agent_running_message,
|
||||
display_banner,
|
||||
display_event,
|
||||
display_initial_user_prompt,
|
||||
display_initialization_animation,
|
||||
display_runtime_initialization_message,
|
||||
display_welcome_message,
|
||||
process_agent_pause,
|
||||
read_confirmation_input,
|
||||
read_prompt_input,
|
||||
)
|
||||
@@ -101,7 +99,6 @@ async def run_session(
|
||||
|
||||
sid = str(uuid4())
|
||||
is_loaded = asyncio.Event()
|
||||
is_paused = asyncio.Event()
|
||||
|
||||
# Show runtime initialization message
|
||||
display_runtime_initialization_message(config.runtime)
|
||||
@@ -127,12 +124,10 @@ async def run_session(
|
||||
|
||||
usage_metrics = UsageMetrics()
|
||||
|
||||
async def prompt_for_next_task(agent_state: str):
|
||||
async def prompt_for_next_task():
|
||||
nonlocal reload_microagents, new_session_requested
|
||||
while True:
|
||||
next_message = await read_prompt_input(
|
||||
agent_state, multiline=config.cli_multiline_input
|
||||
)
|
||||
next_message = await read_prompt_input(config.cli_multiline_input)
|
||||
|
||||
if not next_message.strip():
|
||||
continue
|
||||
@@ -155,23 +150,14 @@ async def run_session(
|
||||
return
|
||||
|
||||
async def on_event_async(event: Event) -> None:
|
||||
nonlocal reload_microagents, is_paused
|
||||
nonlocal reload_microagents
|
||||
display_event(event, config)
|
||||
update_usage_metrics(event, usage_metrics)
|
||||
|
||||
# Pause the agent if the pause event is set (if Ctrl-P is pressed)
|
||||
if is_paused.is_set():
|
||||
event_stream.add_event(
|
||||
ChangeAgentStateAction(AgentState.PAUSED),
|
||||
EventSource.USER,
|
||||
)
|
||||
is_paused.clear()
|
||||
|
||||
if isinstance(event, AgentStateChangedObservation):
|
||||
if event.agent_state in [
|
||||
AgentState.AWAITING_USER_INPUT,
|
||||
AgentState.FINISHED,
|
||||
AgentState.PAUSED,
|
||||
]:
|
||||
# Reload microagents after initialization of repo.md
|
||||
if reload_microagents:
|
||||
@@ -180,28 +166,20 @@ async def run_session(
|
||||
)
|
||||
memory.load_user_workspace_microagents(microagents)
|
||||
reload_microagents = False
|
||||
await prompt_for_next_task(event.agent_state)
|
||||
await prompt_for_next_task()
|
||||
|
||||
if event.agent_state == AgentState.AWAITING_USER_CONFIRMATION:
|
||||
# Only display the confirmation prompt if the agent is not paused
|
||||
if not is_paused.is_set():
|
||||
user_confirmed = await read_confirmation_input()
|
||||
if user_confirmed:
|
||||
event_stream.add_event(
|
||||
ChangeAgentStateAction(AgentState.USER_CONFIRMED),
|
||||
EventSource.USER,
|
||||
)
|
||||
else:
|
||||
event_stream.add_event(
|
||||
ChangeAgentStateAction(AgentState.USER_REJECTED),
|
||||
EventSource.USER,
|
||||
)
|
||||
|
||||
if event.agent_state == AgentState.RUNNING:
|
||||
# Enable pause/resume functionality only if the confirmation mode is disabled
|
||||
if not config.security.confirmation_mode:
|
||||
display_agent_running_message()
|
||||
loop.create_task(process_agent_pause(is_paused))
|
||||
user_confirmed = await read_confirmation_input()
|
||||
if user_confirmed:
|
||||
event_stream.add_event(
|
||||
ChangeAgentStateAction(AgentState.USER_CONFIRMED),
|
||||
EventSource.USER,
|
||||
)
|
||||
else:
|
||||
event_stream.add_event(
|
||||
ChangeAgentStateAction(AgentState.USER_REJECTED),
|
||||
EventSource.USER,
|
||||
)
|
||||
|
||||
def on_event(event: Event) -> None:
|
||||
loop.create_task(on_event_async(event))
|
||||
@@ -234,7 +212,7 @@ async def run_session(
|
||||
clear()
|
||||
|
||||
# Show OpenHands banner and session ID
|
||||
display_banner(session_id=sid)
|
||||
display_banner(session_id=sid, is_loaded=is_loaded)
|
||||
|
||||
# Show OpenHands welcome
|
||||
display_welcome_message()
|
||||
@@ -247,7 +225,7 @@ async def run_session(
|
||||
)
|
||||
else:
|
||||
# Otherwise prompt for the user's first message right away
|
||||
asyncio.create_task(prompt_for_next_task(''))
|
||||
asyncio.create_task(prompt_for_next_task())
|
||||
|
||||
await run_agent_until_done(
|
||||
controller, runtime, memory, [AgentState.STOPPED, AgentState.ERROR]
|
||||
|
||||
@@ -70,8 +70,6 @@ async def handle_commands(
|
||||
)
|
||||
elif command == '/settings':
|
||||
await handle_settings_command(config, settings_store)
|
||||
elif command == '/resume':
|
||||
close_repl, new_session_requested = await handle_resume_command(event_stream)
|
||||
else:
|
||||
close_repl = True
|
||||
action = MessageAction(content=command)
|
||||
@@ -185,28 +183,6 @@ async def handle_settings_command(
|
||||
await modify_llm_settings_advanced(config, settings_store)
|
||||
|
||||
|
||||
# FIXME: Currently there's an issue with the actual 'resume' behavior.
|
||||
# Setting the agent state to RUNNING will currently freeze the agent without continuing with the rest of the task.
|
||||
# This is a workaround to handle the resume command for the time being. Replace user message with the state change event once the issue is fixed.
|
||||
async def handle_resume_command(
|
||||
event_stream: EventStream,
|
||||
) -> tuple[bool, bool]:
|
||||
close_repl = True
|
||||
new_session_requested = False
|
||||
|
||||
event_stream.add_event(
|
||||
MessageAction(content='continue'),
|
||||
EventSource.USER,
|
||||
)
|
||||
|
||||
# event_stream.add_event(
|
||||
# ChangeAgentStateAction(AgentState.RUNNING),
|
||||
# EventSource.ENVIRONMENT,
|
||||
# )
|
||||
|
||||
return close_repl, new_session_requested
|
||||
|
||||
|
||||
async def init_repository(current_dir: str) -> bool:
|
||||
repo_file_path = Path(current_dir) / '.openhands' / 'microagents' / 'repo.md'
|
||||
init_repo = False
|
||||
|
||||
@@ -10,9 +10,7 @@ from prompt_toolkit import PromptSession, print_formatted_text
|
||||
from prompt_toolkit.application import Application
|
||||
from prompt_toolkit.completion import Completer, Completion
|
||||
from prompt_toolkit.formatted_text import HTML, FormattedText, StyleAndTextTuples
|
||||
from prompt_toolkit.input import create_input
|
||||
from prompt_toolkit.key_binding import KeyBindings
|
||||
from prompt_toolkit.keys import Keys
|
||||
from prompt_toolkit.layout.containers import HSplit, Window
|
||||
from prompt_toolkit.layout.controls import FormattedTextControl
|
||||
from prompt_toolkit.layout.layout import Layout
|
||||
@@ -24,7 +22,6 @@ from prompt_toolkit.widgets import Frame, TextArea
|
||||
|
||||
from openhands import __version__
|
||||
from openhands.core.config import AppConfig
|
||||
from openhands.core.schema import AgentState
|
||||
from openhands.events import EventSource
|
||||
from openhands.events.action import (
|
||||
Action,
|
||||
@@ -35,7 +32,6 @@ from openhands.events.action import (
|
||||
)
|
||||
from openhands.events.event import Event
|
||||
from openhands.events.observation import (
|
||||
AgentStateChangedObservation,
|
||||
CmdOutputObservation,
|
||||
FileEditObservation,
|
||||
FileReadObservation,
|
||||
@@ -60,7 +56,6 @@ COMMANDS = {
|
||||
'/status': 'Display session details and usage metrics',
|
||||
'/new': 'Create a new session',
|
||||
'/settings': 'Display and modify current settings',
|
||||
'/resume': 'Resume the agent',
|
||||
}
|
||||
|
||||
|
||||
@@ -119,7 +114,7 @@ def display_initialization_animation(text, is_loaded: asyncio.Event):
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
def display_banner(session_id: str):
|
||||
def display_banner(session_id: str, is_loaded: asyncio.Event):
|
||||
print_formatted_text(
|
||||
HTML(r"""<gold>
|
||||
___ _ _ _
|
||||
@@ -134,8 +129,11 @@ def display_banner(session_id: str):
|
||||
|
||||
print_formatted_text(HTML(f'<grey>OpenHands CLI v{__version__}</grey>'))
|
||||
|
||||
banner_text = (
|
||||
'Initialized session' if is_loaded.is_set() else 'Initializing session'
|
||||
)
|
||||
print_formatted_text('')
|
||||
print_formatted_text(HTML(f'<grey>Initialized session {session_id}</grey>'))
|
||||
print_formatted_text(HTML(f'<grey>{banner_text} {session_id}</grey>'))
|
||||
print_formatted_text('')
|
||||
|
||||
|
||||
@@ -179,8 +177,6 @@ def display_event(event: Event, config: AppConfig) -> None:
|
||||
display_file_edit(event)
|
||||
if isinstance(event, FileReadObservation):
|
||||
display_file_read(event)
|
||||
if isinstance(event, AgentStateChangedObservation):
|
||||
display_agent_paused_message(event.agent_state)
|
||||
|
||||
|
||||
def display_message(message: str):
|
||||
@@ -393,58 +389,77 @@ def display_status(usage_metrics: UsageMetrics, session_id: str):
|
||||
display_usage_metrics(usage_metrics)
|
||||
|
||||
|
||||
def display_agent_running_message():
|
||||
print_formatted_text('')
|
||||
print_formatted_text(
|
||||
HTML('<gold>Agent running...</gold> <grey>(Ctrl-P to pause)</grey>')
|
||||
)
|
||||
|
||||
|
||||
def display_agent_paused_message(agent_state: str):
|
||||
if agent_state != AgentState.PAUSED:
|
||||
return
|
||||
print_formatted_text('')
|
||||
print_formatted_text(
|
||||
HTML('<gold>Agent paused</gold> <grey>(type /resume to resume)</grey>')
|
||||
)
|
||||
|
||||
|
||||
# Common input functions
|
||||
class CommandCompleter(Completer):
|
||||
"""Custom completer for commands."""
|
||||
|
||||
def __init__(self, agent_state: str):
|
||||
super().__init__()
|
||||
self.agent_state = agent_state
|
||||
|
||||
def get_completions(self, document, complete_event):
|
||||
text = document.text_before_cursor.lstrip()
|
||||
text = document.text
|
||||
|
||||
# Only show completions if the user has typed '/'
|
||||
if text.startswith('/'):
|
||||
available_commands = dict(COMMANDS)
|
||||
if self.agent_state != AgentState.PAUSED:
|
||||
available_commands.pop('/resume', None)
|
||||
|
||||
for command, description in available_commands.items():
|
||||
if command.startswith(text):
|
||||
# If just '/' is typed, show all commands
|
||||
if text == '/':
|
||||
for command, description in COMMANDS.items():
|
||||
yield Completion(
|
||||
command,
|
||||
start_position=-len(text),
|
||||
display_meta=description,
|
||||
style='bg:ansidarkgray fg:ansiwhite',
|
||||
command[1:], # Remove the leading '/' as it's already typed
|
||||
start_position=0,
|
||||
display=f'{command} - {description}',
|
||||
)
|
||||
# Otherwise show matching commands
|
||||
else:
|
||||
for command, description in COMMANDS.items():
|
||||
if command.startswith(text):
|
||||
yield Completion(
|
||||
command[len(text) :], # Complete the remaining part
|
||||
start_position=0,
|
||||
display=f'{command} - {description}',
|
||||
)
|
||||
|
||||
|
||||
def create_prompt_session():
|
||||
return PromptSession(style=DEFAULT_STYLE)
|
||||
prompt_session = PromptSession(style=DEFAULT_STYLE)
|
||||
|
||||
# RPrompt animation related variables
|
||||
SPINNER_FRAMES = [
|
||||
'[ ■□□□ ]',
|
||||
'[ □■□□ ]',
|
||||
'[ □□■□ ]',
|
||||
'[ □□□■ ]',
|
||||
'[ □□■□ ]',
|
||||
'[ □■□□ ]',
|
||||
]
|
||||
ANIMATION_INTERVAL = 0.2 # seconds
|
||||
|
||||
current_frame_index = 0
|
||||
last_update_time = time.monotonic()
|
||||
|
||||
|
||||
async def read_prompt_input(agent_state: str, multiline=False):
|
||||
# RPrompt function for the user confirmation
|
||||
def get_rprompt() -> FormattedText:
|
||||
"""
|
||||
Returns the current animation frame for the rprompt.
|
||||
This function is called by prompt_toolkit during rendering.
|
||||
"""
|
||||
global current_frame_index, last_update_time
|
||||
|
||||
# Only update the frame if enough time has passed
|
||||
# This prevents excessive recalculation during rendering
|
||||
now = time.monotonic()
|
||||
if now - last_update_time > ANIMATION_INTERVAL:
|
||||
current_frame_index = (current_frame_index + 1) % len(SPINNER_FRAMES)
|
||||
last_update_time = now
|
||||
|
||||
# Return the frame wrapped in FormattedText
|
||||
return FormattedText(
|
||||
[
|
||||
('', ' '), # Add a space before the spinner
|
||||
(COLOR_GOLD, SPINNER_FRAMES[current_frame_index]),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
async def read_prompt_input(multiline=False):
|
||||
try:
|
||||
prompt_session = create_prompt_session()
|
||||
prompt_session.completer = (
|
||||
CommandCompleter(agent_state) if not multiline else None
|
||||
)
|
||||
|
||||
if multiline:
|
||||
kb = KeyBindings()
|
||||
|
||||
@@ -455,54 +470,38 @@ async def read_prompt_input(agent_state: str, multiline=False):
|
||||
with patch_stdout():
|
||||
print_formatted_text('')
|
||||
message = await prompt_session.prompt_async(
|
||||
HTML(
|
||||
'<gold>Enter your message and press Ctrl-D to finish:</gold>\n'
|
||||
),
|
||||
'Enter your message and press Ctrl+D to finish:\n',
|
||||
multiline=True,
|
||||
key_bindings=kb,
|
||||
)
|
||||
else:
|
||||
with patch_stdout():
|
||||
print_formatted_text('')
|
||||
prompt_session.completer = CommandCompleter()
|
||||
message = await prompt_session.prompt_async(
|
||||
HTML('<gold>> </gold>'),
|
||||
'> ',
|
||||
)
|
||||
return message if message is not None else ''
|
||||
return message
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
return '/exit'
|
||||
|
||||
|
||||
async def read_confirmation_input() -> bool:
|
||||
async def read_confirmation_input():
|
||||
try:
|
||||
prompt_session = create_prompt_session()
|
||||
|
||||
with patch_stdout():
|
||||
print_formatted_text('')
|
||||
confirmation: str = await prompt_session.prompt_async(
|
||||
HTML('<gold>Proceed with action? (y)es/(n)o > </gold>'),
|
||||
prompt_session.completer = None
|
||||
confirmation = await prompt_session.prompt_async(
|
||||
'Proceed with action? (y)es/(n)o > ',
|
||||
rprompt=get_rprompt,
|
||||
refresh_interval=ANIMATION_INTERVAL / 2,
|
||||
)
|
||||
|
||||
confirmation = '' if confirmation is None else confirmation.strip().lower()
|
||||
prompt_session.rprompt = None
|
||||
confirmation = confirmation.strip().lower()
|
||||
return confirmation in ['y', 'yes']
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
return False
|
||||
|
||||
|
||||
async def process_agent_pause(done: asyncio.Event) -> None:
|
||||
input = create_input()
|
||||
|
||||
def keys_ready():
|
||||
for key_press in input.read_keys():
|
||||
if key_press.key == Keys.ControlP:
|
||||
print_formatted_text('')
|
||||
print_formatted_text(HTML('<gold>Pausing the agent...</gold>'))
|
||||
done.set()
|
||||
|
||||
with input.raw_mode():
|
||||
with input.attach(keys_ready):
|
||||
await done.wait()
|
||||
|
||||
|
||||
def cli_confirm(
|
||||
question: str = 'Are you sure?', choices: list[str] | None = None
|
||||
) -> int:
|
||||
|
||||
@@ -41,7 +41,7 @@ class GitHubService(BaseGitService, GitService):
|
||||
if token:
|
||||
self.token = token
|
||||
|
||||
if base_domain:
|
||||
if base_domain and base_domain != "github.com":
|
||||
self.BASE_URL = f'https://{base_domain}/api/v3'
|
||||
|
||||
@property
|
||||
@@ -120,12 +120,6 @@ class GitHubService(BaseGitService, GitService):
|
||||
email=response.get('email'),
|
||||
)
|
||||
|
||||
async def verify_access(self) -> bool:
|
||||
"""Verify if the token is valid by making a simple request."""
|
||||
url = f'{self.BASE_URL}'
|
||||
await self._make_request(url)
|
||||
return True
|
||||
|
||||
async def _fetch_paginated_repos(
|
||||
self, url: str, params: dict, max_repos: int, extract_key: str | None = None
|
||||
) -> list[dict]:
|
||||
|
||||
@@ -34,6 +34,7 @@ from openhands.server.types import AppMode
|
||||
class ProviderToken(BaseModel):
|
||||
token: SecretStr | None = Field(default=None)
|
||||
user_id: str | None = Field(default=None)
|
||||
base_domain: str | None = Field(default=None)
|
||||
|
||||
model_config = {
|
||||
'frozen': True, # Makes the entire model immutable
|
||||
@@ -43,15 +44,20 @@ class ProviderToken(BaseModel):
|
||||
@classmethod
|
||||
def from_value(cls, token_value: ProviderToken | dict[str, str]) -> ProviderToken:
|
||||
"""Factory method to create a ProviderToken from various input types"""
|
||||
if isinstance(token_value, ProviderToken):
|
||||
if isinstance(token_value, cls):
|
||||
return token_value
|
||||
elif isinstance(token_value, dict):
|
||||
token_str = token_value.get('token')
|
||||
user_id = token_value.get('user_id')
|
||||
return cls(token=SecretStr(token_str), user_id=user_id)
|
||||
base_domain = token_value.get('base_domain')
|
||||
return cls(
|
||||
token=SecretStr(token_str) if token_str is not None else None,
|
||||
user_id=user_id,
|
||||
base_domain=base_domain,
|
||||
)
|
||||
|
||||
else:
|
||||
raise ValueError('Unsupport Provider token type')
|
||||
raise ValueError('Unsupported Provider token type')
|
||||
|
||||
|
||||
PROVIDER_TOKEN_TYPE = MappingProxyType[ProviderType, ProviderToken]
|
||||
@@ -60,10 +66,6 @@ PROVIDER_TOKEN_TYPE_WITH_JSON_SCHEMA = Annotated[
|
||||
PROVIDER_TOKEN_TYPE,
|
||||
WithJsonSchema({'type': 'object', 'additionalProperties': {'type': 'string'}}),
|
||||
]
|
||||
CUSTOM_SECRETS_TYPE_WITH_JSON_SCHEMA = Annotated[
|
||||
CUSTOM_SECRETS_TYPE,
|
||||
WithJsonSchema({'type': 'object', 'additionalProperties': {'type': 'string'}}),
|
||||
]
|
||||
|
||||
|
||||
class SecretStore(BaseModel):
|
||||
@@ -71,8 +73,8 @@ class SecretStore(BaseModel):
|
||||
default_factory=lambda: MappingProxyType({})
|
||||
)
|
||||
|
||||
custom_secrets: CUSTOM_SECRETS_TYPE_WITH_JSON_SCHEMA = Field(
|
||||
default_factory=lambda: MappingProxyType({}),
|
||||
custom_secrets: CUSTOM_SECRETS_TYPE = Field(
|
||||
default_factory=lambda: MappingProxyType({})
|
||||
)
|
||||
|
||||
model_config = {
|
||||
@@ -102,6 +104,7 @@ class SecretStore(BaseModel):
|
||||
if expose_secrets
|
||||
else pydantic_encoder(provider_token.token),
|
||||
'user_id': provider_token.user_id,
|
||||
'base_domain': provider_token.base_domain,
|
||||
}
|
||||
|
||||
return tokens
|
||||
|
||||
@@ -23,7 +23,7 @@ async def validate_provider_token(
|
||||
# Try GitHub first
|
||||
try:
|
||||
github_service = GitHubService(token=token, base_domain=base_domain)
|
||||
await github_service.verify_access()
|
||||
await github_service.get_user()
|
||||
return ProviderType.GITHUB
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -54,7 +54,6 @@ class ConversationMemory:
|
||||
def process_events(
|
||||
self,
|
||||
condensed_history: list[Event],
|
||||
initial_user_action: MessageAction,
|
||||
max_message_chars: int | None = None,
|
||||
vision_is_active: bool = False,
|
||||
) -> list[Message]:
|
||||
@@ -67,14 +66,12 @@ class ConversationMemory:
|
||||
max_message_chars: The maximum number of characters in the content of an event included
|
||||
in the prompt to the LLM. Larger observations are truncated.
|
||||
vision_is_active: Whether vision is active in the LLM. If True, image URLs will be included.
|
||||
initial_user_action: The initial user message action, if available. Used to ensure the conversation starts correctly.
|
||||
"""
|
||||
|
||||
events = condensed_history
|
||||
|
||||
# Ensure the event list starts with SystemMessageAction, then MessageAction(source='user')
|
||||
# Ensure the system message exists (handles legacy cases)
|
||||
self._ensure_system_message(events)
|
||||
self._ensure_initial_user_message(events, initial_user_action)
|
||||
|
||||
# log visual browsing status
|
||||
logger.debug(f'Visual browsing: {self.agent_config.enable_som_visual_browsing}')
|
||||
@@ -230,8 +227,8 @@ class ConversationMemory:
|
||||
pending_tool_call_action_messages[llm_response.id] = Message(
|
||||
role=getattr(assistant_msg, 'role', 'assistant'),
|
||||
# tool call content SHOULD BE a string
|
||||
content=[TextContent(text=assistant_msg.content)]
|
||||
if assistant_msg.content and assistant_msg.content.strip()
|
||||
content=[TextContent(text=assistant_msg.content or '')]
|
||||
if assistant_msg.content is not None
|
||||
else [],
|
||||
tool_calls=assistant_msg.tool_calls,
|
||||
)
|
||||
@@ -702,43 +699,6 @@ class ConversationMemory:
|
||||
system_message = SystemMessageAction(content=system_prompt)
|
||||
# Insert the system message directly at the beginning of the events list
|
||||
events.insert(0, system_message)
|
||||
logger.info(
|
||||
logger.debug(
|
||||
'[ConversationMemory] Added SystemMessageAction for backward compatibility'
|
||||
)
|
||||
|
||||
def _ensure_initial_user_message(
|
||||
self, events: list[Event], initial_user_action: MessageAction
|
||||
) -> None:
|
||||
"""Checks if the second event is a user MessageAction and inserts the provided one if needed."""
|
||||
if (
|
||||
not events
|
||||
): # Should have system message from previous step, but safety check
|
||||
logger.error('Cannot ensure initial user message: event list is empty.')
|
||||
# Or raise? Let's log for now, _ensure_system_message should handle this.
|
||||
return
|
||||
|
||||
# We expect events[0] to be SystemMessageAction after _ensure_system_message
|
||||
if len(events) == 1:
|
||||
# Only system message exists
|
||||
logger.info(
|
||||
'Initial user message action was missing. Inserting the initial user message.'
|
||||
)
|
||||
events.insert(1, initial_user_action)
|
||||
elif not isinstance(events[1], MessageAction) or events[1].source != 'user':
|
||||
# The second event exists but is not the correct initial user message action.
|
||||
# We will insert the correct one provided.
|
||||
logger.info(
|
||||
'Second event was not the initial user message action. Inserting correct one at index 1.'
|
||||
)
|
||||
|
||||
# Insert the user message event at index 1. This will be the second message as LLM APIs expect
|
||||
# but something was wrong with the history, so log all we can.
|
||||
events.insert(1, initial_user_action)
|
||||
|
||||
# Else: events[1] is already a user MessageAction.
|
||||
# Check if it matches the one provided (if any discrepancy, log warning but proceed).
|
||||
elif events[1] != initial_user_action:
|
||||
logger.debug(
|
||||
'The user MessageAction at index 1 does not match the provided initial_user_action. '
|
||||
'Proceeding with the one found in condensed history.'
|
||||
)
|
||||
|
||||
@@ -4,7 +4,6 @@ from typing import overload
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action.agent import CondensationAction
|
||||
from openhands.events.event import Event
|
||||
from openhands.events.observation.agent import AgentCondensationObservation
|
||||
@@ -66,8 +65,6 @@ class View(BaseModel):
|
||||
break
|
||||
|
||||
if summary is not None and summary_offset is not None:
|
||||
logger.info(f'Inserting summary at offset {summary_offset}')
|
||||
|
||||
kept_events.insert(
|
||||
summary_offset, AgentCondensationObservation(content=summary)
|
||||
)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user