Compare commits

..

6 Commits

Author SHA1 Message Date
Bashwara Undupitiya 35b381f3a8 fix: Update folder security dialog styling (#7886) 2025-04-16 14:33:33 -04:00
Robert Brennan c616f32867 Fix: Ensure consistent tab height when workspace tab is selected (#7885)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-16 14:11:08 -04:00
tofarr e79baeb93b Fix for error on close (#7884) 2025-04-16 14:11:03 -04:00
Xingyao Wang 516dff13c5 Update repo microagent docs with frontend action handling information (#7856)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-16 14:10:54 -04:00
Xingyao Wang eed02e8ce7 frontend: fix terminal prompt and command styling (#7872)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-04-16 14:10:43 -04:00
mamoodi d8c9ce74e3 Release 0.33.0 2025-04-16 09:56:40 -04:00
291 changed files with 4744 additions and 17622 deletions
+53
View File
@@ -0,0 +1,53 @@
# Workflow that uses the DummyAgent to run a simple end-to-end task.
# The job fails if the OpenHands entry point exits with a non-zero status.
name: Run E2E test with dummy agent

# Always run on "main"
# Always run on PRs
on:
  push:
    branches:
      - main
  pull_request:

# All runs triggered by the same PR share one concurrency group, so a newer
# push cancels the older in-progress run. Each commit on main falls back to
# its run_id and therefore gets its own unique group.
concurrency:
  group: ${{ github.workflow }}-${{ (github.head_ref && github.ref) || github.run_id }}
  cancel-in-progress: true

jobs:
  test:
    runs-on: blacksmith-4vcpu-ubuntu-2204
    steps:
      - uses: actions/checkout@v4
      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@v3
      - name: Install tmux
        run: sudo apt-get update && sudo apt-get install -y tmux
      - name: Setup Node.js
        uses: useblacksmith/setup-node@v5
        with:
          node-version: '22.x'
      # Poetry is installed before setup-python because that step is configured with cache: 'poetry'
      - name: Install poetry via pipx
        run: pipx install poetry
      - name: Set up Python
        uses: useblacksmith/setup-python@v6
        with:
          python-version: '3.12'
          cache: 'poetry'
      - name: Install Python dependencies using Poetry
        run: poetry install --without evaluation
      - name: Build Environment
        run: make build
      # The pass/fail report lives in the same step as the command itself.
      # Every `run:` step starts a fresh shell, so inspecting `$?` in a later
      # step always yields 0 and can never observe this command's exit status.
      - name: Run tests
        run: |
          if SANDBOX_FORCE_REBUILD_RUNTIME=True poetry run python3 openhands/core/main.py -t "do a flip" -d ./workspace/ -c DummyAgent; then
            echo "Test passed"
          else
            echo "Test failed"
            exit 1
          fi
+1 -1
View File
@@ -179,7 +179,7 @@ jobs:
echo "MAX_ITERATIONS=${{ inputs.max_iterations || 50 }}" >> $GITHUB_ENV
echo "SANDBOX_ENV_GITHUB_TOKEN=${{ secrets.PAT_TOKEN || github.token }}" >> $GITHUB_ENV
echo "SANDBOX_BASE_CONTAINER_IMAGE=${{ inputs.base_container_image }}" >> $GITHUB_ENV
echo "SANDBOX_ENV_BASE_CONTAINER_IMAGE=${{ inputs.base_container_image }}" >> $GITHUB_ENV
# Set branch variables
echo "TARGET_BRANCH=${{ inputs.target_branch || 'main' }}" >> $GITHUB_ENV
+1 -1
View File
@@ -118,7 +118,7 @@ poetry run pytest ./tests/unit/test_*.py
To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker container image by
setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.34-nikolaik`
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.33-nikolaik`
## Develop inside Docker container
+1 -14
View File
@@ -39,7 +39,6 @@ ifeq ($(INSTALL_DOCKER),)
@$(MAKE) -s check-docker
endif
@$(MAKE) -s check-poetry
@$(MAKE) -s check-tmux
@echo "$(GREEN)Dependencies checked successfully.$(RESET)"
check-system:
@@ -102,18 +101,6 @@ check-docker:
exit 1; \
fi
check-tmux:
@echo "$(YELLOW)Checking tmux installation...$(RESET)"
@if command -v tmux > /dev/null; then \
echo "$(BLUE)$(shell tmux -V) is already installed.$(RESET)"; \
else \
echo "$(YELLOW)╔════════════════════════════════════════════════════════════════════════════╗$(RESET)"; \
echo "$(YELLOW)║ OPTIONAL: tmux is not installed. ║$(RESET)"; \
echo "$(YELLOW)║ Some advanced terminal features may not work without tmux. ║$(RESET)"; \
echo "$(YELLOW)║ You can install it if needed, but it's not required for development. ║$(RESET)"; \
echo "$(YELLOW)╚════════════════════════════════════════════════════════════════════════════╝$(RESET)"; \
fi
check-poetry:
@echo "$(YELLOW)Checking Poetry installation...$(RESET)"
@if command -v poetry > /dev/null; then \
@@ -188,7 +175,7 @@ install-pre-commit-hooks:
lint-backend:
@echo "$(YELLOW)Running linters...$(RESET)"
@poetry run pre-commit run --files openhands/**/* evaluation/**/* tests/**/* --show-diff-on-failure --config $(PRE_COMMIT_CONFIG_PATH)
@poetry run pre-commit run --files openhands/**/* agenthub/**/* evaluation/**/* --show-diff-on-failure --config $(PRE_COMMIT_CONFIG_PATH)
lint-frontend:
@echo "$(YELLOW)Running linters for frontend...$(RESET)"
+4 -4
View File
@@ -18,7 +18,7 @@
<br/>
<a href="https://docs.all-hands.dev/modules/usage/getting-started"><img src="https://img.shields.io/badge/Documentation-000?logo=googledocs&logoColor=FFE165&style=for-the-badge" alt="Check out the documentation"></a>
<a href="https://arxiv.org/abs/2407.16741"><img src="https://img.shields.io/badge/Paper%20on%20Arxiv-000?logoColor=FFE165&logo=arxiv&style=for-the-badge" alt="Paper on Arxiv"></a>
<a href="https://docs.google.com/spreadsheets/d/1wOUdFCMyY6Nt0AIqF705KN4JKOWgeI4wUGUP60krXXs/edit?gid=0#gid=0"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score"></a>
<a href="https://huggingface.co/spaces/OpenHands/evaluation"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score"></a>
<hr>
</div>
@@ -52,17 +52,17 @@ system requirements and more information.
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands-state:/.openhands-state \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.34
docker.all-hands.dev/all-hands-ai/openhands:0.33
```
You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!
-13
View File
@@ -221,22 +221,9 @@ enable_browsing = true
# Whether the LLM draft editor is enabled
enable_llm_editor = false
# Whether the standard editor tool (str_replace_editor) is enabled
# Only has an effect if enable_llm_editor is False
enable_editor = true
# Whether the IPython tool is enabled
enable_jupyter = true
# Whether the command tool is enabled
enable_cmd = true
# Whether the think tool is enabled
enable_think = true
# Whether the finish tool is enabled
enable_finish = true
# LLM config group to use
#llm_config = 'your-llm-config-group'
+3 -3
View File
@@ -61,8 +61,8 @@ RUN add-apt-repository ppa:deadsnakes/ppa \
&& apt-get install -y python3.12 python3.12-venv python3.12-dev python3-pip \
&& ln -s /usr/bin/python3.12 /usr/bin/python
# NodeJS >= 22.x
RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
# NodeJS >= 18.17.1
RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \
&& apt-get install -y nodejs
# Poetry >= 1.8
@@ -108,7 +108,7 @@ WORKDIR /app
# cache build dependencies
RUN \
--mount=type=bind,source=./,target=/app/,rw \
--mount=type=bind,source=./,target=/app/ \
<<EOF
#!/bin/bash
make -s clean
+1 -1
View File
@@ -11,7 +11,7 @@ services:
- BACKEND_HOST=${BACKEND_HOST:-"0.0.0.0"}
- SANDBOX_API_HOSTNAME=host.docker.internal
#
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.34-nikolaik}
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.33-nikolaik}
- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
ports:
+1 -1
View File
@@ -7,7 +7,7 @@ services:
image: openhands:latest
container_name: openhands-app-${DATE:-}
environment:
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik}
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik}
#- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234} # enable this only if you want a specific non-root sandbox user but you will have to manually adjust permissions of openhands-state for this user
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
ports:
-1
View File
@@ -3,7 +3,6 @@
# Production
/build
/static/swagger-ui
# Generated files
.docusaurus
-6
View File
@@ -36,7 +36,6 @@ const config: Config = {
mermaid: true,
},
themes: ['@docusaurus/theme-mermaid'],
plugins: [],
presets: [
[
'classic',
@@ -76,11 +75,6 @@ const config: Config = {
position: 'left',
label: 'User Guides',
},
{
href: 'https://docs.all-hands.dev/swagger-ui/', // FIXME: this should be a relative path, but docusarus steals the click
label: 'API',
position: 'left',
},
{
type: 'localeDropdown',
position: 'left',
-102
View File
@@ -1,102 +0,0 @@
const fs = require('fs');
const path = require('path');
const swaggerUiDist = require('swagger-ui-dist');
/**
 * Generates a standalone Swagger UI under static/swagger-ui for the docs site.
 *
 * What this script does, in order:
 * 1. Asks swagger-ui-dist where its distributable assets live on disk.
 * 2. Ensures the static/swagger-ui directory next to this script exists.
 * 3. Copies every top-level asset file into it, leaving out directories,
 *    package.json, README.md and *.map files.
 * 4. Writes an index.html that loads the OpenAPI spec from /openapi.json.
 */

// Directory that holds the prebuilt Swagger UI assets
const assetRoot = swaggerUiDist.getAbsoluteFSPath();

// Destination directory: <this script's directory>/static/swagger-ui
const outputDir = path.join(__dirname, 'static', 'swagger-ui');
if (!fs.existsSync(outputDir)) {
  fs.mkdirSync(outputDir, { recursive: true });
}

// Copy the assets one by one; the directory check comes first, as before.
for (const entry of fs.readdirSync(assetRoot)) {
  const from = path.join(assetRoot, entry);
  const wanted =
    !fs.statSync(from).isDirectory() &&
    entry !== 'package.json' &&
    entry !== 'README.md' &&
    !entry.endsWith('.map');

  if (wanted) {
    fs.copyFileSync(from, path.join(outputDir, entry));
  }
}

// Landing page that boots Swagger UI against the OpenAPI spec
const landingPage = `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>OpenHands API Documentation</title>
<link rel="stylesheet" type="text/css" href="./swagger-ui.css" />
<link rel="icon" type="image/png" href="./favicon-32x32.png" sizes="32x32" />
<link rel="icon" type="image/png" href="./favicon-16x16.png" sizes="16x16" />
<style>
html {
box-sizing: border-box;
overflow: -moz-scrollbars-vertical;
overflow-y: scroll;
}
*,
*:before,
*:after {
box-sizing: inherit;
}
body {
margin: 0;
background: #fafafa;
}
</style>
</head>
<body>
<div id="swagger-ui"></div>
<script src="./swagger-ui-bundle.js" charset="UTF-8"> </script>
<script src="./swagger-ui-standalone-preset.js" charset="UTF-8"> </script>
<script>
window.onload = function() {
// Begin Swagger UI call region
const ui = SwaggerUIBundle({
url: "/openapi.json",
dom_id: '#swagger-ui',
deepLinking: true,
presets: [
SwaggerUIBundle.presets.apis,
SwaggerUIStandalonePreset
],
plugins: [
SwaggerUIBundle.plugins.DownloadUrl
],
layout: "StandaloneLayout"
});
// End Swagger UI call region
window.ui = ui;
};
</script>
</body>
</html>
`;

const landingPagePath = path.join(outputDir, 'index.html');
fs.writeFileSync(landingPagePath, landingPage);
console.log('Swagger UI files generated successfully in static/swagger-ui/');
@@ -52,7 +52,7 @@ LLM_API_KEY="sk_test_12345"
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -61,7 +61,7 @@ docker run -it \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
python -m openhands.core.cli
```
@@ -46,7 +46,7 @@ LLM_API_KEY="sk_test_12345"
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -56,6 +56,6 @@ docker run -it \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
```
@@ -13,16 +13,16 @@
La façon la plus simple d'exécuter OpenHands est avec Docker.
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.34
docker.all-hands.dev/all-hands-ai/openhands:0.33
```
Vous pouvez également exécuter OpenHands en mode [headless scriptable](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), en tant que [CLI interactive](https://docs.all-hands.dev/modules/usage/how-to/cli-mode), ou en utilisant l'[Action GitHub OpenHands](https://docs.all-hands.dev/modules/usage/how-to/github-action).
@@ -13,7 +13,7 @@ C'est le Runtime par défaut qui est utilisé lorsque vous démarrez OpenHands.
```
docker run # ...
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-v /var/run/docker.sock:/var/run/docker.sock \
# ...
```
@@ -34,7 +34,7 @@ Docker で OpenHands を CLI モードで実行するには:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -44,7 +44,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
python -m openhands.core.cli
```
@@ -31,7 +31,7 @@ DockerでOpenHandsをヘッドレスモードで実行するには:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -42,7 +42,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
python -m openhands.core.main -t "write a bash script that prints hi"
```
@@ -25,7 +25,7 @@ nikolaik の `SANDBOX_RUNTIME_CONTAINER_IMAGE` は、ランタイムサーバー
```bash
docker run # ...
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-v $WORKSPACE_BASE:/opt/workspace_base \
@@ -82,5 +82,5 @@ docker network create openhands-network
# 分離されたネットワークで OpenHands を実行
docker run # ... \
--network openhands-network \
docker.all-hands.dev/all-hands-ai/openhands:0.34
docker.all-hands.dev/all-hands-ai/openhands:0.33
```
@@ -35,7 +35,7 @@ Para executar o OpenHands no modo CLI com Docker:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -45,7 +45,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
python -m openhands.core.cli
```
@@ -32,7 +32,7 @@ Para executar o OpenHands no modo Headless com Docker:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -43,7 +43,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
python -m openhands.core.main -t "escreva um script bash que imprima oi"
```
@@ -58,17 +58,17 @@
A maneira mais fácil de executar o OpenHands é no Docker.
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands-state:/.openhands-state \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.34
docker.all-hands.dev/all-hands-ai/openhands:0.33
```
Você encontrará o OpenHands em execução em http://localhost:3000!
@@ -13,7 +13,7 @@ Este é o Runtime padrão que é usado quando você inicia o OpenHands. Você po
```
docker run # ...
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-v /var/run/docker.sock:/var/run/docker.sock \
# ...
```
@@ -50,7 +50,7 @@ LLM_API_KEY="sk_test_12345"
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -59,7 +59,7 @@ docker run -it \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
python -m openhands.core.cli
```
@@ -47,7 +47,7 @@ LLM_API_KEY="sk_test_12345"
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -57,6 +57,6 @@ docker run -it \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
```
@@ -11,16 +11,16 @@
在 Docker 中运行 OpenHands 是最简单的方式。
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.34
docker.all-hands.dev/all-hands-ai/openhands:0.33
```
你也可以在可脚本化的[无头模式](https://docs.all-hands.dev/modules/usage/how-to/headless-mode)下运行 OpenHands,作为[交互式 CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode),或使用 [OpenHands GitHub Action](https://docs.all-hands.dev/modules/usage/how-to/github-action)。
@@ -11,7 +11,7 @@
```
docker run # ...
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-v /var/run/docker.sock:/var/run/docker.sock \
# ...
```
+10 -18
View File
@@ -8,22 +8,18 @@ OpenHands Cloud can be accessed at https://app.all-hands.dev/.
## Getting Started
After visiting OpenHands Cloud, you will be asked to connect with your GitHub or GitLab account:
1. After reading and accepting the terms of service, click `Log in with GitHub` or `Log in with GitLab`.
After visiting OpenHands Cloud, you will be asked to connect with your GitHub account:
1. After reading and accepting the terms of service, click `Connect to GitHub`.
2. Review the permissions requested by OpenHands and then click `Authorize OpenHands AI`.
- OpenHands will require some permissions from your GitHub or GitLab account. To read more about these permissions:
- GitHub: You can click the `Learn more` link on the GitHub authorize page.
- GitLab: You can expand each permission request on the GitLab authorize page.
- OpenHands will require some permissions from your GitHub account. To read more about these permissions,
you can click the `Learn more` link on the GitHub authorize page.
## Repository Access
### GitHub
#### Adding Repository Access
### Adding Repository Access
You can grant OpenHands specific repository access:
1. Click `Add GitHub repos` on the Home page.
1. Click the `Select a GitHub project` dropdown, select `Add more repositories...`.
2. Select the organization, then choose the specific repositories to grant OpenHands access to.
<details>
<summary>Permission Details for Repository Access</summary>
@@ -46,15 +42,11 @@ You can grant OpenHands specific repository access:
3. Click on `Install & Authorize`.
#### Modifying Repository Access
### Modifying Repository Access
You can modify GitHub repository access at any time by:
* Using the same `Add GitHub repos` workflow, or
* Visiting the Settings page and selecting `Configure GitHub Repositories` under the `Git Settings` section.
### GitLab
When using your GitLab account, OpenHands will automatically have access to your repositories.
You can modify repository access at any time by:
* Using the same `Select a GitHub project > Add more repositories` workflow, or
* Visiting the Settings page and selecting `Configure GitHub Repositories` under the `GitHub Settings` section.
## Conversation Persistence
+2 -2
View File
@@ -35,7 +35,7 @@ To run OpenHands in CLI mode with Docker:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -45,7 +45,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
python -m openhands.core.cli
```
@@ -1,8 +1,7 @@
# Custom Sandbox
:::note
This guide is for users that would like to use their own custom Docker image for the runtime. For example
with certain tools or programming languages pre-installed.
This guide is for users that would like to use their own custom Docker image for the runtime, e.g. with certain tools or programming languages pre-installed.
:::
The sandbox is where the agent performs its tasks. Instead of running commands directly on your computer
+24 -4
View File
@@ -24,8 +24,9 @@ OpenHands supports multiple version control providers. You can configure tokens
#### GitHub Token Setup
OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if provided:
OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if it is available. This can happen in two ways:
**Local Installation**: The user directly inputs their GitHub token.
<details>
<summary>Setting Up a GitHub Token</summary>
@@ -39,8 +40,9 @@ OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if pro
- Minimal Permissions (select `Meta Data = Read-only` for search, plus `Pull Requests = Read and Write` and `Content = Read and Write` for branch creation)
2. **Enter Token in OpenHands**:
- Click the Settings button (gear icon).
- Navigate to the `Git Provider Settings` section.
- Paste your token in the `GitHub Token` field.
- Click `Save` to apply the changes.
- Click `Save Changes` to apply the changes.
</details>
<details>
@@ -81,9 +83,26 @@ OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if pro
- Check the browser console for any error messages.
</details>
**OpenHands Cloud**: The token is obtained through GitHub OAuth authentication.
<details>
<summary>OAuth Authentication</summary>
When using OpenHands Cloud, the GitHub OAuth flow requests the following permissions:
- Repository access (read/write)
- Workflow management
- Organization read access
To authenticate OpenHands:
- Click `Sign in with GitHub` when prompted.
- Review the requested permissions.
- Authorize OpenHands to access your GitHub account.
- If using an organization, authorize organization access if prompted.
</details>
#### GitLab Token Setup
OpenHands automatically exports a `GITLAB_TOKEN` to the shell environment if provided:
OpenHands automatically exports a `GITLAB_TOKEN` to the shell environment, for local installations only, if it is available.
<details>
<summary>Setting Up a GitLab Token</summary>
@@ -98,9 +117,10 @@ OpenHands automatically exports a `GITLAB_TOKEN` to the shell environment if pro
- Set an expiration date or leave it blank for a non-expiring token.
2. **Enter Token in OpenHands**:
- Click the Settings button (gear icon).
- Navigate to the `Git Provider Settings` section.
- Paste your token in the `GitLab Token` field.
- Enter your GitLab instance URL if using self-hosted GitLab.
- Click `Save` to apply the changes.
- Click `Save Changes` to apply the changes.
</details>
<details>
+2 -2
View File
@@ -32,7 +32,7 @@ To run OpenHands in Headless mode with Docker:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -43,7 +43,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
python -m openhands.core.main -t "write a bash script that prints hi"
```
+3 -3
View File
@@ -58,17 +58,17 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to
The easiest way to run OpenHands is in Docker.
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands-state:/.openhands-state \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.34
docker.all-hands.dev/all-hands-ai/openhands:0.33
```
You'll find OpenHands running at http://localhost:3000!
+4 -7
View File
@@ -6,26 +6,23 @@
- Displays the conversation between the user and OpenHands.
- OpenHands explains its actions in this panel.
### Changes
- Shows the file changes performed by OpenHands.
### Workspace
- Browse project files and directories.
- Use the `Open in VS Code` option to:
* Modify files
* Upload and download files
### Terminal
- A space for OpenHands and users to run terminal commands.
### Jupyter
- Shows all Python commands that were executed by OpenHands.
- Particularly handy when using OpenHands to perform data visualization tasks.
### App
- Displays the web server when OpenHands runs an application.
- Shows the web server when OpenHands runs an application.
- Users can interact with the running application.
### Browser
- Used by OpenHands to browse websites.
- The browser is non-interactive.
### Terminal
- A space for OpenHands and users to run terminal commands.
-2
View File
@@ -17,8 +17,6 @@ Based on these findings and community feedback, the following models have been v
- [gemini/gemini-2.5-pro](https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/)
- [deepseek/deepseek-chat](https://api-docs.deepseek.com/)
- [openai/o3-mini](https://openai.com/index/openai-o3-mini/)
- [openai/o3](https://openai.com/index/introducing-o3-and-o4-mini/)
- [openai/o4-mini](https://openai.com/index/introducing-o3-and-o4-mini/)
- [all-hands/openhands-lm-32b-v0.1](https://www.all-hands.dev/blog/introducing-openhands-lm-32b----a-strong-open-coding-agent-model) -- available through [OpenRouter](https://openrouter.ai/all-hands/openhands-lm-32b-v0.1)
+4 -4
View File
@@ -15,7 +15,7 @@ It is highly recommended that you use GPUs to serve local models for optimal exp
For example, to download [OpenHands LM 32B v0.1](https://huggingface.co/all-hands/openhands-lm-32b-v0.1):
```bash
huggingface-cli download all-hands/openhands-lm-32b-v0.1 --local-dir all-hands/openhands-lm-32b-v0.1
huggingface-cli download all-hands/openhands-lm-32b-v0.1 --local-dir my_folder/openhands-lm-32b-v0.1
```
## Create an OpenAI-Compatible Endpoint With a Model Serving Framework
@@ -27,7 +27,7 @@ huggingface-cli download all-hands/openhands-lm-32b-v0.1 --local-dir all-hands/o
```bash
SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 python3 -m sglang.launch_server \
--model all-hands/openhands-lm-32b-v0.1 \
--model my_folder/openhands-lm-32b-v0.1 \
--served-model-name openhands-lm-32b-v0.1 \
--port 8000 \
--tp 2 --dp 1 \
@@ -41,7 +41,7 @@ SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 python3 -m sglang.launch_server \
- Example launch command for OpenHands LM 32B (with at least 2 GPUs):
```bash
vllm serve all-hands/openhands-lm-32b-v0.1 \
vllm serve my_folder/openhands-lm-32b-v0.1 \
--host 0.0.0.0 --port 8000 \
--api-key mykey \
--tensor-parallel-size 2 \
@@ -67,7 +67,7 @@ Ensure `config.toml` exists by running `make setup-config` which will create one
workspace_base="/path/to/your/workspace"
[llm]
model="openhands-lm-32b-v0.1"
embedding_model="local"
ollama_base_url="http://localhost:8000"
```
+19 -414
View File
@@ -24,8 +24,6 @@
"@docusaurus/module-type-aliases": "^3.5.1",
"@docusaurus/tsconfig": "^3.7.0",
"@docusaurus/types": "^3.5.1",
"swagger-cli": "^4.0.4",
"swagger-ui-dist": "^5.21.0",
"typescript": "~5.8.3"
},
"engines": {
@@ -275,273 +273,6 @@
"node": ">=6.0.0"
}
},
"node_modules/@apidevtools/openapi-schemas": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/@apidevtools/openapi-schemas/-/openapi-schemas-2.1.0.tgz",
"integrity": "sha512-Zc1AlqrJlX3SlpupFGpiLi2EbteyP7fXmUOGup6/DnkRgjP9bgMM/ag+n91rsv0U1Gpz0H3VILA/o3bW7Ua6BQ==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=10"
}
},
"node_modules/@apidevtools/swagger-cli": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/@apidevtools/swagger-cli/-/swagger-cli-4.0.4.tgz",
"integrity": "sha512-hdDT3B6GLVovCsRZYDi3+wMcB1HfetTU20l2DC8zD3iFRNMC6QNAZG5fo/6PYeHWBEv7ri4MvnlKodhNB0nt7g==",
"deprecated": "This package has been abandoned. Please switch to using the actively maintained @redocly/cli",
"dev": true,
"license": "MIT",
"dependencies": {
"@apidevtools/swagger-parser": "^10.0.1",
"chalk": "^4.1.0",
"js-yaml": "^3.14.0",
"yargs": "^15.4.1"
},
"bin": {
"swagger-cli": "bin/swagger-cli.js"
},
"engines": {
"node": ">=10"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/argparse": {
"version": "1.0.10",
"resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz",
"integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==",
"dev": true,
"license": "MIT",
"dependencies": {
"sprintf-js": "~1.0.2"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/camelcase": {
"version": "5.3.1",
"resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz",
"integrity": "sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=6"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/cliui": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/cliui/-/cliui-6.0.0.tgz",
"integrity": "sha512-t6wbgtoCXvAzst7QgXxJYqPt0usEfbgQdftEPbLL/cvv6HPE5VgvqCuAIDR0NgU52ds6rFwqrgakNLrHEjCbrQ==",
"dev": true,
"license": "ISC",
"dependencies": {
"string-width": "^4.2.0",
"strip-ansi": "^6.0.0",
"wrap-ansi": "^6.2.0"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/emoji-regex": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
"dev": true,
"license": "MIT"
},
"node_modules/@apidevtools/swagger-cli/node_modules/find-up": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz",
"integrity": "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==",
"dev": true,
"license": "MIT",
"dependencies": {
"locate-path": "^5.0.0",
"path-exists": "^4.0.0"
},
"engines": {
"node": ">=8"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/js-yaml": {
"version": "3.14.1",
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz",
"integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==",
"dev": true,
"license": "MIT",
"dependencies": {
"argparse": "^1.0.7",
"esprima": "^4.0.0"
},
"bin": {
"js-yaml": "bin/js-yaml.js"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/locate-path": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz",
"integrity": "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==",
"dev": true,
"license": "MIT",
"dependencies": {
"p-locate": "^4.1.0"
},
"engines": {
"node": ">=8"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/p-limit": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz",
"integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==",
"dev": true,
"license": "MIT",
"dependencies": {
"p-try": "^2.0.0"
},
"engines": {
"node": ">=6"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/p-locate": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz",
"integrity": "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==",
"dev": true,
"license": "MIT",
"dependencies": {
"p-limit": "^2.2.0"
},
"engines": {
"node": ">=8"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/path-exists": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
"integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=8"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/string-width": {
"version": "4.2.3",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
"dev": true,
"license": "MIT",
"dependencies": {
"emoji-regex": "^8.0.0",
"is-fullwidth-code-point": "^3.0.0",
"strip-ansi": "^6.0.1"
},
"engines": {
"node": ">=8"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/wrap-ansi": {
"version": "6.2.0",
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-6.2.0.tgz",
"integrity": "sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA==",
"dev": true,
"license": "MIT",
"dependencies": {
"ansi-styles": "^4.0.0",
"string-width": "^4.1.0",
"strip-ansi": "^6.0.0"
},
"engines": {
"node": ">=8"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/y18n": {
"version": "4.0.3",
"resolved": "https://registry.npmjs.org/y18n/-/y18n-4.0.3.tgz",
"integrity": "sha512-JKhqTOwSrqNA1NY5lSztJ1GrBiUodLMmIZuLiDaMRJ+itFd+ABVE8XBjOvIWL+rSqNDC74LCSFmlb/U4UZ4hJQ==",
"dev": true,
"license": "ISC"
},
"node_modules/@apidevtools/swagger-cli/node_modules/yargs": {
"version": "15.4.1",
"resolved": "https://registry.npmjs.org/yargs/-/yargs-15.4.1.tgz",
"integrity": "sha512-aePbxDmcYW++PaqBsJ+HYUFwCdv4LVvdnhBy78E57PIor8/OVvhMrADFFEDh8DHDFRv/O9i3lPhsENjO7QX0+A==",
"dev": true,
"license": "MIT",
"dependencies": {
"cliui": "^6.0.0",
"decamelize": "^1.2.0",
"find-up": "^4.1.0",
"get-caller-file": "^2.0.1",
"require-directory": "^2.1.1",
"require-main-filename": "^2.0.0",
"set-blocking": "^2.0.0",
"string-width": "^4.2.0",
"which-module": "^2.0.0",
"y18n": "^4.0.0",
"yargs-parser": "^18.1.2"
},
"engines": {
"node": ">=8"
}
},
"node_modules/@apidevtools/swagger-cli/node_modules/yargs-parser": {
"version": "18.1.3",
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-18.1.3.tgz",
"integrity": "sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ==",
"dev": true,
"license": "ISC",
"dependencies": {
"camelcase": "^5.0.0",
"decamelize": "^1.2.0"
},
"engines": {
"node": ">=6"
}
},
"node_modules/@apidevtools/swagger-methods": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/@apidevtools/swagger-methods/-/swagger-methods-3.0.2.tgz",
"integrity": "sha512-QAkD5kK2b1WfjDS/UQn/qQkbwF31uqRjPTrsCs5ZG9BQGAkjwvqGFjjPqAuzac/IYzpPtRzjCP1WrTuAIjMrXg==",
"dev": true,
"license": "MIT"
},
"node_modules/@apidevtools/swagger-parser": {
"version": "10.1.1",
"resolved": "https://registry.npmjs.org/@apidevtools/swagger-parser/-/swagger-parser-10.1.1.tgz",
"integrity": "sha512-u/kozRnsPO/x8QtKYJOqoGtC4kH6yg1lfYkB9Au0WhYB0FNLpyFusttQtvhlwjtG3rOwiRz4D8DnnXa8iEpIKA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@apidevtools/json-schema-ref-parser": "11.7.2",
"@apidevtools/openapi-schemas": "^2.1.0",
"@apidevtools/swagger-methods": "^3.0.2",
"@jsdevtools/ono": "^7.1.3",
"ajv": "^8.17.1",
"ajv-draft-04": "^1.0.0",
"call-me-maybe": "^1.0.2"
},
"peerDependencies": {
"openapi-types": ">=7"
}
},
"node_modules/@apidevtools/swagger-parser/node_modules/@apidevtools/json-schema-ref-parser": {
"version": "11.7.2",
"resolved": "https://registry.npmjs.org/@apidevtools/json-schema-ref-parser/-/json-schema-ref-parser-11.7.2.tgz",
"integrity": "sha512-4gY54eEGEstClvEkGnwVkTkrx0sqwemEFG5OSRRn3tD91XH0+Q8XIkYIfo7IwEWPpJZwILb9GUXeShtplRc/eA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@jsdevtools/ono": "^7.1.3",
"@types/json-schema": "^7.0.15",
"js-yaml": "^4.1.0"
},
"engines": {
"node": ">= 16"
},
"funding": {
"url": "https://github.com/sponsors/philsturgeon"
}
},
"node_modules/@babel/code-frame": {
"version": "7.26.2",
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.26.2.tgz",
@@ -4104,13 +3835,6 @@
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
"node_modules/@jsdevtools/ono": {
"version": "7.1.3",
"resolved": "https://registry.npmjs.org/@jsdevtools/ono/-/ono-7.1.3.tgz",
"integrity": "sha512-4JQNk+3mVzK3xh2rqd6RB4J46qUR19azEHBneZyTZM+c456qOrbbM/5xcR8huNCCcbVt7+UmizG6GuUvPvKUYg==",
"dev": true,
"license": "MIT"
},
"node_modules/@leichtgewicht/ip-codec": {
"version": "2.0.5",
"resolved": "https://registry.npmjs.org/@leichtgewicht/ip-codec/-/ip-codec-2.0.5.tgz",
@@ -4246,14 +3970,6 @@
"integrity": "sha512-8LduaNlMZGwdZ6qWrKlfa+2M4gahzFkprZiAt2TF8uS0qQgBizKXpXURqvTJ4WtmupWxaLqjRb2UCTe72mu+Aw==",
"license": "MIT"
},
"node_modules/@scarf/scarf": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/@scarf/scarf/-/scarf-1.4.0.tgz",
"integrity": "sha512-xxeapPiUXdZAE3che6f3xogoJPeZgig6omHEy1rIY5WVsB3H2BHNnZH+gHG6x91SCWyQCzWGsuL2Hh3ClO5/qQ==",
"dev": true,
"hasInstallScript": true,
"license": "Apache-2.0"
},
"node_modules/@sideway/address": {
"version": "4.1.5",
"resolved": "https://registry.npmjs.org/@sideway/address/-/address-4.1.5.tgz",
@@ -5251,21 +4967,6 @@
"url": "https://github.com/sponsors/epoberezkin"
}
},
"node_modules/ajv-draft-04": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/ajv-draft-04/-/ajv-draft-04-1.0.0.tgz",
"integrity": "sha512-mv00Te6nmYbRp5DCwclxtt7yV/joXJPGS7nM+97GdxvuttCOfgI3K4U25zboyeX0O+myI8ERluxQe5wljMmVIw==",
"dev": true,
"license": "MIT",
"peerDependencies": {
"ajv": "^8.5.0"
},
"peerDependenciesMeta": {
"ajv": {
"optional": true
}
}
},
"node_modules/ajv-formats": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-2.1.1.tgz",
@@ -5848,13 +5549,6 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/call-me-maybe": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/call-me-maybe/-/call-me-maybe-1.0.2.tgz",
"integrity": "sha512-HpX65o1Hnr9HH25ojC1YGs7HCQLq0GCOibSaWER0eNpgJ/Z1MZv2mTc7+xh6WOPxbRVcmgbv4hGU+uSQ/2xFZQ==",
"dev": true,
"license": "MIT"
},
"node_modules/callsites": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
@@ -7498,16 +7192,6 @@
}
}
},
"node_modules/decamelize": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz",
"integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/decode-named-character-reference": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/decode-named-character-reference/-/decode-named-character-reference-1.0.2.tgz",
@@ -8916,16 +8600,6 @@
"node": ">=6.9.0"
}
},
"node_modules/get-caller-file": {
"version": "2.0.5",
"resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
"integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==",
"dev": true,
"license": "ISC",
"engines": {
"node": "6.* || 8.* || >= 10.*"
}
},
"node_modules/get-intrinsic": {
"version": "1.2.7",
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.7.tgz",
@@ -13436,16 +13110,15 @@
}
},
"node_modules/nanoid": {
"version": "3.3.11",
"resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz",
"integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==",
"version": "3.3.7",
"resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz",
"integrity": "sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/ai"
}
],
"license": "MIT",
"bin": {
"nanoid": "bin/nanoid.cjs"
},
@@ -13754,14 +13427,6 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/openapi-types": {
"version": "12.1.3",
"resolved": "https://registry.npmjs.org/openapi-types/-/openapi-types-12.1.3.tgz",
"integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==",
"dev": true,
"license": "MIT",
"peer": true
},
"node_modules/opener": {
"version": "1.5.2",
"resolved": "https://registry.npmjs.org/opener/-/opener-1.5.2.tgz",
@@ -14144,9 +13809,9 @@
}
},
"node_modules/postcss": {
"version": "8.4.49",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.49.tgz",
"integrity": "sha512-OCVPnIObs4N29kxTjzLfUryOkvZEq+pf8jTF0lg8E7uETuWHA+v7j3c/xJmiqpX450191LlmZfUKkXxkTry7nA==",
"version": "8.4.38",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.38.tgz",
"integrity": "sha512-Wglpdk03BSfXkHoQa3b/oulrotAkwrlLDRSOb9D0bN86FdRyE9lppSp33aHNPgBa0JKCoB+drFLZkQoRRYae5A==",
"funding": [
{
"type": "opencollective",
@@ -14161,11 +13826,10 @@
"url": "https://github.com/sponsors/ai"
}
],
"license": "MIT",
"dependencies": {
"nanoid": "^3.3.7",
"picocolors": "^1.1.1",
"source-map-js": "^1.2.1"
"picocolors": "^1.0.0",
"source-map-js": "^1.2.0"
},
"engines": {
"node": "^10 || ^12 || >=14"
@@ -15703,15 +15367,6 @@
"node": ">= 0.10"
}
},
"node_modules/punycode": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
"integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==",
"license": "MIT",
"engines": {
"node": ">=6"
}
},
"node_modules/pupa": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/pupa/-/pupa-3.1.0.tgz",
@@ -16550,16 +16205,6 @@
"node": ">=0.10"
}
},
"node_modules/require-directory": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
"integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/require-from-string": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz",
@@ -16577,13 +16222,6 @@
"node": "*"
}
},
"node_modules/require-main-filename": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/require-main-filename/-/require-main-filename-2.0.0.tgz",
"integrity": "sha512-NKN5kMDylKuldxYLSUfrbo5Tuzh4hd+2E8NPPX02mZtn1VuREQToYe/ZdlJy+J3uCpfaiGF05e7B8W0iXbQHmg==",
"dev": true,
"license": "ISC"
},
"node_modules/requires-port": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz",
@@ -17064,13 +16702,6 @@
"node": ">= 0.8.0"
}
},
"node_modules/set-blocking": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz",
"integrity": "sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==",
"dev": true,
"license": "ISC"
},
"node_modules/set-function-length": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz",
@@ -17349,10 +16980,9 @@
}
},
"node_modules/source-map-js": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
"integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==",
"license": "BSD-3-Clause",
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.0.tgz",
"integrity": "sha512-itJW8lvSA0TXEphiRoawsCksnlf8SyvmFzIhltqAHluXd88pkCd+cXJVHTDwdCr0IzwptSm035IHQktUu1QUMg==",
"engines": {
"node": ">=0.10.0"
}
@@ -17717,32 +17347,6 @@
"resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.0.30.tgz",
"integrity": "sha512-GaqWWShW4kv/G9IEucWScBx9G1/vsFZZJUO+tD26M8J8z3Kw5RDQjaoZe03YAClgeS/SWPOcb4nkFBTEi5DUEA=="
},
"node_modules/swagger-cli": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/swagger-cli/-/swagger-cli-4.0.4.tgz",
"integrity": "sha512-Cp8YYuLny3RJFQ4CvOBTaqmOOgYsem52dPx1xM5S4EUWFblIh2Q8atppMZvXKUr1e9xH5RwipYpmdUzdPcxWcA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@apidevtools/swagger-cli": "4.0.4"
},
"bin": {
"swagger-cli": "swagger-cli.js"
},
"engines": {
"node": ">=10"
}
},
"node_modules/swagger-ui-dist": {
"version": "5.21.0",
"resolved": "https://registry.npmjs.org/swagger-ui-dist/-/swagger-ui-dist-5.21.0.tgz",
"integrity": "sha512-E0K3AB6HvQd8yQNSMR7eE5bk+323AUxjtCz/4ZNKiahOlPhPJxqn3UPIGs00cyY/dhrTDJ61L7C/a8u6zhGrZg==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"@scarf/scarf": "=1.4.0"
}
},
"node_modules/tapable": {
"version": "2.2.1",
"resolved": "https://registry.npmjs.org/tapable/-/tapable-2.2.1.tgz",
@@ -18345,6 +17949,14 @@
"punycode": "^2.1.0"
}
},
"node_modules/uri-js/node_modules/punycode": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
"integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==",
"engines": {
"node": ">=6"
}
},
"node_modules/url-loader": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/url-loader/-/url-loader-4.1.1.tgz",
@@ -18998,13 +18610,6 @@
"node": ">= 8"
}
},
"node_modules/which-module": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/which-module/-/which-module-2.0.1.tgz",
"integrity": "sha512-iBdZ57RDvnOR9AGBhML2vFZf7h8vmBjhoaZqODJBFWHVtKkDmKuHai3cx5PgVMrX5YDNp27AofYbAwctSS+vhQ==",
"dev": true,
"license": "ISC"
},
"node_modules/widest-line": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/widest-line/-/widest-line-4.0.1.tgz",
+4 -9
View File
@@ -4,18 +4,16 @@
"private": true,
"scripts": {
"docusaurus": "docusaurus",
"start": "node generate-swagger-ui.js && docusaurus start",
"build": "node generate-swagger-ui.js && docusaurus build",
"start": "docusaurus start",
"build": "docusaurus build",
"swizzle": "docusaurus swizzle",
"deploy": "docusaurus deploy",
"clear": "docusaurus clear",
"serve": "docusaurus serve",
"write-translations": "docusaurus write-translations",
"write-heading-ids": "docusaurus write-heading-ids",
"typecheck": "tsc",
"generate-swagger-ui": "node generate-swagger-ui.js"
"typecheck": "tsc"
},
"// Note": "The OpenAPI spec is stored in docs/static/openapi.json so it's accessible at /openapi.json in the deployed site",
"dependencies": {
"@docusaurus/core": "^3.7.0",
"@docusaurus/plugin-content-pages": "^3.7.0",
@@ -33,8 +31,6 @@
"@docusaurus/module-type-aliases": "^3.5.1",
"@docusaurus/tsconfig": "^3.7.0",
"@docusaurus/types": "^3.5.1",
"swagger-cli": "^4.0.4",
"swagger-ui-dist": "^5.21.0",
"typescript": "~5.8.3"
},
"browserslist": {
@@ -51,6 +47,5 @@
},
"engines": {
"node": ">=18.0"
},
"packageManager": "yarn@1.22.22+sha512.a6b2f7906b721bba3d67d4aff083df04dad64c399707841b7acf00f6b133b7ac24255f2652fa22ae3534329dc6180534e98d17432037ff6fd140556e2bb3137e"
}
}
+1 -1
View File
@@ -268,4 +268,4 @@ const sidebars: SidebarsConfig = {
],
};
export default sidebars;
export default sidebars;
-15
View File
@@ -1,15 +0,0 @@
# Static Files for OpenHands Documentation
This directory contains static files that are copied directly to the build output of the Docusaurus documentation.
## OpenAPI Specification
The `openapi.json` file in this directory is the OpenAPI specification for the OpenHands API. It is copied to the build output and is accessible at `/openapi.json` in the deployed site.
This file is used by the Swagger UI interface, which is accessible at `/swagger-ui/` in the deployed site.
## Why is the OpenAPI spec in the static directory?
The OpenAPI specification is placed in the static directory so that it's accessible at a predictable URL in the deployed site. This allows the Swagger UI to reference it directly.
We only need one copy of the OpenAPI spec file, which is this one in the static directory.
BIN
View File
Binary file not shown.

Before

Width:  |  Height:  |  Size: 120 KiB

After

Width:  |  Height:  |  Size: 148 KiB

-2085
View File
File diff suppressed because it is too large Load Diff
+178 -450
View File
File diff suppressed because it is too large Load Diff
@@ -1 +0,0 @@
config.yaml
@@ -1,35 +0,0 @@
# CI Builds Repair Benchmark Integration
This module integrates the CI Builds Repair benchmark developed by [JetBrains-Research](https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair/ci-builds-repair-benchmark).
For more information, refer to the [GitHub repository](https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair/ci-builds-repair-benchmark) and the associated [research paper](https://arxiv.org/abs/2406.11612).
See notice below for details
## Setup
Before running any scripts, make sure to configure the benchmark by setting up `config.yaml`.
This benchmark pushes to JetBrains' private GitHub repository. To run it, you will need to request a `token_gh` from their team.
## Inference
To run inference with your model:
```bash
./evaluation/benchmarks/lca_ci_build_repair/scripts/run_infer.sh llm.yourmodel
```
## Evaluation
To evaluate the predictions:
```bash
./evaluation/benchmarks/lca_ci_build_repair/scripts/eval_infer.sh predictions_path_containing_output
```
## Results
The benchmark contains 68 instances. Due to dockerization errors, we skip instances #126 and #145 and run only the remaining 66 instances.
Due to running in live GitHub machines, the benchmark is sensitive to the date it is run. Even the golden patches in the dataset might present failures due to updates.
For example, on 2025-04-09, running the benchmark against the golden patches gave 57/67 successes, with 1 job left in the waiting list.
On 2025-04-10, running the full benchmark with OpenHands (OH) and no oracle, 37 instances succeeded. That is 54% of the complete set of 68 instances and 64% of the 57 that succeed with golden patches.
@@ -1,11 +0,0 @@
LCA_PATH: path # where to clone the lca-ci repo
model_name: OpenHands
benchmark_owner: ICML-25-BenchName-builds-repair
token_gh: your_token
#for lca-ci-repo
repos_folder: /path/to/repos # here the cloned repos would be stored
out_folder: /out/folder # here the result files would be stored
data_cache_dir: /data/cache/dir/ # here the cached dataset would be stored
username_gh: username-gh # your GitHub username
# test_username: test_user # username that would be displayed in the benchmark. Optional. If omitted, username_gh would be used
language: Python # dataset language (now only Python is available)
@@ -1,242 +0,0 @@
"""Implements evaluation on JetBrains CI builds repair baselines
Please see https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair
and https://huggingface.co/datasets/JetBrains-Research/lca-ci-builds-repair
TODOs:
- Add more flags
"""
import json
import os
from pathlib import Path
import ruamel.yaml
from evaluation.utils.shared import (
EvalMetadata,
get_default_sandbox_config_for_eval,
make_metadata,
)
from openhands.core.config import (
AppConfig,
LLMConfig,
get_parser,
load_app_config,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime
from openhands.events.action import CmdRunAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
def get_config(
    metadata: EvalMetadata,
) -> AppConfig:
    """Build the ``AppConfig`` used for a docker-based evaluation run.

    The sandbox starts from the default eval sandbox settings with the base
    image overridden to ``python:3.12-bookworm``; no workspace is mounted.
    The LLM config comes from ``metadata`` and prompt extensions are switched
    off on the config of the agent class named in ``metadata``.

    Args:
        metadata: Evaluation metadata supplying ``agent_class``,
            ``max_iterations`` and ``llm_config``.

    Returns:
        The assembled ``AppConfig``.
    """
    sandbox = get_default_sandbox_config_for_eval()
    sandbox.base_container_image = 'python:3.12-bookworm'

    app_config = AppConfig(
        default_agent=metadata.agent_class,
        run_as_openhands=False,
        runtime='docker',
        max_iterations=metadata.max_iterations,
        sandbox=sandbox,
        # do not mount workspace
        workspace_base=None,
        workspace_mount_path=None,
    )
    app_config.set_llm_config(metadata.llm_config)

    # Disable prompt extensions on this agent's config object.
    app_config.get_agent_config(metadata.agent_class).enable_prompt_extensions = False
    return app_config
# Application config loaded once at import time. Note that the ``__main__``
# block of this script rebinds ``config`` to the result of ``get_config(metadata)``.
config = load_app_config()
def load_bench_config():
    """Load ``config.yaml`` from the directory that contains this script.

    Returns:
        Whatever ruamel's round-trip (``typ='rt'``) loader produces for the file.
    """
    # Resolve the config relative to this file, not the current working directory.
    here = os.path.dirname(os.path.abspath(__file__))
    yaml_file = os.path.join(here, 'config.yaml')
    loader = ruamel.yaml.YAML(typ='rt')
    with open(yaml_file, 'r') as handle:
        return loader.load(handle)
# Benchmark settings parsed from config.yaml at import time; read by run_eval()
# (keys used there: LCA_PATH, model_name, token_gh, out_folder).
bench_config = load_bench_config()
def _run_logged(runtime: Runtime, command: str, check: bool = False, log_as=None):
    """Log ``command`` as an ACTION, run it in ``runtime`` and return the observation.

    Args:
        runtime: Runtime in which the shell command is executed.
        command: Shell command line to run.
        check: When True, assert that the command exited with status 0.
        log_as: Optional replacement text to log instead of the action itself
            (used to keep secrets out of the log).
    """
    action = CmdRunAction(command=command)
    logger.info(action if log_as is None else log_as, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    if check:
        assert obs.exit_code == 0
    return obs


def run_eval(
    runtime: Runtime,
):
    """Run the evaluation and create report

    Clones the lca-baselines fork inside the runtime, switches to the
    ``open-hands-integration`` branch, copies this script's ``config.yaml``
    next to the benchmark, runs ``run_eval_jobs.py`` on
    ``/tmp/output_lca.jsonl`` and returns the contents of ``testfile.jsonl``
    from the configured ``out_folder``.

    Args:
        runtime: Connected runtime in which all shell commands are executed.

    Returns:
        The content of the ``cat <out_folder>/testfile.jsonl`` observation.

    Raises:
        AssertionError: If any of the setup commands (mkdir, cd, git clone,
            git switch) exits with a non-zero status.
    """
    logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")

    lca_path = bench_config['LCA_PATH']
    lca_ci_path = os.path.join(
        lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
    )
    model_name = bench_config['model_name']

    # `-p` keeps the setup from failing when LCA_PATH already exists.
    _run_logged(runtime, f'mkdir -p {lca_path}', check=True)
    _run_logged(runtime, f'cd {lca_path}', check=True)

    lca_repo_url = 'https://github.com/juanmichelini/lca-baselines'
    _run_logged(runtime, f'git clone {lca_repo_url}', check=True)
    _run_logged(runtime, f'cd {lca_ci_path}', check=True)
    _run_logged(runtime, 'git switch open-hands-integration', check=True)

    script_dir = os.path.dirname(
        os.path.abspath(__file__)
    )  # Get the absolute path of the script
    config_path = os.path.join(script_dir, 'config.yaml')
    runtime.copy_to(config_path, lca_ci_path)

    # Export the GitHub token for the benchmark, but never write it to the log.
    token_gh = bench_config['token_gh']
    _run_logged(
        runtime,
        f'export TOKEN_GH={token_gh}',
        log_as='export TOKEN_GH=<redacted>',
    )

    _run_logged(runtime, 'poetry install')

    # Set up the task environment
    commandf = f'poetry run python run_eval_jobs.py --model-name "{model_name}" --config-path "{lca_ci_path}/config.yaml" --job-ids-file "/tmp/output_lca.jsonl" --result-filename "testfile.jsonl" > /tmp/single_output.txt'
    obs = _run_logged(runtime, commandf)
    logger.info(f'run_eval_jobs.py gave {obs.content} !')
    # The exit code is deliberately not asserted here; the captured output is
    # dumped below so failures can be diagnosed from the log.

    commandf = 'cat /tmp/single_output.txt'
    obs = _run_logged(runtime, commandf)
    logger.info(f' {commandf} gave {obs.content}!')

    testfile_path = os.path.join(bench_config['out_folder'], 'testfile.jsonl')
    obs = _run_logged(runtime, f'cat {testfile_path}')
    report_str = obs.content

    logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
    return report_str
def process_predictions(predictions_path: str):
    """Extract each prediction's ``test_result`` into a sibling ``*_lca.jsonl`` file.

    Reads the JSON-lines file at ``predictions_path`` and, for every record,
    writes the value of its ``test_result`` key (``null`` when the key is
    absent) as one JSON line to ``<stem>_lca.jsonl`` in the same directory.
    Blank lines in the input are skipped.

    Args:
        predictions_path: Path to the input file; must have a ``.jsonl`` suffix.

    Returns:
        The path of the written ``*_lca.jsonl`` file, as a string.

    Raises:
        ValueError: If ``predictions_path`` does not end in ``.jsonl``.
    """
    output_path = Path(predictions_path)
    if output_path.suffix != '.jsonl':
        raise ValueError('output_path must end in .jsonl')

    output_lca_path = output_path.with_name(output_path.stem + '_lca.jsonl')
    with output_path.open() as infile, output_lca_path.open('w') as outfile:
        for line in infile:
            # Tolerate blank lines (e.g. a stray newline) instead of letting
            # json.loads fail on empty input.
            if not line.strip():
                continue
            data = json.loads(line)
            json.dump(data.get('test_result'), outfile)
            outfile.write('\n')
    return str(output_lca_path)
if __name__ == '__main__':
    # Entry point: convert the predictions, run the benchmark inside a docker
    # runtime, then save the raw results and a success/failure summary next to
    # the predictions.
    parser = get_parser()
    parser.add_argument(
        '-s',
        '--eval-split',
        type=str,
        default='test',
        choices=['test'],
        help='data split to evaluate on, must be test',
    )
    parser.add_argument(
        '--predictions-path',
        type=str,
        # Every output path below is derived from this directory, so fail at
        # argument parsing rather than later with a TypeError in os.path.join.
        required=True,
        help='Path to the directory containing the output.jsonl with the predictions.',
    )
    args, _ = parser.parse_known_args()

    data_split = args.eval_split

    # A placeholder model name is passed to make_metadata here.
    llm_config = LLMConfig(model='dummy_model')
    metadata = make_metadata(
        llm_config,
        f'jetbrains-lca-ci--{data_split}',
        args.agent_cls,
        args.max_iterations,
        args.eval_note,
        args.predictions_path,
    )

    # prepare image
    config = get_config(metadata)
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)

    logger.info('Converting output.jsonl into output_lca.jsonl')
    predictions_lca_path = process_predictions(
        os.path.join(args.predictions_path, 'output.jsonl')
    )
    runtime.copy_to(predictions_lca_path, '/tmp')

    # get results
    results_str = run_eval(runtime)
    results_path = os.path.join(args.predictions_path, 'results.jsonl')
    with open(results_path, 'w') as file:
        file.write(results_str)
    logger.info(f'Saved results to {results_path}')

    # make a summary
    resolved_instances = []
    unresolved_instances = []
    for line in results_str.strip().splitlines():
        if not line.strip():
            continue
        try:
            data = json.loads(line)
        except json.JSONDecodeError:
            # The results come from `cat` inside the runtime and may contain
            # shell error text; report it instead of aborting the summary.
            logger.warning(f'Skipping non-JSON line in results: {line!r}')
            continue
        conclusion = data.get('conclusion')
        if conclusion == 'success':
            resolved_instances.append(data)
        elif conclusion == 'failure':
            unresolved_instances.append(data)

    completed_instances = resolved_instances + unresolved_instances

    report = {
        'success': len(resolved_instances),
        'failure': len(unresolved_instances),
        'resolved_instances': resolved_instances,
        'unresolved_instances': unresolved_instances,
        'completed_instances': completed_instances,
    }

    print(f'Results: {report}')
    report_path = os.path.join(args.predictions_path, 'report.jsonl')
    with open(report_path, 'w') as out_f:
        out_f.write(json.dumps(report) + '\n')
    logger.info(f'Saved report of results in swebench format to {report_path}')
@@ -1,406 +0,0 @@
"""Implements inference on JetBrains CI builds repair baselines
Please see https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair
and https://huggingface.co/datasets/JetBrains-Research/lca-ci-builds-repair
TODOs:
- Add EXP_NAME
"""
import asyncio
import json
import os
from typing import Any
import pandas as pd
import ruamel.yaml
from datasets import load_dataset
from evaluation.utils.shared import (
EvalMetadata,
EvalOutput,
codeact_user_response,
compatibility_for_eval_history_pairs,
get_default_sandbox_config_for_eval,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
)
from openhands.controller.state.state import State
from openhands.core.config import (
AppConfig,
get_llm_config_arg,
get_parser,
load_app_config,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
def get_config(
    metadata: EvalMetadata,
) -> AppConfig:
    """Assemble the application config used for a single evaluation run.

    The sandbox starts from the shared evaluation defaults with the base image
    replaced by ``python:3.12-bookworm``. No workspace is mounted, the LLM
    config comes from ``metadata``, and prompt extensions are switched off for
    the agent class named in ``metadata``.
    """
    sandbox = get_default_sandbox_config_for_eval()
    sandbox.base_container_image = 'python:3.12-bookworm'

    app_config = AppConfig(
        default_agent=metadata.agent_class,
        run_as_openhands=False,
        runtime='docker',
        max_iterations=metadata.max_iterations,
        sandbox=sandbox,
        # do not mount workspace
        workspace_base=None,
        workspace_mount_path=None,
    )
    app_config.set_llm_config(metadata.llm_config)
    app_config.get_agent_config(metadata.agent_class).enable_prompt_extensions = False
    return app_config
config = load_app_config()
def load_bench_config():
    """Parse the ``config.yaml`` located next to this script and return its contents."""
    # Resolve the directory that holds this script, independent of the CWD.
    here = os.path.dirname(os.path.abspath(__file__))
    parser = ruamel.yaml.YAML(typ='rt')
    with open(os.path.join(here, 'config.yaml'), 'r') as handle:
        return parser.load(handle)
bench_config = load_bench_config()
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': codeact_user_response,
}
AGENT_CLS_TO_INST_SUFFIX = {
'CodeActAgent': 'When you think you have completed the task, please finish the interaction using the "finish" tool.\n'
}
def initialize_runtime(
    runtime: Runtime,
    instance: pd.Series,
):
    """Initialize the runtime for the agent.

    This function is called before the runtime is used to run the agent. Inside
    the sandbox it:

    1. creates ``LCA_PATH`` and clones the lca-baselines fork into it,
    2. switches to the ``open-hands-integration`` branch,
    3. writes a copy of the local ``config.yaml`` next to the benchmark scripts,
    4. exports ``TOKEN_GH`` and runs ``poetry install``,
    5. runs ``run_get_datapoint.py`` for ``instance['id']``,
    6. changes directory to the task repository.

    Side effect: the output of ``run_get_datapoint.py`` (whitespace-stripped) is
    stored in ``bench_config['user_branch_name']``.

    Raises:
        AssertionError: if creating/cloning/switching the benchmark checkout or
            fetching the datapoint exits with a non-zero status.
    """
    import shlex  # local import: shell quoting is only needed in this function

    def run(command: str, check: bool = False, shown: str | None = None):
        """Run ``command`` in the sandbox and return its observation.

        ``shown`` replaces the logged text for commands that embed secrets;
        ``check`` asserts a zero exit code.
        """
        action = CmdRunAction(command=command)
        logger.info(action if shown is None else shown, extra={'msg_type': 'ACTION'})
        observation = runtime.run_action(action)
        if check:
            assert observation.exit_code == 0
        return observation

    logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
    obs: CmdOutputObservation

    lca_path = bench_config['LCA_PATH']
    lca_ci_path = os.path.join(
        lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
    )
    repo_name = instance['repo_name']
    repos_path = bench_config['repos_folder']
    repo_owner = instance['repo_owner']
    repo_path = os.path.join(repos_path, f'{repo_owner}__{repo_name}')
    model_name = bench_config['model_name']

    # -p: do not fail when the directory already exists
    run(f'mkdir -p {lca_path}', check=True)
    run(f'cd {lca_path}', check=True)

    lca_repo_url = 'https://github.com/juanmichelini/lca-baselines'
    run(f'git clone {lca_repo_url}', check=True)
    run(f'cd {lca_ci_path}', check=True)
    run('git switch open-hands-integration', check=True)

    script_dir = os.path.dirname(
        os.path.abspath(__file__)
    )  # Get the absolute path of the script
    config_path = os.path.join(script_dir, 'config.yaml')
    with open(config_path, 'r') as file:
        config_as_text = file.read()

    # shlex.quote keeps the file intact even when it contains single quotes.
    # The config holds token_gh, so its contents are kept out of the log.
    run(
        f'echo {shlex.quote(config_as_text)} > config.yaml',
        shown='echo <contents of config.yaml> > config.yaml',
    )

    token_gh = bench_config['token_gh']
    run(
        f'export TOKEN_GH={shlex.quote(str(token_gh))}',
        shown='export TOKEN_GH=<redacted>',
    )

    run('poetry install')

    # Set up the task environment
    obs = run(
        f'poetry run python run_get_datapoint.py --model-name {model_name} --id {instance["id"]} > branch_name.txt'
    )
    if obs.exit_code != 0:
        print(f'run_get_datapoint.py failed at {instance["id"]} with {obs.content}')
    assert obs.exit_code == 0

    obs = run('cat branch_name.txt')
    # Strip so a trailing newline cannot split the push command built later.
    bench_config['user_branch_name'] = obs.content.strip()

    # Navigate to the task's code path
    run(f'cd {repo_path}')

    logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
def complete_runtime(
    runtime: Runtime,
    instance: pd.Series,
) -> dict[str, Any]:
    """Complete the runtime for the agent.

    This function is called after the agent has run. It exports ``TOKEN_GH``,
    changes to the benchmark scripts directory, runs ``run_push_datapoint.py``
    for ``instance['id']`` and returns the JSON that script wrote to
    ``single_output.json``.

    If you need to do something in the sandbox to get the correctness metric after
    the agent has run, modify this function.

    Returns:
        The parsed contents of ``single_output.json``.

    Raises:
        AssertionError: if changing to the benchmark directory fails.
        json.JSONDecodeError: if ``single_output.json`` does not contain valid JSON.
    """
    import shlex  # local import: shell quoting is only needed in this function

    logger.info(f"{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}")
    obs: CmdOutputObservation

    model_name = bench_config['model_name']
    lca_path = bench_config['LCA_PATH']
    lca_ci_path = os.path.join(
        lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
    )
    # The stored value is raw command output; strip and quote it so stray
    # whitespace or a newline cannot break the command line below.
    user_branch_name = shlex.quote(str(bench_config['user_branch_name']).strip())

    token_gh = bench_config['token_gh']
    action = CmdRunAction(command=f'export TOKEN_GH={shlex.quote(str(token_gh))}')
    # Log a redacted form so the token does not end up in the evaluation log.
    logger.info('export TOKEN_GH=<redacted>', extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)

    # Navigate to the lca-baselines scripts path
    action = CmdRunAction(command=f'cd {lca_ci_path}')
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    assert obs.exit_code == 0

    commandf = f'poetry run python run_push_datapoint.py --id {instance["id"]} --model-name {model_name} --user-branch-name {user_branch_name} > single_output.json'
    logger.info(f'Running push script: {commandf}')
    action = CmdRunAction(command=commandf)
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    # A failing push is tolerated (best effort), but record it so a later JSON
    # parse error can be traced back to its cause.
    if obs.exit_code != 0:
        logger.warning(
            f'run_push_datapoint.py exited with {obs.exit_code} for {instance["id"]}: {obs.content}'
        )

    action = CmdRunAction(command='cat single_output.json')
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    try:
        result = json.loads(obs.content)
    except json.JSONDecodeError:
        logger.error(
            f'single_output.json for {instance["id"]} is not valid JSON: {obs.content!r}'
        )
        raise

    logger.info(f"{'-' * 50} END Runtime Completion Fn {'-' * 50}")
    return result
def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = True):
    """Run the agent on one benchmark instance and collect its evaluation output.

    Args:
        instance: dataset row; the fields ``instance_id``, ``id``, ``repo_name``,
            ``repo_owner``, ``workflow_path``, ``logs`` and ``sha_fail`` are read.
        metadata: evaluation metadata (agent class, LLM config, output dir, ...).
        reset_logger: when true, redirect logging to a per-instance file under
            ``<eval_output_dir>/infer_logs`` so parallel workers do not interleave.

    Returns:
        An ``EvalOutput`` holding the instruction, event history, metrics and the
        result returned by ``complete_runtime``.
    """
    config = get_config(metadata)
    # Setup the logger properly, so you can run multi-processing to parallelize the evaluation
    if reset_logger:
        log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
        reset_logger_for_multiprocessing(logger, instance['instance_id'], log_dir)
    else:
        logger.info(f'Starting evaluation for instance {instance["instance_id"]}.')

    repo_name = instance['repo_name']
    repo_workflow = instance['workflow_path']
    repo_logs = instance['logs']
    repos_path = bench_config['repos_folder']
    repo_owner = instance['repo_owner']
    repo_path = os.path.join(repos_path, f'{repo_owner}__{repo_name}')

    # Prepare the task instruction
    instruction_no_oracle = f"""
<uploaded_files>
{repo_path}
</uploaded_files>
I've uploaded a python code repository in the directory {repo_path}, Consider the following issue:
<issue_description>
The repository must pass the CI workflow {repo_workflow}.
but it gave the following error
{repo_logs}
</issue_description>
Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?
I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!
Also the development Python environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.
Your task is to make the minimal changes to non-test files in the {repo_path} directory to ensure the <issue_description> is satisfied.
Follow these phases to resolve the issue:
Phase 1. READING: read the problem and reword it in clearer terms
1.1 If there are code or config snippets. Express in words any best practices or conventions in them.
1.2 Hightlight message errors, method names, variables, file names, stack traces, and technical details.
1.3 Explain the problem in clear terms.
1.4 Enumerate the steps to reproduce the problem.
1.5 Hightlight any best practices to take into account when testing and fixing the issue
Phase 2. RUNNING: install and run the tests on the repository
2.1 Follow the readme
2.2 Install the environment and anything needed
2.2 Iterate and figure out how to run the tests
Phase 3. EXPLORATION: find the files that are related to the problem and possible solutions
3.1 Use `grep` to search for relevant methods, classes, keywords and error messages.
3.2 Identify all files related to the problem statement.
3.3 Propose the methods and files to fix the issue and explain why.
3.4 From the possible file locations, select the most likely location to fix the issue.
Phase 4. TEST CREATION: before implementing any fix, create a script to reproduce and verify the issue.
4.1 Look at existing test files in the repository to understand the test format/structure.
4.2 Create a minimal reproduction script that reproduces the located issue.
4.3 Run the reproduction script to confirm you are reproducing the issue.
4.4 Adjust the reproduction script as necessary.
Phase 5. FIX ANALYSIS: state clearly the problem and how to fix it
5.1 State clearly what the problem is.
5.2 State clearly where the problem is located.
5.3 State clearly how the test reproduces the issue.
5.4 State clearly the best practices to take into account in the fix.
5.5 State clearly how to fix the problem.
Phase 6. FIX IMPLEMENTATION: Edit the source code to implement your chosen solution.
6.1 Make minimal, focused changes to fix the issue.
Phase 7. VERIFICATION: Test your implementation thoroughly.
7.1 Run your reproduction script to verify the fix works.
7.2 Add edge cases to your test script to ensure comprehensive coverage.
7.3 Run existing tests related to the modified code to ensure you haven't broken anything. Run any tests in the repository related to:
7.2.1 The issue you are fixing
7.2.2 The files you modified
7.2.3 The functions you changed
7.4 If any tests fail, revise your implementation until all tests pass
Phase 8. REVIEW: Carefully re-read the problem description and compare your changes with the base commit {instance["sha_fail"]}.
8.1 Ensure you've fully addressed all requirements.
Once all phases are done, announce: 'Agent Task Complete'.
Be thorough in your exploration, testing, and reasoning. It's fine if your thinking process is lengthy - quality and completeness are more important than brevity.
"""

    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    try:
        initialize_runtime(runtime, instance)

        # Run the agent
        state: State | None = asyncio.run(
            run_controller(
                config=config,
                initial_user_action=MessageAction(content=instruction_no_oracle),
                runtime=runtime,
                fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get(
                    metadata.agent_class
                ),
            )
        )
        assert state is not None
        metrics = state.metrics.get() if state.metrics else {}

        test_result = complete_runtime(runtime, instance)
    finally:
        # Release the sandbox even when setup, the agent run or the push step
        # fails; otherwise every instance leaves its runtime behind.
        # NOTE(review): relies on Runtime.close(), which is not shown in this file.
        runtime.close()

    # history is now available as a stream of events, rather than list of pairs of (Action, Observation)
    # for compatibility with the existing output format, we can remake the pairs here
    # remove when it becomes unnecessary
    histories = compatibility_for_eval_history_pairs(state.history)

    # Save the output
    output = EvalOutput(
        instance_id=instance['instance_id'],
        # instance=instance.to_dict(orient='records'),
        instruction=instruction_no_oracle,
        metadata=metadata,
        history=histories,
        test_result=test_result,
        metrics=metrics,
    )
    return output
if __name__ == '__main__':
    # Entry point: load the benchmark split, resolve the LLM config and run the
    # evaluation over all remaining instances.
    parser = get_parser()
    parser.add_argument(
        '-s',
        '--eval-split',
        type=str,
        default='test',
        choices=['test'],
        help='data split to evaluate on, must be test',
    )
    args, _ = parser.parse_known_args()

    data_split = args.eval_split

    bench = load_dataset(
        'JetBrains-Research/lca-ci-builds-repair', split=data_split
    ).to_pandas()
    # todo: see why 126 is giving problems on inference
    # todo: see why 145 is giving problems on eval
    excluded_ids = [126, 145]
    bench = bench[~bench['id'].isin(excluded_ids)]
    # bench = bench.iloc[0:56]
    # add column instance_id for compatibility with oh repo, old id column must be kept for lca repo
    bench['instance_id'] = bench['id'].astype(str)

    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)

    # Validate before touching attributes: an unknown config name must surface
    # as this ValueError, not as an AttributeError on None.
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

    # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
    llm_config.modify_params = False

    metadata = make_metadata(
        llm_config,
        f'jetbrains-lca-ci--{data_split}',
        args.agent_cls,
        args.max_iterations,
        args.eval_note,
        args.eval_output_dir,
    )
    output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
    instances = prepare_dataset(bench, output_file, args.eval_n_limit)

    run_evaluation(
        instances, metadata, output_file, args.eval_num_workers, process_instance
    )
@@ -1,33 +0,0 @@
#!/usr/bin/env bash
# Evaluate the predictions produced by run_infer.sh for the LCA CI build repair benchmark.
# Usage: ./eval_infer.sh <output_file>
set -eo pipefail

source "evaluation/utils/version_control.sh"

PROCESS_FILEPATH=$1
if [ -z "$PROCESS_FILEPATH" ]; then
  echo "Error: PROCESS_FILEPATH is empty. Usage: ./eval_infer.sh <output_file>"
  exit 1
fi

get_openhands_version

# Quote the argument so paths containing spaces survive word splitting.
PROCESS_FILEPATH=$(realpath "$PROCESS_FILEPATH")
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
echo "PROCESS_FILEPATH: $PROCESS_FILEPATH"

EVAL_NOTE="$OPENHANDS_VERSION"
if [ -n "$EXP_NAME" ]; then
  EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
fi

function run_eval() {
  # Build the command as an array and run it directly instead of through
  # `eval`, so the path is passed as one argument and never re-parsed.
  local COMMAND=(
    poetry run python ./evaluation/benchmarks/lca_ci_build_repair/eval_infer.py
    --predictions-path "$PROCESS_FILEPATH"
  )

  echo "RUNNING: ${COMMAND[*]}"
  # Run the command
  "${COMMAND[@]}"
}

unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push
run_eval
@@ -1,27 +0,0 @@
#!/usr/bin/env bash
# Run inference for the LCA CI build repair benchmark.
# Usage: ./run_infer.sh <model_config>
set -eo pipefail

source "evaluation/utils/version_control.sh"

MODEL_CONFIG=$1
if [ -z "$MODEL_CONFIG" ]; then
  # Without this check the Python script is started with a dangling
  # --llm-config flag and fails with a less helpful message.
  echo "Error: MODEL_CONFIG is empty. Usage: ./run_infer.sh <model_config>"
  exit 1
fi

get_openhands_version

echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
echo "MODEL_CONFIG: $MODEL_CONFIG"

EVAL_NOTE="$OPENHANDS_VERSION"
if [ -n "$EXP_NAME" ]; then
  EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
fi

function run_eval() {
  # Build the command as an array and run it directly instead of through
  # `eval`, so the argument is passed verbatim and never re-parsed.
  local COMMAND=(
    poetry run python ./evaluation/benchmarks/lca_ci_build_repair/run_infer.py
    --llm-config "$MODEL_CONFIG"
  )

  # Run the command
  "${COMMAND[@]}"
}

#unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push
run_eval
@@ -1,60 +0,0 @@
"""Installs LCA CI Build Repair benchmark with scripts for OH integration."""
import os
import shutil
import subprocess
import yaml
def setup():
    """Install the LCA CI build repair benchmark checkout used by the OH integration.

    Reads ``config.yaml`` next to this script, clones the lca-baselines fork into
    ``LCA_PATH``, switches it to the ``open-hands-integration`` branch, copies
    ``config.yaml`` into the benchmark directory and runs ``poetry install``
    there. Failures of the git/poetry commands are reported as warnings and do
    not abort the setup.
    """
    # Read config.yaml
    print('Reading config.yaml')
    script_dir = os.path.dirname(
        os.path.abspath(__file__)
    )  # Get the absolute path of the script
    config_path = os.path.join(script_dir, 'config.yaml')
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)

    lca_path = config['LCA_PATH']
    lca_ci_path = os.path.join(
        lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
    )
    repo_url = 'https://github.com/juanmichelini/lca-baselines'

    # subprocess.run(cwd=...) raises FileNotFoundError for a missing directory,
    # so make sure the clone target exists first.
    os.makedirs(lca_path, exist_ok=True)

    # Clone the repository into LCA_PATH
    print(f'Cloning lca-baselines repository from {repo_url} into {lca_path}')
    result = subprocess.run(
        ['git', 'clone', repo_url], cwd=lca_path, capture_output=True, text=True
    )
    if result.returncode != 0:
        print(f'Warning cloning repository: {result.stderr}')

    # Switch the checkout to the OpenHands integration branch
    print('Switching branches')
    result = subprocess.run(
        ['git', 'switch', 'open-hands-integration'],
        cwd=lca_ci_path,
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        print(f'Warning switching repository: {result.stderr}')

    # Copy config.yaml into the benchmark directory (overwrite if exists)
    lca_ci_config_path = os.path.join(lca_ci_path, 'config.yaml')
    print(f'Copying config.yaml to {lca_ci_config_path}')
    shutil.copy(config_path, lca_ci_config_path)

    # Run poetry install in LCA_CI_PATH
    print(f"Running 'poetry install' in {lca_ci_path}")
    result = subprocess.run(
        ['poetry', 'install'], cwd=lca_ci_path, capture_output=True, text=True
    )
    if result.returncode != 0:
        print(f'Warning during poetry install: {result.stderr}')
if __name__ == '__main__':
setup()
+1 -58
View File
@@ -2,8 +2,6 @@
This folder contains the evaluation harness that we built on top of the original [SWE-Bench benchmark](https://www.swebench.com/) ([paper](https://arxiv.org/abs/2310.06770)).
**UPDATE (4/8/2025): We now support running SWT-Bench evaluation! For more details, check out [the corresponding section](#SWT-Bench-Evaluation).**
**UPDATE (03/27/2025): We now support SWE-Bench multimodal evaluation! Simply use "princeton-nlp/SWE-bench_Multimodal" as the dataset name in the `run_infer.sh` script to evaluate on multimodal instances.**
**UPDATE (2/18/2025): We now support running SWE-Gym using the same evaluation harness here. For more details, check out [this README](./SWE-Gym.md).**
@@ -143,7 +141,7 @@ With `output.jsonl` file, you can run `eval_infer.sh` to evaluate generated patc
./evaluation/benchmarks/swe_bench/scripts/eval_infer.sh $YOUR_OUTPUT_JSONL [instance_id] [dataset_name] [split]
# Example
./evaluation/benchmarks/swe_bench/scripts/eval_infer.sh evaluation/evaluation_outputs/outputs/princeton-nlp__SWE-bench_Lite/CodeActAgent/gpt-4-1106-preview_maxiter_50_N_v1.0/output.jsonl
./evaluation/benchmarks/swe_bench/scripts/eval_infer.sh evaluation/evaluation_outputs/outputs/swe_bench/CodeActAgent/gpt-4-1106-preview_maxiter_50_N_v1.0/output.jsonl
```
The script now accepts optional arguments:
@@ -184,58 +182,3 @@ To clean-up all existing runtimes that you've already started, run:
```bash
ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/utils/scripts/cleanup_remote_runtime.sh
```
## SWT-Bench Evaluation
[SWT-Bench](https://swtbench.com/) ([paper](https://arxiv.org/abs/2406.12952)) is a benchmark for evaluating the capability of LLMs at creating unit tests. It is performed on the same instances as SWE-Bench, but requires a separate evaluation harness to capture coverage and issue reproduction. We therefore detail below how to leverage the inference script in this folder to run inference on SWT-Bench and how to use the SWT-Bench evaluation harness to evaluate them.
### Run inference on SWT-Bench
To run inference on SWT-Bench, you can use the same `run_infer.sh` script as described for evaluation on plain SWE-Bench. The only difference is that you need to set the `mode` parameter to `swt` or `swt-ci` when running the script. For example, to run inference on SWT-Bench Verified, run the following command:
```bash
./evaluation/benchmarks/swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers] [swe-dataset] test 1 swt
# Example - This runs evaluation on CodeActAgent for 500 instances on "SWT-bench_Verified"'s test set (corresponding to SWE-bench_Verified), with a maximum of 100 iterations per instance and 1 worker running in parallel
./evaluation/benchmarks/swe_bench/scripts/run_infer.sh llm.eval_gpt4o-2024-11-20 HEAD CodeActAgent 500 100 1 princeton-nlp/SWE-bench_Verified test 1 swt
```
The two modes `swt` and `swt-ci` have the following effect:
- `swt`: This mode will change the prompt to instruct the agent to generate reproducing test cases instead of resolving the issue.
- `swt-ci`: In addition to the changes by `swt`, this mode sets up the CI environment by i) pre-installing the environment in the docker image, such that the test framework can be executed without errors and ii) telling the model the exact command to run the test framework.
### Run evaluation for SWT-bench
The evaluation of these results is done leveraging [the SWT-Bench evaluation harness](https://github.com/logic-star-ai/swt-bench/tree/master).
#### Extracting results into SWT-Bench harness format
In order to run evaluation of the obtained inference results in the SWT-Bench harness, we transform the results to a format that the SWT-Bench evaluation harness expects.
```bash
python3 evaluation/benchmarks/swe_bench/scripts/swtbench/convert.py --prediction_file [output.jsonl] > [output_swt.jsonl]
# Example
python3 evaluation/benchmarks/swe_bench/scripts/swtbench/convert.py --prediction_file "evaluation/evaluation_outputs/outputs/princeton-nlp__SWE-bench_Verified-test/CodeActAgent/gpt-4o-2024-11-20_maxiter_100_N_v0.31.0-no-hint-swt-run_1/output.jsonl" > OpenHands-gpt-4o-2024-11-20.jsonl
```
#### Running the results in SWT-Bench
Next, we run the [SWT-Bench evaluation harness](https://github.com/logic-star-ai/swt-bench/tree/master) with these results.
First, set up the harness and validate the setup as described [here](https://github.com/logic-star-ai/swt-bench/tree/master?tab=readme-ov-file#-set-up).
Then, run the evaluation with the following command:
```bash
# Example
python3 -m src.main \
--dataset_name princeton-nlp/SWE-bench_Verified \
--predictions_path <pathTo>/OpenHands-gpt-4o-2024-11-20.jsonl \
--max_workers 12 \
--run_id OpenHands-CodeAct-gpt-4o-2024-11-20 --patch_types vanilla --build_mode api
```
The results of the evaluation can be obtained by running the reporting script of the harness.
```bash
# Example
python -m src.report run_instance_swt_logs/OpenHands-CodeAct-gpt-4o-2024-11-20/OpenHands__CodeActAgent__gpt-4o-2024-11-20 --dataset verified
```
@@ -1,842 +0,0 @@
# Based on https://github.com/logic-star-ai/swt-bench/blob/master/src/constants.py
# Constants - Installation Specifications
# scikit-learn install specs, keyed by version string.
# Every version gets its own dict (and its own lists) so entries can be
# modified independently.
MAP_VERSION_TO_INSTALL_SKLEARN = dict(
    (
        version,
        {
            'python': '3.6',
            'packages': 'numpy scipy cython pytest pandas matplotlib',
            'install': 'python -m pip install -v --no-use-pep517 --no-build-isolation -e .',
            'pip_packages': [
                'cython',
                'numpy==1.19.2',
                'setuptools',
                'scipy==1.5.2',
            ],
        },
    )
    for version in ('0.20', '0.21', '0.22')
)
# Newer releases build on Python 3.9 with pinned conda packages.
MAP_VERSION_TO_INSTALL_SKLEARN.update(
    (
        version,
        {
            'python': '3.9',
            'packages': "'numpy==1.19.2' 'scipy==1.5.2' 'cython==3.0.10' pytest 'pandas<2.0.0' 'matplotlib<3.9.0' setuptools pytest joblib threadpoolctl",
            'install': 'python -m pip install -v --no-use-pep517 --no-build-isolation -e .',
            'pip_packages': ['cython', 'setuptools', 'numpy', 'scipy'],
        },
    )
    for version in ('1.3', '1.4')
)
# Flask install specs, keyed by version string.
# 2.0 and 2.1 carry their own pins; 2.2 and 2.3 share one spec on Python 3.11.
MAP_VERSION_TO_INSTALL_FLASK = dict(
    [
        (
            '2.0',
            {
                'python': '3.9',
                'packages': 'requirements.txt',
                'install': 'python -m pip install -e .',
                'pip_packages': [
                    'setuptools==70.0.0',
                    'Werkzeug==2.3.7',
                    'Jinja2==3.0.1',
                    'itsdangerous==2.1.2',
                    'click==8.0.1',
                    'MarkupSafe==2.1.3',
                ],
            },
        ),
        (
            '2.1',
            {
                'python': '3.10',
                'packages': 'requirements.txt',
                'install': 'python -m pip install -e .',
                'pip_packages': [
                    'click==8.1.3',
                    'itsdangerous==2.1.2',
                    'Jinja2==3.1.2',
                    'MarkupSafe==2.1.1',
                    'Werkzeug==2.3.7',
                ],
            },
        ),
    ]
)
MAP_VERSION_TO_INSTALL_FLASK.update(
    (
        version,
        {
            'python': '3.11',
            'packages': 'requirements.txt',
            'install': 'python -m pip install -e .',
            'pip_packages': [
                'click==8.1.3',
                'itsdangerous==2.1.2',
                'Jinja2==3.1.2',
                'MarkupSafe==2.1.1',
                'Werkzeug==2.3.7',
            ],
        },
    )
    for version in ('2.2', '2.3')
)
MAP_VERSION_TO_INSTALL_DJANGO = {
k: {
'python': '3.5',
'packages': 'requirements.txt',
'pre_install': [
'apt-get update && apt-get install -y locales',
"echo 'en_US UTF-8' > /etc/locale.gen",
'locale-gen en_US.UTF-8',
],
'install': 'python setup.py install',
'pip_packages': ['setuptools'],
'eval_commands': [
'export LANG=en_US.UTF-8',
'export LC_ALL=en_US.UTF-8',
'export PYTHONIOENCODING=utf8',
'export LANGUAGE=en_US:en',
],
}
for k in ['1.7', '1.8', '1.9', '1.10', '1.11', '2.0', '2.1', '2.2']
}
MAP_VERSION_TO_INSTALL_DJANGO.update(
{
k: {'python': '3.5', 'install': 'python setup.py install'}
for k in ['1.4', '1.5', '1.6']
}
)
MAP_VERSION_TO_INSTALL_DJANGO.update(
{
k: {
'python': '3.6',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
'eval_commands': [
"sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen",
'export LANG=en_US.UTF-8',
'export LANGUAGE=en_US:en',
'export LC_ALL=en_US.UTF-8',
],
}
for k in ['3.0', '3.1', '3.2']
}
)
MAP_VERSION_TO_INSTALL_DJANGO.update(
{
k: {
'python': '3.8',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
}
for k in ['4.0']
}
)
MAP_VERSION_TO_INSTALL_DJANGO.update(
{
k: {
'python': '3.9',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
}
for k in ['4.1', '4.2']
}
)
MAP_VERSION_TO_INSTALL_DJANGO.update(
{
k: {
'python': '3.11',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
}
for k in ['5.0']
}
)
# requests install specs: every listed version uses the same recipe, but each
# key gets its own dict instance.
MAP_VERSION_TO_INSTALL_REQUESTS = dict(
    (
        version,
        {'python': '3.9', 'packages': 'pytest', 'install': 'python -m pip install .'},
    )
    for version in (
        '0.7', '0.8', '0.9', '0.11', '0.13', '0.14', '1.1', '1.2', '2.0', '2.2',
        '2.3', '2.4', '2.5', '2.7', '2.8', '2.9', '2.10', '2.11', '2.12', '2.17',
        '2.18', '2.19', '2.22', '2.26', '2.25', '2.27', '3.0',
    )
)
MAP_VERSION_TO_INSTALL_SEABORN = {
k: {
'python': '3.9',
'install': 'python -m pip install -e .',
'pip_packages': [
'contourpy==1.1.0',
'cycler==0.11.0',
'fonttools==4.42.1',
'importlib-resources==6.0.1',
'kiwisolver==1.4.5',
'matplotlib==3.7.2',
'numpy==1.25.2',
'packaging==23.1',
'pandas==1.3.5', # 2.0.3
'pillow==10.0.0',
'pyparsing==3.0.9',
'pytest',
'python-dateutil==2.8.2',
'pytz==2023.3.post1',
'scipy==1.11.2',
'six==1.16.0',
'tzdata==2023.1',
'zipp==3.16.2',
],
}
for k in ['0.11']
}
MAP_VERSION_TO_INSTALL_SEABORN.update(
{
k: {
'python': '3.9',
'install': 'python -m pip install -e .[dev]',
'pip_packages': [
'contourpy==1.1.0',
'cycler==0.11.0',
'fonttools==4.42.1',
'importlib-resources==6.0.1',
'kiwisolver==1.4.5',
'matplotlib==3.7.2',
'numpy==1.25.2',
'packaging==23.1',
'pandas==2.0.0',
'pillow==10.0.0',
'pyparsing==3.0.9',
'pytest',
'python-dateutil==2.8.2',
'pytz==2023.3.post1',
'scipy==1.11.2',
'six==1.16.0',
'tzdata==2023.1',
'zipp==3.16.2',
],
}
for k in ['0.12', '0.13']
}
)
MAP_VERSION_TO_INSTALL_PYTEST = {
k: {'python': '3.9', 'install': 'python -m pip install -e .'}
for k in [
'4.4',
'4.5',
'4.6',
'5.0',
'5.1',
'5.2',
'5.3',
'5.4',
'6.0',
'6.2',
'6.3',
'7.0',
'7.1',
'7.2',
'7.4',
'8.0',
]
}
MAP_VERSION_TO_INSTALL_PYTEST['4.4']['pip_packages'] = [
'atomicwrites==1.4.1',
'attrs==23.1.0',
'more-itertools==10.1.0',
'pluggy==0.13.1',
'py==1.11.0',
'setuptools==68.0.0',
'six==1.16.0',
]
MAP_VERSION_TO_INSTALL_PYTEST['4.5']['pip_packages'] = [
'atomicwrites==1.4.1',
'attrs==23.1.0',
'more-itertools==10.1.0',
'pluggy==0.11.0',
'py==1.11.0',
'setuptools==68.0.0',
'six==1.16.0',
'wcwidth==0.2.6',
]
MAP_VERSION_TO_INSTALL_PYTEST['4.6']['pip_packages'] = [
'atomicwrites==1.4.1',
'attrs==23.1.0',
'more-itertools==10.1.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'six==1.16.0',
'wcwidth==0.2.6',
]
for k in ['5.0', '5.1', '5.2']:
MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
'atomicwrites==1.4.1',
'attrs==23.1.0',
'more-itertools==10.1.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'wcwidth==0.2.6',
]
MAP_VERSION_TO_INSTALL_PYTEST['5.3']['pip_packages'] = [
'attrs==23.1.0',
'more-itertools==10.1.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'wcwidth==0.2.6',
]
MAP_VERSION_TO_INSTALL_PYTEST['5.4']['pip_packages'] = [
'py==1.11.0',
'packaging==23.1',
'attrs==23.1.0',
'more-itertools==10.1.0',
'pluggy==0.13.1',
]
MAP_VERSION_TO_INSTALL_PYTEST['6.0']['pip_packages'] = [
'attrs==23.1.0',
'iniconfig==2.0.0',
'more-itertools==10.1.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'toml==0.10.2',
]
for k in ['6.2', '6.3']:
MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
'attrs==23.1.0',
'iniconfig==2.0.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'toml==0.10.2',
]
MAP_VERSION_TO_INSTALL_PYTEST['7.0']['pip_packages'] = [
'attrs==23.1.0',
'iniconfig==2.0.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
]
for k in ['7.1', '7.2']:
MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
'attrs==23.1.0',
'iniconfig==2.0.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'tomli==2.0.1',
]
MAP_VERSION_TO_INSTALL_PYTEST['7.4']['pip_packages'] = [
'iniconfig==2.0.0',
'packaging==23.1',
'pluggy==1.3.0',
'exceptiongroup==1.1.3',
'tomli==2.0.1',
]
MAP_VERSION_TO_INSTALL_PYTEST['8.0']['pip_packages'] = [
'iniconfig==2.0.0',
'packaging==23.1',
'pluggy==1.3.0',
'exceptiongroup==1.1.3',
'tomli==2.0.1',
]
# matplotlib install specs, keyed by version string.
# 'pre_install' entries are shell command strings; 'pip_packages' are pip
# requirement specifiers.
MAP_VERSION_TO_INSTALL_MATPLOTLIB = {
    k: {
        'python': '3.11',
        'packages': 'environment.yml',
        'install': 'python -m pip install -e .',
        'pre_install': [
            'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg texlive texlive-latex-extra texlive-fonts-recommended texlive-xetex texlive-luatex cm-super dvipng'
        ],
        'pip_packages': [
            'contourpy==1.1.0',
            'cycler==0.11.0',
            'fonttools==4.42.1',
            'ghostscript',
            'kiwisolver==1.4.5',
            'numpy==1.25.2',
            'packaging==23.1',
            'pillow==10.0.0',
            'pikepdf',
            'pyparsing==3.0.9',
            'python-dateutil==2.8.2',
            'six==1.16.0',
            'setuptools==68.1.2',
            'setuptools-scm==7.1.0',
            'typing-extensions==4.7.1',
        ],
    }
    for k in ['3.5', '3.6', '3.7']
}
MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(
    {
        k: {
            'python': '3.8',
            'packages': 'requirements.txt',
            'install': 'python -m pip install -e .',
            'pre_install': [
                'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg libfreetype6-dev pkg-config texlive texlive-latex-extra texlive-fonts-recommended texlive-xetex texlive-luatex cm-super'
            ],
            'pip_packages': ['pytest', 'ipython'],
        }
        for k in ['3.1', '3.2', '3.3', '3.4']
    }
)
MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(
    {
        k: {
            'python': '3.7',
            'packages': 'requirements.txt',
            'install': 'python -m pip install -e .',
            'pre_install': [
                'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg libfreetype6-dev pkg-config'
            ],
            'pip_packages': ['pytest'],
        }
        for k in ['3.0']
    }
)
MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(
    {
        k: {
            'python': '3.5',
            'install': 'python setup.py build; python setup.py install',
            'pre_install': [
                # A doubled '&& &&' here is a shell syntax error, so the command
                # must chain the three steps with single '&&' operators.
                'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg'
            ],
            'pip_packages': ['pytest'],
            'execute_test_as_nonroot': True,
        }
        for k in ['2.0', '2.1', '2.2', '1.0', '1.1', '1.2', '1.3', '1.4', '1.5']
    }
)
MAP_VERSION_TO_INSTALL_SPHINX = {
k: {
'python': '3.9',
'pip_packages': ['tox==4.16.0', 'tox-current-env==0.0.11'],
'install': 'python -m pip install -e .[test]',
'pre_install': ["sed -i 's/pytest/pytest -rA/' tox.ini"],
}
for k in ['1.5', '1.6', '1.7', '1.8', '2.0', '2.1', '2.2', '2.3', '2.4', '3.0']
+ ['3.1', '3.2', '3.3', '3.4', '3.5', '4.0', '4.1', '4.2', '4.3', '4.4']
+ ['4.5', '5.0', '5.1', '5.2', '5.3', '6.0', '6.2', '7.0', '7.1', '7.2']
}
for k in ['3.0', '3.1', '3.2', '3.3', '3.4', '3.5', '4.0', '4.1', '4.2', '4.3', '4.4']:
MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
[
"sed -i 's/Jinja2>=2.3/Jinja2<3.0/' setup.py",
"sed -i 's/sphinxcontrib-applehelp/sphinxcontrib-applehelp<=1.0.7/' setup.py",
"sed -i 's/sphinxcontrib-devhelp/sphinxcontrib-devhelp<=1.0.5/' setup.py",
"sed -i 's/sphinxcontrib-qthelp/sphinxcontrib-qthelp<=1.0.6/' setup.py",
"sed -i 's/alabaster>=0.7,<0.8/alabaster>=0.7,<0.7.12/' setup.py",
"sed -i \"s/'packaging',/'packaging', 'markupsafe<=2.0.1',/\" setup.py",
]
)
if k in ['4.2', '4.3', '4.4']:
MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
[
"sed -i 's/sphinxcontrib-htmlhelp>=2.0.0/sphinxcontrib-htmlhelp>=2.0.0,<=2.0.4/' setup.py",
"sed -i 's/sphinxcontrib-serializinghtml>=1.1.5/sphinxcontrib-serializinghtml>=1.1.5,<=1.1.9/' setup.py",
]
)
elif k == '4.1':
MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
[
(
"grep -q 'sphinxcontrib-htmlhelp>=2.0.0' setup.py && "
"sed -i 's/sphinxcontrib-htmlhelp>=2.0.0/sphinxcontrib-htmlhelp>=2.0.0,<=2.0.4/' setup.py || "
"sed -i 's/sphinxcontrib-htmlhelp/sphinxcontrib-htmlhelp<=2.0.4/' setup.py"
),
(
"grep -q 'sphinxcontrib-serializinghtml>=1.1.5' setup.py && "
"sed -i 's/sphinxcontrib-serializinghtml>=1.1.5/sphinxcontrib-serializinghtml>=1.1.5,<=1.1.9/' setup.py || "
"sed -i 's/sphinxcontrib-serializinghtml/sphinxcontrib-serializinghtml<=1.1.9/' setup.py"
),
]
)
else:
MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
[
"sed -i 's/sphinxcontrib-htmlhelp/sphinxcontrib-htmlhelp<=2.0.4/' setup.py",
"sed -i 's/sphinxcontrib-serializinghtml/sphinxcontrib-serializinghtml<=1.1.9/' setup.py",
]
)
MAP_VERSION_TO_INSTALL_SPHINX['7.2']['pre_install'] += [
'apt-get update && apt-get install -y graphviz'
]
# Install specification for each supported astropy version. Every version gets
# its own dict (and its own pip_packages list) with identical contents.
MAP_VERSION_TO_INSTALL_ASTROPY = {
    k: {
        'python': '3.9',
        'install': 'python -m pip install -e .[test] --verbose',
        # Exact pins installed with pip.
        'pip_packages': [
            'attrs==23.1.0', 'exceptiongroup==1.1.3', 'execnet==2.0.2',
            'hypothesis==6.82.6', 'iniconfig==2.0.0', 'numpy==1.25.2',
            'packaging==23.1', 'pluggy==1.3.0', 'psutil==5.9.5',
            'pyerfa==2.0.0.3', 'pytest-arraydiff==0.5.0',
            'pytest-astropy-header==0.2.2', 'pytest-astropy==0.10.0',
            'pytest-cov==4.1.0', 'pytest-doctestplus==1.0.0',
            'pytest-filter-subpackage==0.1.2', 'pytest-mock==3.11.1',
            'pytest-openfiles==0.5.0', 'pytest-remotedata==0.4.0',
            'pytest-xdist==3.3.1', 'pytest==7.4.0', 'PyYAML==6.0.1',
            'setuptools==68.0.0', 'sortedcontainers==2.4.0', 'tomli==2.0.1',
        ],
    }
    for k in (
        '0.1', '0.2', '0.3', '0.4', '1.1', '1.2', '1.3', '3.0', '3.1', '3.2',
        '4.1', '4.2', '4.3', '5.0', '5.1', '5.2',
    )
}
# For these versions, rewrite the `requires = ["setuptools",` entry in
# pyproject.toml so that setuptools is pinned to 68.0.0 before installation.
for k in ('4.1', '4.2', '4.3', '5.0', '5.1', '5.2'):
    MAP_VERSION_TO_INSTALL_ASTROPY[k].update(
        pre_install=[
            'sed -i \'s/requires = \\["setuptools",/requires = \\["setuptools==68.0.0",/\' pyproject.toml'
        ]
    )
# Install specification for each supported sympy version.
MAP_VERSION_TO_INSTALL_SYMPY = {
    k: {
        'python': '3.9',
        'packages': 'mpmath flake8',
        'pip_packages': ['mpmath==1.3.0', 'flake8-comprehensions'],
        'install': 'python -m pip install -e .',
    }
    for k in (
        '0.7', '1.0', '1.1', '1.10', '1.11', '1.12', '1.2', '1.4', '1.5', '1.6',
        '1.7', '1.8', '1.9',
    )
}
# Version 1.13 takes its packages from requirements.txt instead.
MAP_VERSION_TO_INSTALL_SYMPY['1.13'] = {
    'python': '3.9',
    'packages': 'requirements.txt',
    'install': 'python -m pip install -e .',
    'pip_packages': ['mpmath==1.3.0'],
}
# Install specification for each supported pylint version.
MAP_VERSION_TO_INSTALL_PYLINT = {
    k: {
        'python': '3.9',
        'packages': 'requirements.txt',
        'install': 'python -m pip install -e .',
    }
    for k in ('2.10', '2.11', '2.13', '2.14', '2.15', '2.16', '2.17', '2.8', '2.9', '3.0')
}
# pylint 2.8 additionally installs pyenchant, plus the enchant/hunspell system
# packages through apt before installation.
MAP_VERSION_TO_INSTALL_PYLINT['2.8'].update(
    pip_packages=['pyenchant==3.2'],
    pre_install=[
        'apt-get update && apt-get install -y libenchant-2-dev hunspell-en-us'
    ],
)
# pylint 3.0 gets a replacement spec dict that adds pip packages.
MAP_VERSION_TO_INSTALL_PYLINT['3.0'] = dict(
    MAP_VERSION_TO_INSTALL_PYLINT['3.0'],
    pip_packages=['astroid==3.0.0a6', 'setuptools'],
)
# Install specification for each supported xarray version.
MAP_VERSION_TO_INSTALL_XARRAY = {
    k: {
        'python': '3.10',
        'packages': 'environment.yml',
        'install': 'python -m pip install -e .',
        # Exact pins installed with pip.
        'pip_packages': [
            'numpy==1.23.0', 'packaging==23.1', 'pandas==1.5.3',
            'pytest==7.4.0', 'python-dateutil==2.8.2', 'pytz==2023.3',
            'six==1.16.0', 'scipy==1.11.1', 'setuptools==68.0.0',
        ],
        'no_use_env': True,
    }
    for k in ('0.12', '0.18', '0.19', '0.20', '2022.03', '2022.06', '2022.09')
}
# Install specification for each supported sqlfluff version.
MAP_VERSION_TO_INSTALL_SQLFLUFF = {
    k: {
        'python': '3.9',
        'packages': 'requirements.txt',
        'install': 'python -m pip install -e .',
    }
    for k in (
        '0.10', '0.11', '0.12', '0.13', '0.4', '0.5', '0.6', '0.8', '0.9',
        '1.0', '1.1', '1.2', '1.3', '1.4',
        '2.0', '2.1', '2.2',
    )
}
# Install specification for each supported dbt-core version:
# 0.13 through 0.21, then 1.0 through 1.7.
MAP_VERSION_TO_INSTALL_DBT_CORE = {
    k: {
        'python': '3.9',
        'packages': 'requirements.txt',
        'install': 'python -m pip install -e .',
    }
    for k in [f'0.{minor}' for minor in range(13, 22)]
    + [f'1.{minor}' for minor in range(8)]
}
# Install specification for each supported pyvista version.
# Versions 0.20 through 0.23 carry no 'packages' entry.
MAP_VERSION_TO_INSTALL_PYVISTA = {
    f'0.{minor}': {
        'python': '3.9',
        'install': 'python -m pip install -e .',
        'pip_packages': ['pytest'],
    }
    for minor in range(20, 24)
}
# Versions 0.24 through 0.43 additionally take packages from requirements.txt.
MAP_VERSION_TO_INSTALL_PYVISTA.update(
    {
        f'0.{minor}': {
            'python': '3.9',
            'packages': 'requirements.txt',
            'install': 'python -m pip install -e .',
            'pip_packages': ['pytest'],
        }
        for minor in range(24, 44)
    }
)
# Install specification for each supported astroid version.
MAP_VERSION_TO_INSTALL_ASTROID = {
    k: {
        'python': '3.9',
        'install': 'python -m pip install -e .',
        'pip_packages': ['pytest'],
    }
    for k in (
        '2.10', '2.12', '2.13', '2.14', '2.15', '2.16',
        '2.5', '2.6', '2.7', '2.8', '2.9',
        '3.0',
    )
}
# Install specification for each supported marshmallow version.
MAP_VERSION_TO_INSTALL_MARSHMALLOW = {
    k: {
        'python': '3.9',
        'install': "python -m pip install -e '.[dev]'",
    }
    for k in (
        '2.18', '2.19', '2.20',
        '3.0', '3.1', '3.10', '3.11', '3.12', '3.13', '3.15', '3.16', '3.19',
        '3.2', '3.4', '3.8', '3.9',
    )
}
# Install specification for pvlib versions 0.1 through 0.9.
MAP_VERSION_TO_INSTALL_PVLIB = {
    f'0.{minor}': {
        'python': '3.9',
        'install': 'python -m pip install -e .[all]',
        'packages': 'pandas scipy',
        'pip_packages': ['jupyter', 'ipython', 'matplotlib', 'pytest', 'flake8'],
    }
    for minor in range(1, 10)
}
# Install specification for each supported pydicom version. Every version
# starts on Python 3.6; newer releases are then moved to newer interpreters.
MAP_VERSION_TO_INSTALL_PYDICOM = {
    k: {'python': '3.6', 'install': 'python -m pip install -e .', 'packages': 'numpy'}
    for k in ('1.0', '1.1', '1.2', '1.3', '1.4', '2.0', '2.1', '2.2', '2.3', '2.4', '3.0')
}
# Replace the spec of each listed version with a copy using the given interpreter.
MAP_VERSION_TO_INSTALL_PYDICOM.update(
    {
        k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': python}
        for python, versions in (
            ('3.8', ('1.4', '2.0')),
            ('3.9', ('2.1', '2.2')),
            ('3.10', ('2.3',)),
            ('3.11', ('2.4', '3.0')),
        )
        for k in versions
    }
)
# HumanEval has a single version ('1.0') that only needs a Python interpreter.
MAP_VERSION_TO_INSTALL_HUMANEVAL = {'1.0': {'python': '3.9'}}
# HumanEval-fix has a single version ('0.0.1') that also lists pytest under 'packages'.
MAP_VERSION_TO_INSTALL_HUMANEVAL_FIX = {
    '0.0.1': {'python': '3.10', 'packages': 'pytest'},
}
# Constants - Task Instance Installation Environment
# Top-level lookup table: repository identifier ('owner/name') -> that
# repository's per-version install specification map.
MAP_VERSION_TO_INSTALL = {
'astropy/astropy': MAP_VERSION_TO_INSTALL_ASTROPY,
'dbt-labs/dbt-core': MAP_VERSION_TO_INSTALL_DBT_CORE,
'django/django': MAP_VERSION_TO_INSTALL_DJANGO,
'matplotlib/matplotlib': MAP_VERSION_TO_INSTALL_MATPLOTLIB,
'marshmallow-code/marshmallow': MAP_VERSION_TO_INSTALL_MARSHMALLOW,
'mwaskom/seaborn': MAP_VERSION_TO_INSTALL_SEABORN,
'pallets/flask': MAP_VERSION_TO_INSTALL_FLASK,
'psf/requests': MAP_VERSION_TO_INSTALL_REQUESTS,
'pvlib/pvlib-python': MAP_VERSION_TO_INSTALL_PVLIB,
'pydata/xarray': MAP_VERSION_TO_INSTALL_XARRAY,
'pydicom/pydicom': MAP_VERSION_TO_INSTALL_PYDICOM,
'pylint-dev/astroid': MAP_VERSION_TO_INSTALL_ASTROID,
'pylint-dev/pylint': MAP_VERSION_TO_INSTALL_PYLINT,
'pytest-dev/pytest': MAP_VERSION_TO_INSTALL_PYTEST,
'pyvista/pyvista': MAP_VERSION_TO_INSTALL_PYVISTA,
'scikit-learn/scikit-learn': MAP_VERSION_TO_INSTALL_SKLEARN,
'sphinx-doc/sphinx': MAP_VERSION_TO_INSTALL_SPHINX,
'sqlfluff/sqlfluff': MAP_VERSION_TO_INSTALL_SQLFLUFF,
'swe-bench/humaneval': MAP_VERSION_TO_INSTALL_HUMANEVAL,
'nielstron/humaneval_fix': MAP_VERSION_TO_INSTALL_HUMANEVAL_FIX,
'sympy/sympy': MAP_VERSION_TO_INSTALL_SYMPY,
}
# Constants - Repository Specific Installation Instructions
# Left empty here: no repository registers a repo-wide install command.
MAP_REPO_TO_INSTALL = {}
# Constants - Task Instance Test Frameworks
# pytest flags: -rA lists every test outcome in the short summary, --tb=long
# prints full tracebacks, -p no:cacheprovider disables the cache plugin.
TEST_PYTEST_VERBOSE = 'pytest -rA --tb=long -p no:cacheprovider'
# Verbose test command for every (repository, version) pair. Each inner dict
# is keyed by the versions of that repository's own install map, so a lookup
# by an instance's repo and version finds a command whenever an install spec
# exists for it.
MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE = {
    'astropy/astropy': {
        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_ASTROPY.keys()
    },
    'django/django': {
        k: './tests/runtests.py --verbosity 2 --settings=test_sqlite --parallel 1'
        for k in MAP_VERSION_TO_INSTALL_DJANGO.keys()
    },
    'marshmallow-code/marshmallow': {
        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_MARSHMALLOW.keys()
    },
    'matplotlib/matplotlib': {
        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_MATPLOTLIB.keys()
    },
    # seaborn runs pytest without `-p no:cacheprovider`.
    'mwaskom/seaborn': {
        k: 'pytest -rA --tb=long' for k in MAP_VERSION_TO_INSTALL_SEABORN.keys()
    },
    'pallets/flask': {
        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_FLASK.keys()
    },
    'psf/requests': {
        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_REQUESTS.keys()
    },
    'pvlib/pvlib-python': {
        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PVLIB.keys()
    },
    'pydata/xarray': {
        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_XARRAY.keys()
    },
    'pydicom/pydicom': {
        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYDICOM.keys()
    },
    'pylint-dev/astroid': {
        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_ASTROID.keys()
    },
    'pylint-dev/pylint': {
        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYLINT.keys()
    },
    # pytest's own suite also runs without `-p no:cacheprovider`.
    'pytest-dev/pytest': {
        k: 'pytest -rA --tb=long' for k in MAP_VERSION_TO_INSTALL_PYTEST.keys()
    },
    'pyvista/pyvista': {
        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYVISTA.keys()
    },
    'scikit-learn/scikit-learn': {
        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_SKLEARN.keys()
    },
    'sphinx-doc/sphinx': {
        k: 'tox -epy39 -v --' for k in MAP_VERSION_TO_INSTALL_SPHINX.keys()
    },
    'sqlfluff/sqlfluff': {
        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_SQLFLUFF.keys()
    },
    'swe-bench/humaneval': {
        k: 'python' for k in MAP_VERSION_TO_INSTALL_HUMANEVAL.keys()
    },
    # Keyed by the humaneval_fix install map (version '0.0.1'), not by the
    # plain humaneval map (version '1.0'), so the versions line up with
    # MAP_VERSION_TO_INSTALL['nielstron/humaneval_fix'].
    'nielstron/humaneval_fix': {
        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_HUMANEVAL_FIX.keys()
    },
    'sympy/sympy': {
        k: 'bin/test -C --verbose' for k in MAP_VERSION_TO_INSTALL_SYMPY.keys()
    },
}
# Django 1.9 uses a shorter command, without the --settings and --parallel options.
MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE['django/django']['1.9'] = (
    './tests/runtests.py --verbosity 2'
)
+3 -69
View File
@@ -3,7 +3,7 @@ import copy
import json
import os
import tempfile
from typing import Any, Literal
from typing import Any
import pandas as pd
import toml
@@ -17,11 +17,6 @@ from evaluation.benchmarks.swe_bench.binary_patch_utils import (
from evaluation.benchmarks.swe_bench.resource.mapping import (
get_instance_resource_factor,
)
from evaluation.benchmarks.swe_bench.resource.swt_bench_constants import (
MAP_REPO_TO_INSTALL,
MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE,
MAP_VERSION_TO_INSTALL,
)
from evaluation.utils.shared import (
EvalException,
EvalMetadata,
@@ -60,7 +55,6 @@ from openhands.utils.shutdown_listener import sleep_if_should_continue
USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
RUN_WITH_BROWSING = os.environ.get('RUN_WITH_BROWSING', 'false').lower() == 'true'
BenchMode = Literal['swe', 'swt', 'swt-ci']
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
@@ -74,36 +68,7 @@ def _get_swebench_workspace_dir_name(instance: pd.Series) -> str:
def get_instruction(instance: pd.Series, metadata: EvalMetadata) -> MessageAction:
workspace_dir_name = _get_swebench_workspace_dir_name(instance)
mode = metadata.details['mode']
if mode.startswith('swt'):
test_instructions = (
f'The following command can be used to run the tests: `{list(MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE[instance.repo].values())[0]}`. Make sure they fail in the expected way.\n'
if mode.endswith('ci')
else ''
)
instruction = f"""\
<uploaded_files>
/workspace/{workspace_dir_name}
</uploaded_files>
I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following issue description:
<issue_description>
{instance.problem_statement}
</issue_description>
Can you help me implement the necessary changes to the repository to test whether the issue in <issue_description> was resolved?
I will take care of all changes to any of the non-test files. This means you DON'T have to modify the actual logic and ONLY have to update test logic and tests!
Your task is to make the minimal changes to tests files in the /workspace directory to reproduce the issue in the <issue_description>, i.e., such that the generated tests fail in the current state (where the issue is unresolved) and pass when the issue will be resolved.
Follow these steps to reproduce the issue:
1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.
2. Create a script `reproduction.py` to reproduce the error and execute it with `python reproduction.py` using the BashTool, to confirm the error
3. Edit the sourcecode of the repo to integrate your reproduction script into the test framework
4. Run the test framework and make sure your tests fail! Only submit FAILING tests! Never submit passing tests.
{test_instructions}Your thinking should be thorough and so it's fine if it's very long.
"""
else:
instruction = f"""
instruction = f"""
<uploaded_files>
/workspace/{workspace_dir_name}
</uploaded_files>
@@ -391,30 +356,6 @@ def initialize_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}')
if metadata.details['mode'] == 'swt-ci':
# set up repo
setup_commands = []
if instance['repo'] in MAP_REPO_TO_INSTALL:
setup_commands.append(MAP_REPO_TO_INSTALL[instance['repo']])
# Run pre-install set up if provided
install = MAP_VERSION_TO_INSTALL.get(instance['repo'], {}).get(
instance['version'], []
)
if 'pre_install' in install:
for pre_install in install['pre_install']:
setup_commands.append(pre_install)
if 'install' in install:
setup_commands.append(install['install'])
for command in setup_commands:
action = CmdRunAction(command=command)
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
if 'multimodal' not in metadata.dataset.lower():
# Only for non-multimodal datasets, we need to activate the testbed environment for Python
# SWE-Bench multimodal datasets are not using the testbed environment
@@ -737,13 +678,6 @@ if __name__ == '__main__':
default='test',
help='split to evaluate on',
)
parser.add_argument(
'--mode',
type=str,
default='swe',
choices=['swe', 'swt', 'swt-ci'],
help="mode to run the evaluation, either 'swe', 'swt', or 'swt-ci'",
)
args, _ = parser.parse_known_args()
# NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
@@ -780,7 +714,7 @@ if __name__ == '__main__':
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
details = {'mode': args.mode}
details = {}
_agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)
dataset_descrption = (
@@ -12,7 +12,6 @@ NUM_WORKERS=$6
DATASET=$7
SPLIT=$8
N_RUNS=$9
MODE=${10}
if [ -z "$NUM_WORKERS" ]; then
NUM_WORKERS=1
@@ -46,11 +45,6 @@ if [ -z "$SPLIT" ]; then
SPLIT="test"
fi
if [ -z "$MODE" ]; then
MODE="swe"
echo "MODE not specified, use default $MODE"
fi
export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"
@@ -61,10 +55,6 @@ echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
echo "MODEL_CONFIG: $MODEL_CONFIG"
echo "DATASET: $DATASET"
echo "SPLIT: $SPLIT"
echo "MAX_ITER: $MAX_ITER"
echo "NUM_WORKERS: $NUM_WORKERS"
echo "COMMIT_HASH: $COMMIT_HASH"
echo "MODE: $MODE"
# Default to NOT use Hint
if [ -z "$USE_HINT_TEXT" ]; then
@@ -84,13 +74,9 @@ fi
if [ -n "$EXP_NAME" ]; then
EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
fi
# if mode != swe, add mode to the eval note
if [ "$MODE" != "swe" ]; then
EVAL_NOTE="${EVAL_NOTE}-${MODE}"
fi
function run_eval() {
local eval_note="${1}"
local eval_note=$1
COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_infer.py \
--agent-cls $AGENT \
--llm-config $MODEL_CONFIG \
@@ -98,8 +84,7 @@ function run_eval() {
--eval-num-workers $NUM_WORKERS \
--eval-note $eval_note \
--dataset $DATASET \
--split $SPLIT \
--mode $MODE"
--split $SPLIT"
if [ -n "$EVAL_LIMIT" ]; then
echo "EVAL_LIMIT: $EVAL_LIMIT"
@@ -1,95 +0,0 @@
import argparse
import json
import logging
import unidiff
from evaluation.benchmarks.swe_bench.resource.swt_bench_constants import (
MAP_VERSION_TO_INSTALL,
)
_LOGGER = logging.getLogger(__name__)
def remove_setup_files(model_patch: str, instance: dict, delete_setup_changes: bool):
    """Drop unwanted file diffs from a model patch.

    Two kinds of per-file diffs are removed:

    * diffs whose source path contains one of the setup files (``setup.py``,
      ``tox.ini``, ``pyproject.toml``) that a ``sed`` pre-install command of
      this instance's repo/version touches -- only when
      ``delete_setup_changes`` is true;
    * diffs whose target path contains exactly one ``/`` (e.g. ``b/foo.py``),
      i.e. top-level files such as reproduction scripts -- always.

    Args:
        model_patch: The patch text in unified diff format.
        instance: Task instance; its ``repo`` and ``version`` entries select
            the pre-install commands from ``MAP_VERSION_TO_INSTALL``.
        delete_setup_changes: Whether changes to setup files are discarded.

    Returns:
        The filtered patch rendered back to a string.

    Raises:
        unidiff.UnidiffParseError: If the patch cannot be parsed even after
            padding it with trailing newlines.
    """
    setup_files = ['setup.py', 'tox.ini', 'pyproject.toml']
    pre_install = (
        MAP_VERSION_TO_INSTALL.get(instance['repo'], {})
        .get(instance['version'], {})
        .get('pre_install', [])
    )
    relevant_files = (
        [
            file
            for file in setup_files
            if any(file in install and 'sed' in install for install in pre_install)
        ]
        if delete_setup_changes
        else []
    )

    # Apparently outputs.jsonl has .strip() applied, so we try to reconstruct
    # the original patch by appending 0..9 trailing newlines until it parses.
    parse_error = None
    for padding in range(10):
        try:
            patch = unidiff.PatchSet(model_patch + padding * '\n')
            break
        except unidiff.UnidiffParseError as exc:
            parse_error = exc
    else:
        # No padding worked: surface the real parse failure instead of falling
        # through to an unbound `patch` variable.
        raise parse_error

    to_delete = []
    for i, file in enumerate(patch):
        if (
            any(f in file.source_file for f in relevant_files)
            or file.target_file.count('/') == 1
        ):
            to_delete.append(i)
    # Delete from the back so the remaining indices stay valid.
    for i in reversed(to_delete):
        del patch[i]
    return str(patch)
def main(
    prediction_file: str,
):
    """Convert an OpenHands prediction file into SWT-Bench prediction records.

    Reads ``prediction_file`` line by line (one JSON object per line) and
    prints one JSON object per prediction to stdout with the keys
    ``instance_id``, ``model_name_or_path``, ``model_patch`` and
    ``full_output``.

    Predictions without a git patch are skipped with a warning. If filtering
    the patch fails, a warning is logged and the unfiltered patch is emitted.

    Args:
        prediction_file: Path to the prediction file (.../outputs.jsonl).
    """
    with open(prediction_file) as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            pred = json.loads(line)
            try:
                git_diff = pred['test_result']['git_patch']
            except KeyError:
                _LOGGER.warning(
                    'Warning: No git diff found for instance %s', pred['instance_id']
                )
                continue
            metadata = pred['metadata']
            ci_mode = metadata['details'].get('mode', '') == 'swt-ci'
            try:
                git_diff = remove_setup_files(git_diff, pred['instance'], ci_mode)
            except Exception:
                # Best effort: keep the unfiltered diff, but let
                # KeyboardInterrupt / SystemExit propagate.
                _LOGGER.warning(
                    'Warning: Invalid git diff found for instance %s',
                    pred['instance_id'],
                )
            llm_config = metadata['llm_config']
            print(
                json.dumps(
                    {
                        'instance_id': pred['instance_id'],
                        'model_name_or_path': f'{llm_config["openrouter_app_name"]}__{metadata["agent_class"]}__{llm_config["model"]}',
                        'model_patch': git_diff,
                        'full_output': json.dumps(pred),
                    }
                )
            )
if __name__ == '__main__':
    # Command-line entry point: takes the path to the prediction file and
    # hands it to main().
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--prediction_file',
        required=True,
        type=str,
        help='Path to the prediction file (.../outputs.jsonl)',
    )
    main(cli.parse_args().prediction_file)
-7
View File
@@ -1,10 +1,3 @@
# Run frontend checks
echo "Running frontend checks..."
cd frontend
npm run check-unlocalized-strings
npx lint-staged
# Run backend pre-commit
echo "Running backend pre-commit..."
cd ..
pre-commit run --files openhands/**/* evaluation/**/* tests/**/* --show-diff-on-failure --config ./dev_config/python/.pre-commit-config.yaml
@@ -223,7 +223,7 @@ describe("ChatInput", () => {
render(<ChatInput onSubmit={onSubmitMock} />);
const textarea = screen.getByRole("textbox");
expect(textarea).toBeInTheDocument();
// The actual verification of maxRows=16 is handled internally by the TextareaAutosize component
// and affects how many rows the textarea can expand to
});
@@ -1,8 +1,8 @@
import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
import type { Message } from "#/message";
import { act, screen, waitFor, within } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { renderWithProviders } from "test-utils";
import type { Message } from "#/message";
import { addUserMessage } from "#/state/chat-slice";
import { SUGGESTIONS } from "#/utils/suggestions";
import * as ChatSlice from "#/state/chat-slice";
@@ -45,15 +45,7 @@ describe("Empty state", () => {
it("should render suggestions if empty", () => {
const { store } = renderWithProviders(<ChatInterface />, {
preloadedState: {
chat: {
messages: [],
systemMessage: {
content: "",
tools: [],
openhands_version: null,
agent_class: null
}
},
chat: { messages: [] },
},
});
@@ -76,15 +68,7 @@ describe("Empty state", () => {
it("should render the default suggestions", () => {
renderWithProviders(<ChatInterface />, {
preloadedState: {
chat: {
messages: [],
systemMessage: {
content: "",
tools: [],
openhands_version: null,
agent_class: null
}
},
chat: { messages: [] },
},
});
@@ -114,15 +98,7 @@ describe("Empty state", () => {
const user = userEvent.setup();
const { store } = renderWithProviders(<ChatInterface />, {
preloadedState: {
chat: {
messages: [],
systemMessage: {
content: "",
tools: [],
openhands_version: null,
agent_class: null
}
},
chat: { messages: [] },
},
});
@@ -151,15 +127,7 @@ describe("Empty state", () => {
const user = userEvent.setup();
const { rerender } = renderWithProviders(<ChatInterface />, {
preloadedState: {
chat: {
messages: [],
systemMessage: {
content: "",
tools: [],
openhands_version: null,
agent_class: null
}
},
chat: { messages: [] },
},
});
@@ -95,23 +95,6 @@ describe("ExpandableMessage", () => {
expect(screen.queryByTestId("status-icon")).not.toBeInTheDocument();
});
it("should render with neutral border and no icon for action messages with undefined success (timeout case)", () => {
renderWithProviders(
<ExpandableMessage
id="OBSERVATION_MESSAGE$RUN"
message="Command timed out"
type="action"
success={undefined}
/>,
);
const element = screen.getByText("OBSERVATION_MESSAGE$RUN");
const container = element.closest(
"div.flex.gap-2.items-center.justify-start",
);
expect(container).toHaveClass("border-neutral-300");
expect(screen.queryByTestId("status-icon")).not.toBeInTheDocument();
});
it("should render the out of credits message when the user is out of credits", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
// @ts-expect-error - We only care about the APP_MODE and FEATURE_FLAGS fields
@@ -3,46 +3,34 @@ import { it, describe, expect, vi, beforeAll, afterAll } from "vitest";
import userEvent from "@testing-library/user-event";
import { AuthModal } from "#/components/features/waitlist/auth-modal";
import * as CaptureConsent from "#/utils/handle-capture-consent";
import * as AuthHook from "#/context/auth-context";
describe("AuthModal", () => {
beforeAll(() => {
vi.stubGlobal("location", { href: "" });
vi.spyOn(AuthHook, "useAuth").mockReturnValue({
providersAreSet: false,
setProvidersAreSet: vi.fn(),
providerTokensSet: [],
setProviderTokensSet: vi.fn()
});
});
afterAll(() => {
vi.unstubAllGlobals();
vi.restoreAllMocks();
});
it("should render a tos checkbox that is unchecked by default", () => {
render(<AuthModal githubAuthUrl={null} appMode="saas" />);
render(<AuthModal githubAuthUrl={null} />);
const checkbox = screen.getByRole("checkbox");
expect(checkbox).not.toBeChecked();
});
it("should only enable the identity provider buttons if the tos checkbox is checked", async () => {
it("should only enable the GitHub button if the tos checkbox is checked", async () => {
const user = userEvent.setup();
render(<AuthModal githubAuthUrl={null} appMode="saas" />);
render(<AuthModal githubAuthUrl={null} />);
const checkbox = screen.getByRole("checkbox");
const githubButton = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
const gitlabButton = screen.getByRole("button", { name: "GITLAB$CONNECT_TO_GITLAB" });
const button = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
expect(githubButton).toBeDisabled();
expect(gitlabButton).toBeDisabled();
expect(button).toBeDisabled();
await user.click(checkbox);
expect(githubButton).not.toBeDisabled();
expect(gitlabButton).not.toBeDisabled();
expect(button).not.toBeDisabled();
});
it("should set user analytics consent to true when the user checks the tos checkbox", async () => {
@@ -52,7 +40,7 @@ describe("AuthModal", () => {
);
const user = userEvent.setup();
render(<AuthModal githubAuthUrl="mock-url" appMode="saas" />);
render(<AuthModal githubAuthUrl="mock-url" />);
const checkbox = screen.getByRole("checkbox");
await user.click(checkbox);
@@ -56,16 +56,12 @@ describe("GitRepositorySelector", () => {
full_name: "test/repo1",
git_provider: "github" as Provider,
stargazers_count: 100,
is_public: true,
pushed_at: "2023-01-01T00:00:00Z",
},
{
id: 2,
full_name: "test/repo2",
git_provider: "github" as Provider,
stargazers_count: 200,
is_public: true,
pushed_at: "2023-01-02T00:00:00Z",
},
];
@@ -1,70 +0,0 @@
import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
import { render, screen } from "@testing-library/react";
import { Provider } from "react-redux";
import { createRoutesStub } from "react-router";
import { setupStore } from "test-utils";
import { describe, expect, it, vi } from "vitest";
import userEvent from "@testing-library/user-event";
import { AuthProvider } from "#/context/auth-context";
import { HomeHeader } from "#/components/features/home/home-header";
import OpenHands from "#/api/open-hands";
// Renders <HomeHeader /> at "/" inside a route stub that also exposes a
// placeholder conversation screen, wrapped in the redux store, auth and
// react-query providers.
const renderHomeHeader = () => {
  const RouterStub = createRoutesStub([
    {
      path: "/",
      Component: HomeHeader,
    },
    {
      path: "/conversations/:conversationId",
      Component: () => <div data-testid="conversation-screen" />,
    },
  ]);

  return render(<RouterStub />, {
    wrapper: ({ children }) => {
      const store = setupStore();
      const queryClient = new QueryClient();

      return (
        <Provider store={store}>
          <AuthProvider initialProvidersAreSet>
            <QueryClientProvider client={queryClient}>
              {children}
            </QueryClientProvider>
          </AuthProvider>
        </Provider>
      );
    },
  });
};
describe("HomeHeader", () => {
  // Finds the launch button by its accessible name.
  const getLaunchButton = () =>
    screen.getByRole("button", { name: /launch from scratch/i });

  it("should create an empty conversation and redirect when pressing the launch from scratch button", async () => {
    const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
    renderHomeHeader();

    await userEvent.click(getLaunchButton());

    expect(createConversationSpy).toHaveBeenCalledExactlyOnceWith(
      undefined,
      undefined,
      [],
      undefined,
    );

    // expect to be redirected to /conversations/:conversationId
    await screen.findByTestId("conversation-screen");
  });

  it("should change the launch button text to 'Loading...' when creating a conversation", async () => {
    renderHomeHeader();

    const launchButton = getLaunchButton();
    await userEvent.click(launchButton);

    expect(launchButton).toHaveTextContent(/Loading/i);
    expect(launchButton).toBeDisabled();
  });
});
@@ -1,229 +0,0 @@
import { render, screen, waitFor, within } from "@testing-library/react";
import { describe, expect, it, vi } from "vitest";
import userEvent from "@testing-library/user-event";
import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
import { setupStore } from "test-utils";
import { Provider } from "react-redux";
import { createRoutesStub } from "react-router";
import OpenHands from "#/api/open-hands";
import { AuthProvider } from "#/context/auth-context";
import { GitRepository } from "#/types/git";
import * as GitService from "#/api/git";
import { RepoConnector } from "#/components/features/home/repo-connector";
// Renders <RepoConnector /> at "/" inside a route stub that also exposes
// placeholder conversation and settings screens. `initialProvidersAreSet` is
// forwarded to the AuthProvider.
const renderRepoConnector = (initialProvidersAreSet = true) => {
  const mockRepoSelection = vi.fn();

  const RouterStub = createRoutesStub([
    {
      path: "/",
      Component: () => <RepoConnector onRepoSelection={mockRepoSelection} />,
    },
    {
      path: "/conversations/:conversationId",
      Component: () => <div data-testid="conversation-screen" />,
    },
    {
      path: "/settings",
      Component: () => <div data-testid="settings-screen" />,
    },
  ]);

  return render(<RouterStub />, {
    wrapper: ({ children }) => {
      const store = setupStore();
      const queryClient = new QueryClient();

      return (
        <Provider store={store}>
          <AuthProvider initialProvidersAreSet={initialProvidersAreSet}>
            <QueryClientProvider client={queryClient}>
              {children}
            </QueryClientProvider>
          </AuthProvider>
        </Provider>
      );
    },
  });
};
const MOCK_RESPOSITORIES: GitRepository[] = [
{
id: 1,
full_name: "rbren/polaris",
git_provider: "github",
is_public: true,
},
{
id: 2,
full_name: "All-Hands-AI/OpenHands",
git_provider: "github",
is_public: true,
},
];
describe("RepoConnector", () => {
it("should render the repository connector section", () => {
renderRepoConnector();
screen.getByTestId("repo-connector");
});
it("should render the available repositories in the dropdown", async () => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
renderRepoConnector();
// Wait for the loading state to be replaced with the dropdown
const dropdown = await waitFor(() => screen.getByTestId("repo-dropdown"));
await userEvent.click(dropdown);
await waitFor(() => {
screen.getByText("rbren/polaris");
screen.getByText("All-Hands-AI/OpenHands");
});
});
it("should only enable the launch button if a repo is selected", async () => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
renderRepoConnector();
const launchButton = screen.getByTestId("repo-launch-button");
expect(launchButton).toBeDisabled();
// Wait for the loading state to be replaced with the dropdown
const dropdown = await waitFor(() => screen.getByTestId("repo-dropdown"));
await userEvent.click(dropdown);
await userEvent.click(screen.getByText("rbren/polaris"));
expect(launchButton).toBeEnabled();
});
it("should render the 'add git(hub|lab) repos' links if saas mode", async () => {
const getConfiSpy = vi.spyOn(OpenHands, "getConfig");
// @ts-expect-error - only return the APP_MODE
getConfiSpy.mockResolvedValue({
APP_MODE: "saas",
});
renderRepoConnector();
await screen.findByText("Add GitHub repos");
});
it("should not render the 'add git(hub|lab) repos' links if oss mode", async () => {
const getConfiSpy = vi.spyOn(OpenHands, "getConfig");
// @ts-expect-error - only return the APP_MODE
getConfiSpy.mockResolvedValue({
APP_MODE: "oss",
});
renderRepoConnector();
expect(screen.queryByText("Add GitHub repos")).not.toBeInTheDocument();
expect(screen.queryByText("Add GitLab repos")).not.toBeInTheDocument();
});
it("should create a conversation and redirect with the selected repo when pressing the launch button", async () => {
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
const retrieveUserGitRepositoriesSpy = vi.spyOn(
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
renderRepoConnector();
const repoConnector = screen.getByTestId("repo-connector");
const launchButton =
within(repoConnector).getByTestId("repo-launch-button");
await userEvent.click(launchButton);
// repo not selected yet
expect(createConversationSpy).not.toHaveBeenCalled();
// select a repository from the dropdown
const dropdown = await waitFor(() =>
within(repoConnector).getByTestId("repo-dropdown")
);
await userEvent.click(dropdown);
const repoOption = screen.getByText("rbren/polaris");
await userEvent.click(repoOption);
await userEvent.click(launchButton);
expect(createConversationSpy).toHaveBeenCalledExactlyOnceWith(
{
full_name: "rbren/polaris",
git_provider: "github",
id: 1,
is_public: true,
},
undefined,
[],
undefined,
);
});
it("should change the launch button text to 'Loading...' when creating a conversation", async () => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
renderRepoConnector();
const launchButton = screen.getByTestId("repo-launch-button");
// Wait for the loading state to be replaced with the dropdown
const dropdown = await waitFor(() => screen.getByTestId("repo-dropdown"));
await userEvent.click(dropdown);
await userEvent.click(screen.getByText("rbren/polaris"));
await userEvent.click(launchButton);
expect(launchButton).toBeDisabled();
expect(launchButton).toHaveTextContent(/Loading/i);
});
it("should not display a button to settings if the user is signed in with their git provider", async () => {
renderRepoConnector(true);
expect(
screen.queryByTestId("navigate-to-settings-button"),
).not.toBeInTheDocument();
});
it("should display a button to settings if the user needs to sign in with their git provider", async () => {
renderRepoConnector(false);
const goToSettingsButton = await screen.findByTestId(
"navigate-to-settings-button",
);
const dropdown = screen.queryByTestId("repo-dropdown");
const launchButton = screen.queryByTestId("repo-launch-button");
const providerLinks = screen.queryAllByText(/add git(hub|lab) repos/i);
expect(dropdown).not.toBeInTheDocument();
expect(launchButton).not.toBeInTheDocument();
expect(providerLinks.length).toBe(0);
expect(goToSettingsButton).toBeInTheDocument();
await userEvent.click(goToSettingsButton);
await screen.findByTestId("settings-screen");
});
});
@@ -1,206 +0,0 @@
import { render, screen } from "@testing-library/react";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import userEvent from "@testing-library/user-event";
import { Provider } from "react-redux";
import { createRoutesStub } from "react-router";
import { setupStore } from "test-utils";
import { SuggestedTask } from "#/components/features/home/tasks/task.types";
import OpenHands from "#/api/open-hands";
import { AuthProvider } from "#/context/auth-context";
import { TaskCard } from "#/components/features/home/tasks/task-card";
import * as GitService from "#/api/git";
import { GitRepository } from "#/types/git";
import {
getFailingChecksPrompt,
getMergeConflictPrompt,
getOpenIssuePrompt,
getUnresolvedCommentsPrompt,
} from "#/components/features/home/tasks/get-prompt-for-query";
// Fixture tasks: one SuggestedTask per task_type exercised by the prompt tests
// in this file (MERGE_CONFLICTS, FAILING_CHECKS, UNRESOLVED_COMMENTS, OPEN_ISSUE).
// Tasks 1-2 use the "github" provider, tasks 3-4 use "gitlab".
const MOCK_TASK_1: SuggestedTask = {
issue_number: 123,
repo: "repo1",
title: "Task 1",
task_type: "MERGE_CONFLICTS",
git_provider: "github",
};
const MOCK_TASK_2: SuggestedTask = {
issue_number: 456,
repo: "repo2",
title: "Task 2",
task_type: "FAILING_CHECKS",
git_provider: "github",
};
const MOCK_TASK_3: SuggestedTask = {
issue_number: 789,
repo: "repo3",
title: "Task 3",
task_type: "UNRESOLVED_COMMENTS",
git_provider: "gitlab",
};
const MOCK_TASK_4: SuggestedTask = {
issue_number: 101112,
repo: "repo4",
title: "Task 4",
task_type: "OPEN_ISSUE",
git_provider: "gitlab",
};
// Repositories returned by the mocked retrieveUserGitRepositories call.
// Entry i has the same full_name as MOCK_TASK_(i+1).repo; the prompt tests rely
// on that ordering when they index into this array positionally.
// (The identifier spelling "RESPOSITORIES" is kept as-is because tests reference it.)
const MOCK_RESPOSITORIES: GitRepository[] = [
{ id: 1, full_name: "repo1", git_provider: "github", is_public: true },
{ id: 2, full_name: "repo2", git_provider: "github", is_public: true },
{ id: 3, full_name: "repo3", git_provider: "gitlab", is_public: true },
{ id: 4, full_name: "repo4", git_provider: "gitlab", is_public: true },
];
/**
 * Renders a `TaskCard` for `task` at "/" inside a routes stub that also
 * exposes a placeholder conversation screen at
 * "/conversations/:conversationId".
 *
 * The tree is wrapped in the Redux provider, an `AuthProvider` with
 * `initialProvidersAreSet` enabled, and a React Query provider.
 *
 * @param task - task to display; defaults to `MOCK_TASK_1`.
 * @returns the result of React Testing Library's `render`.
 */
const renderTaskCard = (task = MOCK_TASK_1) => {
  const RouterStub = createRoutesStub([
    {
      Component: () => <TaskCard task={task} />,
      path: "/",
    },
    {
      Component: () => <div data-testid="conversation-screen" />,
      path: "/conversations/:conversationId",
    },
  ]);

  // Build the store and query client once per call instead of inside the
  // wrapper component: constructing them during render would hand the providers
  // a brand-new (empty) store/cache every time the wrapper re-renders.
  const store = setupStore();
  const queryClient = new QueryClient();

  return render(<RouterStub />, {
    wrapper: ({ children }) => (
      <Provider store={store}>
        <AuthProvider initialProvidersAreSet>
          <QueryClientProvider client={queryClient}>
            {children}
          </QueryClientProvider>
        </AuthProvider>
      </Provider>
    ),
  });
};
// Behavioural tests for TaskCard: id formatting, launching a conversation, the
// prompt sent for each task type, and the button's loading state.
describe("TaskCard", () => {
  it("format the issue id", async () => {
    renderTaskCard();

    const taskId = screen.getByTestId("task-id");
    expect(taskId).toHaveTextContent(/#123/i);
  });

  it("should call createConversation when clicking the launch button", async () => {
    const createConversationSpy = vi.spyOn(OpenHands, "createConversation");

    renderTaskCard();

    const launchButton = screen.getByTestId("task-launch-button");
    await userEvent.click(launchButton);

    expect(createConversationSpy).toHaveBeenCalled();
  });

  describe("creating conversation prompts", () => {
    beforeEach(() => {
      // Block body on purpose: the hook must not return the spy itself.
      const retrieveUserGitRepositoriesSpy = vi.spyOn(
        GitService,
        "retrieveUserGitRepositories",
      );
      retrieveUserGitRepositoriesSpy.mockResolvedValue({
        data: MOCK_RESPOSITORIES,
        nextPage: null,
      });
    });

    // One row per task type. `label` is interpolated into the test title so the
    // generated titles are identical to the four hand-written tests this table
    // replaces; `repository` is the fixture whose full_name matches `task.repo`.
    const PROMPT_CASES = [
      {
        label: "merge conflict",
        task: MOCK_TASK_1,
        repository: MOCK_RESPOSITORIES[0],
        buildPrompt: getMergeConflictPrompt,
      },
      {
        label: "failing checks",
        task: MOCK_TASK_2,
        repository: MOCK_RESPOSITORIES[1],
        buildPrompt: getFailingChecksPrompt,
      },
      {
        label: "unresolved comments",
        task: MOCK_TASK_3,
        repository: MOCK_RESPOSITORIES[2],
        buildPrompt: getUnresolvedCommentsPrompt,
      },
      {
        label: "open issue",
        task: MOCK_TASK_4,
        repository: MOCK_RESPOSITORIES[3],
        buildPrompt: getOpenIssuePrompt,
      },
    ];

    it.each(PROMPT_CASES)(
      "should call create conversation with the $label prompt",
      async ({ task, repository, buildPrompt }) => {
        const createConversationSpy = vi.spyOn(OpenHands, "createConversation");

        renderTaskCard(task);

        const launchButton = screen.getByTestId("task-launch-button");
        await userEvent.click(launchButton);

        // Expected call: matching repository, the prompt built for this task
        // type, an empty array, and undefined as the last argument.
        expect(createConversationSpy).toHaveBeenCalledWith(
          repository,
          buildPrompt(task.git_provider, task.issue_number, task.repo),
          [],
          undefined,
        );
      },
    );
  });

  it("should disable the launch button and update text content when creating a conversation", async () => {
    renderTaskCard();

    const launchButton = screen.getByTestId("task-launch-button");
    await userEvent.click(launchButton);

    expect(launchButton).toHaveTextContent(/Loading/i);
    expect(launchButton).toBeDisabled();
  });
});
@@ -1,113 +0,0 @@
import { render, screen, waitFor } from "@testing-library/react";
import { afterEach, describe, expect, it, vi } from "vitest";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import { Provider } from "react-redux";
import { createRoutesStub } from "react-router";
import { setupStore } from "test-utils";
import userEvent from "@testing-library/user-event";
import { TaskSuggestions } from "#/components/features/home/tasks/task-suggestions";
import { SuggestionsService } from "#/api/suggestions-service/suggestions-service.api";
import { MOCK_TASKS } from "#/mocks/task-suggestions-handlers";
import { AuthProvider } from "#/context/auth-context";
/**
 * Renders `TaskSuggestions` at "/" inside a routes stub that also exposes
 * placeholder conversation ("/conversations/:conversationId") and settings
 * ("/settings") screens, wrapped in Redux, auth and React Query providers.
 *
 * @param initialProvidersAreSet - forwarded to `AuthProvider`; defaults to `true`.
 * @returns the result of React Testing Library's `render`.
 */
const renderTaskSuggestions = (initialProvidersAreSet = true) => {
  const RouterStub = createRoutesStub([
    {
      Component: TaskSuggestions,
      path: "/",
    },
    {
      Component: () => <div data-testid="conversation-screen" />,
      path: "/conversations/:conversationId",
    },
    {
      Component: () => <div data-testid="settings-screen" />,
      path: "/settings",
    },
  ]);

  // Build the store and query client once per call instead of inside the
  // wrapper component: constructing them during render would hand the providers
  // a brand-new (empty) store/cache every time the wrapper re-renders.
  const store = setupStore();
  const queryClient = new QueryClient();

  return render(<RouterStub />, {
    wrapper: ({ children }) => (
      <Provider store={store}>
        <AuthProvider initialProvidersAreSet={initialProvidersAreSet}>
          <QueryClientProvider client={queryClient}>
            {children}
          </QueryClientProvider>
        </AuthProvider>
      </Provider>
    ),
  });
};
// Tests for the TaskSuggestions section: empty state, group titles, loading
// skeletons, and the sign-in shortcut shown when no provider is set.
describe("TaskSuggestions", () => {
  const suggestedTasksSpy = vi.spyOn(SuggestionsService, "getSuggestedTasks");

  // Throws (via getByText) unless the title of every MOCK_TASKS entry is on screen.
  const assertEveryMockTitleRendered = () => {
    for (const entry of MOCK_TASKS) {
      screen.getByText(entry.title);
    }
  };

  afterEach(() => {
    vi.clearAllMocks();
  });

  it("should render the task suggestions section", () => {
    renderTaskSuggestions();

    screen.getByTestId("task-suggestions");
  });

  it("should render an empty message if there are no tasks", async () => {
    suggestedTasksSpy.mockResolvedValue([]);

    renderTaskSuggestions();

    await screen.findByText(/No tasks available/i);
  });

  it("should render the task groups with the correct titles", async () => {
    suggestedTasksSpy.mockResolvedValue(MOCK_TASKS);

    renderTaskSuggestions();

    await waitFor(() => {
      assertEveryMockTitleRendered();
    });
  });

  // NOTE(review): this checks the same `title` field of each MOCK_TASKS entry as
  // the group-title test; confirm whether per-task details were meant here.
  it("should render the task cards with the correct task details", async () => {
    suggestedTasksSpy.mockResolvedValue(MOCK_TASKS);

    renderTaskSuggestions();

    await waitFor(() => {
      assertEveryMockTitleRendered();
    });
  });

  it("should render skeletons when loading", async () => {
    suggestedTasksSpy.mockResolvedValue(MOCK_TASKS);

    renderTaskSuggestions();

    // Skeletons are present synchronously, before the mocked tasks resolve.
    const loadingSkeletons = screen.getAllByTestId("task-group-skeleton");
    expect(loadingSkeletons.length).toBeGreaterThan(0);

    await waitFor(() => {
      assertEveryMockTitleRendered();
    });

    expect(screen.queryByTestId("task-group-skeleton")).not.toBeInTheDocument();
  });

  it("should display a button to settings if the user needs to sign in with their git provider", async () => {
    renderTaskSuggestions(false);

    // No suggestions request is issued when providers are not set.
    expect(suggestedTasksSpy).not.toHaveBeenCalled();

    const settingsShortcut = await screen.findByTestId(
      "navigate-to-settings-button",
    );
    expect(settingsShortcut).toBeInTheDocument();

    await userEvent.click(settingsShortcut);
    await screen.findByTestId("settings-screen");
  });
});
@@ -61,25 +61,25 @@ describe("PaymentForm", () => {
renderPaymentForm();
const topUpInput = await screen.findByTestId("top-up-input");
await user.type(topUpInput, "50");
await user.type(topUpInput, "50.12");
const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
await user.click(topUpButton);
expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50);
expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50.12);
});
it("should only accept integer values", async () => {
it("should round the top-up amount to two decimal places", async () => {
const user = userEvent.setup();
renderPaymentForm();
const topUpInput = await screen.findByTestId("top-up-input");
await user.type(topUpInput, "50");
await user.type(topUpInput, "50.125456");
const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
await user.click(topUpButton);
expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50);
expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50.13);
});
it("should disable the top-up button if the user enters an invalid amount", async () => {
@@ -100,7 +100,7 @@ describe("PaymentForm", () => {
renderPaymentForm();
const topUpInput = await screen.findByTestId("top-up-input");
await user.type(topUpInput, "50");
await user.type(topUpInput, "50.12");
const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
await user.click(topUpButton);
@@ -114,7 +114,7 @@ describe("PaymentForm", () => {
renderPaymentForm();
const topUpInput = await screen.findByTestId("top-up-input");
await user.type(topUpInput, "-50");
await user.type(topUpInput, "-50.12");
const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
await user.click(topUpButton);
@@ -139,8 +139,6 @@ describe("PaymentForm", () => {
const user = userEvent.setup();
renderPaymentForm();
// With type="number", the browser would prevent non-numeric input,
// but we'll test the validation logic anyway
const topUpInput = await screen.findByTestId("top-up-input");
await user.type(topUpInput, "abc");
@@ -162,19 +160,5 @@ describe("PaymentForm", () => {
expect(createCheckoutSessionSpy).not.toHaveBeenCalled();
});
test("user enters a decimal value", async () => {
const user = userEvent.setup();
renderPaymentForm();
// With step="1", the browser would validate this, but we'll test our validation logic
const topUpInput = await screen.findByTestId("top-up-input");
await user.type(topUpInput, "50.5");
const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
await user.click(topUpButton);
expect(createCheckoutSessionSpy).not.toHaveBeenCalled();
});
});
});
@@ -1,18 +1,9 @@
import { render, screen } from "@testing-library/react";
import { describe, it, expect, vi } from "vitest";
import { describe, it, expect } from "vitest";
import { Messages } from "#/components/features/chat/messages";
import type { Message } from "#/message";
import { renderWithProviders } from "test-utils";
// Mock the useParams hook to provide a conversationId
vi.mock("react-router", async () => {
const actual = await vi.importActual<typeof import("react-router")>("react-router");
return {
...actual,
useParams: () => ({ conversationId: "test-conversation-id" }),
};
});
describe("File Operations Messages", () => {
it("should show success indicator for successful file read operation", () => {
const messages: Message[] = [
+16 -27
View File
@@ -1,19 +1,9 @@
import { beforeAll, describe, expect, it, vi } from "vitest";
import { render } from "@testing-library/react";
import { afterEach } from "node:test";
import { ReactNode } from "react";
import { useTerminal } from "#/hooks/use-terminal";
import { Command } from "#/state/command-slice";
import { AgentState } from "#/types/agent-state";
import { renderWithProviders } from "../../test-utils";
// Mock the WsClient context
vi.mock("#/context/ws-client-provider", () => ({
useWsClient: () => ({
send: vi.fn(),
status: "CONNECTED",
isLoadingMessages: false,
events: [],
}),
}));
interface TestTerminalComponentProps {
commands: Command[];
@@ -26,6 +16,14 @@ function TestTerminalComponent({
return <div ref={ref} />;
}
interface WrapperProps {
children: ReactNode;
}
function Wrapper({ children }: WrapperProps) {
return <div>{children}</div>;
}
describe("useTerminal", () => {
const mockTerminal = vi.hoisted(() => ({
loadAddon: vi.fn(),
@@ -57,11 +55,8 @@ describe("useTerminal", () => {
});
it("should render", () => {
renderWithProviders(<TestTerminalComponent commands={[]} />, {
preloadedState: {
agent: { curAgentState: AgentState.RUNNING },
cmd: { commands: [] },
},
render(<TestTerminalComponent commands={[]} />, {
wrapper: Wrapper,
});
});
@@ -71,11 +66,8 @@ describe("useTerminal", () => {
{ content: "hello", type: "output" },
];
renderWithProviders(<TestTerminalComponent commands={commands} />, {
preloadedState: {
agent: { curAgentState: AgentState.RUNNING },
cmd: { commands },
},
render(<TestTerminalComponent commands={commands} />, {
wrapper: Wrapper,
});
expect(mockTerminal.writeln).toHaveBeenNthCalledWith(1, "echo hello");
@@ -94,15 +86,12 @@ describe("useTerminal", () => {
{ content: secret, type: "output" },
];
renderWithProviders(
render(
<TestTerminalComponent
commands={commands}
/>,
{
preloadedState: {
agent: { curAgentState: AgentState.RUNNING },
cmd: { commands },
},
wrapper: Wrapper,
},
);
@@ -1,370 +0,0 @@
import { render, screen, waitFor, within } from "@testing-library/react";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
import userEvent from "@testing-library/user-event";
import { createRoutesStub } from "react-router";
import { Provider } from "react-redux";
import { setupStore } from "test-utils";
import { AxiosError } from "axios";
import HomeScreen from "#/routes/home";
import { AuthProvider } from "#/context/auth-context";
import * as GitService from "#/api/git";
import { GitRepository } from "#/types/git";
import OpenHands from "#/api/open-hands";
import MainApp from "#/routes/root-layout";
/**
 * Builds a fresh AxiosError with code "ERR_BAD_REQUEST" whose trailing object
 * carries status 404 / "Not Found" and the payload
 * `{ message: "Settings not found" }`.
 *
 * The tests in this file pass it to `getSettingsSpy.mockRejectedValue` to
 * simulate GET /settings failing with a 404.
 */
const createAxiosNotFoundErrorObject = () =>
new AxiosError(
"Request failed with status code 404",
"ERR_BAD_REQUEST",
// The two arguments between the code and the response object are left unset
// (presumably the request config and request object; verify against axios).
undefined,
undefined,
{
status: 404,
statusText: "Not Found",
data: { message: "Settings not found" },
headers: {},
// @ts-expect-error - we only need the response object for this test
config: {},
},
);
// Placeholder screens: tests only need a test id to detect that navigation
// reached the conversation or settings route.
const ConversationScreenStub = () => <div data-testid="conversation-screen" />;
const SettingsScreenStub = () => <div data-testid="settings-screen" />;

// Routes stub with MainApp as the layout at "/" and three children: the real
// HomeScreen plus the two placeholder screens.
const RouterStub = createRoutesStub([
  {
    path: "/",
    Component: MainApp,
    children: [
      { path: "/", Component: HomeScreen },
      {
        path: "/conversations/:conversationId",
        Component: ConversationScreenStub,
      },
      { path: "/settings", Component: SettingsScreenStub },
    ],
  },
]);
/**
 * Renders the routes stub (layout + home screen) wrapped in the Redux, auth
 * and React Query providers.
 *
 * @param initialProvidersAreSet - forwarded to `AuthProvider`; defaults to `true`.
 * @returns the result of React Testing Library's `render`.
 */
const renderHomeScreen = (initialProvidersAreSet = true) => {
  // Build the store and query client once per call instead of inside the
  // wrapper component: constructing them during render would hand the providers
  // a brand-new (empty) store/cache every time the wrapper re-renders.
  const store = setupStore();
  const queryClient = new QueryClient();

  return render(<RouterStub />, {
    wrapper: ({ children }) => (
      <Provider store={store}>
        <AuthProvider initialProvidersAreSet={initialProvidersAreSet}>
          <QueryClientProvider client={queryClient}>
            {children}
          </QueryClientProvider>
        </AuthProvider>
      </Provider>
    ),
  });
};
// Repositories returned by the mocked retrieveUserGitRepositories call in the
// HomeScreen tests. The tests select "octocat/hello-world" from the dropdown and
// use "octocat/earth" as the repository that should be filtered out.
// (The identifier spelling "RESPOSITORIES" is kept as-is because tests reference it.)
const MOCK_RESPOSITORIES: GitRepository[] = [
{
id: 1,
full_name: "octocat/hello-world",
git_provider: "github",
is_public: true,
},
{
id: 2,
full_name: "octocat/earth",
git_provider: "github",
is_public: true,
},
];
// Tests for the home screen: section rendering, filtering of suggested tasks by
// the selected repository, and mutual disabling of the launch buttons.
describe("HomeScreen", () => {
  const HELLO_WORLD_REPO = "octocat/hello-world";
  const EARTH_REPO = "octocat/earth";

  // Makes the repository lookup resolve with the two fixture repositories.
  // Block body on purpose so it can be used directly as a hook callback
  // without returning the spy.
  const stubRepositories = () => {
    const retrieveUserGitRepositoriesSpy = vi.spyOn(
      GitService,
      "retrieveUserGitRepositories",
    );
    retrieveUserGitRepositoriesSpy.mockResolvedValue({
      data: MOCK_RESPOSITORIES,
      nextPage: null,
    });
  };

  // Opens the repo dropdown inside the connector and clicks the second element
  // whose text is the hello-world repo name (presumably the first match is the
  // task entry and the second is the dropdown option; verify against the UI).
  // Returns the dropdown element so callers can clear it afterwards.
  const selectHelloWorldRepo = async () => {
    const connector = screen.getByTestId("repo-connector");
    const repoDropdown = within(connector).getByTestId("repo-dropdown");
    await userEvent.click(repoDropdown);

    const helloWorldOption = screen.getAllByText(HELLO_WORLD_REPO)[1];
    await userEvent.click(helloWorldOption);

    return repoDropdown;
  };

  // Resolves once tasks for both repositories are listed in `section`.
  const waitForTasksOfBothRepos = (section: HTMLElement) =>
    waitFor(() => {
      within(section).getByText(HELLO_WORLD_REPO);
      within(section).getByText(EARTH_REPO);
    });

  // Resolves once only the hello-world tasks remain listed in `section`.
  const waitForHelloWorldTasksOnly = (section: HTMLElement) =>
    waitFor(() => {
      within(section).getByText(HELLO_WORLD_REPO);
      expect(
        within(section).queryByText(EARTH_REPO),
      ).not.toBeInTheDocument();
    });

  it("should render", () => {
    renderHomeScreen();

    screen.getByTestId("home-screen");
  });

  it("should render the repository connector and suggested tasks sections", async () => {
    renderHomeScreen();

    screen.getByTestId("repo-connector");
    screen.getByTestId("task-suggestions");
  });

  it("should filter the suggested tasks based on the selected repository", async () => {
    stubRepositories();
    renderHomeScreen();

    const suggestionsSection = screen.getByTestId("task-suggestions");

    // Initially, all tasks should be visible
    await waitForTasksOfBothRepos(suggestionsSection);

    // Select a repository from the dropdown
    await selectHelloWorldRepo();

    // After selecting a repository, only tasks related to that repository should be visible
    await waitForHelloWorldTasksOnly(suggestionsSection);
  });

  it("should reset the filtered tasks when the selected repository is cleared", async () => {
    stubRepositories();
    renderHomeScreen();

    const suggestionsSection = screen.getByTestId("task-suggestions");

    // Initially, all tasks should be visible
    await waitForTasksOfBothRepos(suggestionsSection);

    // Select a repository from the dropdown
    const repoDropdown = await selectHelloWorldRepo();

    // After selecting a repository, only tasks related to that repository should be visible
    await waitForHelloWorldTasksOnly(suggestionsSection);

    // Clear the selected repository
    await userEvent.clear(repoDropdown);

    // All tasks should be visible again
    await waitForTasksOfBothRepos(suggestionsSection);
  });

  describe("launch buttons", () => {
    // Selects a repository so every launch button is enabled, asserts that on
    // the handles captured before the selection, then returns freshly queried
    // handles for the test to act on.
    const setupLaunchButtons = async () => {
      const headerBefore = screen.getByTestId("header-launch-button");
      const repoBefore = screen.getByTestId("repo-launch-button");
      const tasksBefore = await screen.findAllByTestId("task-launch-button");

      // Select a repository from the dropdown to enable the repo launch button
      await selectHelloWorldRepo();

      expect(headerBefore).not.toBeDisabled();
      expect(repoBefore).not.toBeDisabled();
      tasksBefore.forEach((taskButton) => {
        expect(taskButton).not.toBeDisabled();
      });

      return {
        headerLaunchButton: screen.getByTestId("header-launch-button"),
        repoLaunchButton: screen.getByTestId("repo-launch-button"),
        tasksLaunchButtons: await screen.findAllByTestId("task-launch-button"),
      };
    };

    // Asserts, in order, that every given button is disabled.
    const expectAllDisabled = (buttons: HTMLElement[]) => {
      buttons.forEach((button) => {
        expect(button).toBeDisabled();
      });
    };

    beforeEach(stubRepositories);

    it("should disable the other launch buttons when the header launch button is clicked", async () => {
      renderHomeScreen();
      const { headerLaunchButton, repoLaunchButton } =
        await setupLaunchButtons();
      const taskButtons = await screen.findAllByTestId("task-launch-button");

      // All other buttons should be disabled when the header button is clicked
      await userEvent.click(headerLaunchButton);

      expectAllDisabled([headerLaunchButton, repoLaunchButton, ...taskButtons]);
    });

    it("should disable the other launch buttons when the repo launch button is clicked", async () => {
      renderHomeScreen();
      const { headerLaunchButton, repoLaunchButton } =
        await setupLaunchButtons();
      const taskButtons = await screen.findAllByTestId("task-launch-button");

      // All other buttons should be disabled when the repo button is clicked
      await userEvent.click(repoLaunchButton);

      expectAllDisabled([headerLaunchButton, repoLaunchButton, ...taskButtons]);
    });

    it("should disable the other launch buttons when any task launch button is clicked", async () => {
      renderHomeScreen();
      const { headerLaunchButton, repoLaunchButton, tasksLaunchButtons } =
        await setupLaunchButtons();
      const taskButtons = await screen.findAllByTestId("task-launch-button");

      // All other buttons should be disabled when the task button is clicked
      await userEvent.click(tasksLaunchButtons[0]);

      expectAllDisabled([headerLaunchButton, repoLaunchButton, ...taskButtons]);
    });
  });

  it("should hide the suggested tasks section if not authed with git(hub|lab)", async () => {
    renderHomeScreen(false);

    // Query suggestions first (may be null), then require the connector.
    const suggestionsSection = screen.queryByTestId("task-suggestions");
    const connector = screen.getByTestId("repo-connector");

    expect(suggestionsSection).not.toBeInTheDocument();
    expect(connector).toBeInTheDocument();
  });
});
// Behaviour when GET /settings rejects with a 404: the AI config modal opens in
// the default mode, links to the settings screen, and stays closed in SaaS mode.
describe("Settings 404", () => {
  const configSpy = vi.spyOn(OpenHands, "getConfig");
  const settingsSpy = vi.spyOn(OpenHands, "getSettings");

  // Makes every getSettings call reject with a freshly built 404 AxiosError.
  const rejectSettingsWithNotFound = () => {
    settingsSpy.mockRejectedValue(createAxiosNotFoundErrorObject());
  };

  it("should open the settings modal if GET /settings fails with a 404", async () => {
    rejectSettingsWithNotFound();

    renderHomeScreen();

    expect(await screen.findByTestId("ai-config-modal")).toBeInTheDocument();
  });

  it("should navigate to the settings screen when clicking the advanced settings button", async () => {
    rejectSettingsWithNotFound();
    const user = userEvent.setup();

    renderHomeScreen();

    // Before interacting: settings screen absent, modal eventually shown.
    expect(screen.queryByTestId("settings-screen")).not.toBeInTheDocument();
    expect(await screen.findByTestId("ai-config-modal")).toBeInTheDocument();

    const advancedLink = await screen.findByTestId("advanced-settings-link");
    await user.click(advancedLink);

    // After the click: settings screen shown, modal gone.
    expect(await screen.findByTestId("settings-screen")).toBeInTheDocument();
    expect(screen.queryByTestId("ai-config-modal")).not.toBeInTheDocument();
  });

  it("should not open the settings modal if GET /settings fails but is SaaS mode", async () => {
    // @ts-expect-error - we only need APP_MODE for this test
    configSpy.mockResolvedValue({
      APP_MODE: "saas",
      FEATURE_FLAGS: {
        ENABLE_BILLING: false,
        HIDE_LLM_SETTINGS: false,
      },
    });
    rejectSettingsWithNotFound();

    renderHomeScreen();

    // small hack to wait for the modal to not appear
    const modalLookup = screen.findByTestId(
      "ai-config-modal",
      {},
      { timeout: 1000 },
    );
    await expect(modalLookup).rejects.toThrow();
  });
});
// The payment setup prompt is expected when the app runs in SaaS mode with
// billing enabled and GET /settings rejects with a 404.
describe("Setup Payment modal", () => {
  const configSpy = vi.spyOn(OpenHands, "getConfig");
  const settingsSpy = vi.spyOn(OpenHands, "getSettings");

  it("should only render if SaaS mode and is new user", async () => {
    // @ts-expect-error - we only need the APP_MODE for this test
    configSpy.mockResolvedValue({
      APP_MODE: "saas",
      FEATURE_FLAGS: {
        ENABLE_BILLING: true,
        HIDE_LLM_SETTINGS: false,
      },
    });
    settingsSpy.mockRejectedValue(createAxiosNotFoundErrorObject());

    renderHomeScreen();

    // The test identifies the modal through its "proceed to Stripe" button.
    const proceedToStripeButton = await screen.findByTestId(
      "proceed-to-stripe-button",
    );
    expect(proceedToStripeButton).toBeInTheDocument();
  });
});
+177
View File
@@ -0,0 +1,177 @@
import { createRoutesStub } from "react-router";
import { afterEach, describe, expect, it, vi } from "vitest";
import { renderWithProviders } from "test-utils";
import userEvent from "@testing-library/user-event";
import { screen } from "@testing-library/react";
import { AxiosError } from "axios";
import MainApp from "#/routes/root-layout";
import SettingsScreen from "#/routes/settings";
import Home from "#/routes/home";
import OpenHands from "#/api/open-hands";
/**
 * Builds a fresh AxiosError with code "ERR_BAD_REQUEST" whose trailing object
 * carries status 404 / "Not Found" and the payload
 * `{ message: "Settings not found" }`.
 *
 * The tests in this file pass it to `getSettingsSpy.mockRejectedValue` to
 * simulate GET /settings failing with a 404.
 */
const createAxiosNotFoundErrorObject = () =>
new AxiosError(
"Request failed with status code 404",
"ERR_BAD_REQUEST",
// The two arguments between the code and the response object are left unset
// (presumably the request config and request object; verify against axios).
undefined,
undefined,
{
status: 404,
statusText: "Not Found",
data: { message: "Settings not found" },
headers: {},
// @ts-expect-error - we only need the response object for this test
config: {},
},
);
// Shared spy on OpenHands.getSettings; individual tests decide how it settles.
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");

// Routes under test: MainApp is the layout route at "/", wrapping the home
// route and the real settings screen.
const RouterStub = createRoutesStub([
  {
    path: "/",
    Component: MainApp,
    children: [
      { path: "/", Component: Home },
      { path: "/settings", Component: SettingsScreen },
    ],
  },
]);

// Runs after every test in this file.
afterEach(() => {
  vi.clearAllMocks();
});
// Smoke and navigation tests for the home route rendered inside the layout.
describe("Home Screen", () => {
  const configSpy = vi.spyOn(OpenHands, "getConfig");

  // Mounts the routes stub with "/" as the initial entry.
  const renderAtHome = () =>
    renderWithProviders(<RouterStub initialEntries={["/"]} />);

  it("should render the home screen", () => {
    renderAtHome();
  });

  it("should navigate to the settings screen when the settings button is clicked", async () => {
    const user = userEvent.setup();
    renderAtHome();

    await user.click(await screen.findByTestId("settings-button"));

    expect(await screen.findByTestId("settings-screen")).toBeInTheDocument();
  });

  it("should navigate to the settings when pressing 'Connect to GitHub' if the user isn't authenticated", async () => {
    // @ts-expect-error - we only need APP_MODE for this test
    configSpy.mockResolvedValue({
      APP_MODE: "oss",
      FEATURE_FLAGS: {
        ENABLE_BILLING: false,
        HIDE_LLM_SETTINGS: false,
      },
    });
    const user = userEvent.setup();
    renderAtHome();

    await user.click(await screen.findByTestId("connect-to-github"));

    expect(await screen.findByTestId("settings-screen")).toBeInTheDocument();
  });
});
// Behaviour when GET /settings rejects with a 404: the AI config modal opens in
// the default mode, links to the settings screen, and stays closed in SaaS mode.
describe("Settings 404", () => {
  const configSpy = vi.spyOn(OpenHands, "getConfig");

  // Makes every getSettings call reject with a freshly built 404 AxiosError.
  const rejectSettingsWithNotFound = () => {
    getSettingsSpy.mockRejectedValue(createAxiosNotFoundErrorObject());
  };

  // Mounts the routes stub with "/" as the initial entry.
  const renderAtHome = () =>
    renderWithProviders(<RouterStub initialEntries={["/"]} />);

  it("should open the settings modal if GET /settings fails with a 404", async () => {
    rejectSettingsWithNotFound();

    renderAtHome();

    expect(await screen.findByTestId("ai-config-modal")).toBeInTheDocument();
  });

  it("should navigate to the settings screen when clicking the advanced settings button", async () => {
    rejectSettingsWithNotFound();
    const user = userEvent.setup();

    renderAtHome();

    // Before interacting: settings screen absent, modal eventually shown.
    expect(screen.queryByTestId("settings-screen")).not.toBeInTheDocument();
    expect(await screen.findByTestId("ai-config-modal")).toBeInTheDocument();

    const advancedLink = await screen.findByTestId("advanced-settings-link");
    await user.click(advancedLink);

    // After the click: settings screen shown, modal gone.
    expect(await screen.findByTestId("settings-screen")).toBeInTheDocument();
    expect(screen.queryByTestId("ai-config-modal")).not.toBeInTheDocument();
  });

  it("should not open the settings modal if GET /settings fails but is SaaS mode", async () => {
    // @ts-expect-error - we only need APP_MODE for this test
    configSpy.mockResolvedValue({
      APP_MODE: "saas",
      FEATURE_FLAGS: {
        ENABLE_BILLING: false,
        HIDE_LLM_SETTINGS: false,
      },
    });
    rejectSettingsWithNotFound();

    renderAtHome();

    // small hack to wait for the modal to not appear
    const modalLookup = screen.findByTestId(
      "ai-config-modal",
      {},
      { timeout: 1000 },
    );
    await expect(modalLookup).rejects.toThrow();
  });
});
// The payment setup prompt is expected when the app runs in SaaS mode with
// billing enabled and GET /settings rejects with a 404.
describe("Setup Payment modal", () => {
  const configSpy = vi.spyOn(OpenHands, "getConfig");

  // Runs after each test in this suite.
  afterEach(() => {
    vi.resetAllMocks();
  });

  it("should only render if SaaS mode and is new user", async () => {
    // @ts-expect-error - we only need the APP_MODE for this test
    configSpy.mockResolvedValue({
      APP_MODE: "saas",
      FEATURE_FLAGS: {
        ENABLE_BILLING: true,
        HIDE_LLM_SETTINGS: false,
      },
    });
    getSettingsSpy.mockRejectedValue(createAxiosNotFoundErrorObject());

    renderWithProviders(<RouterStub initialEntries={["/"]} />);

    // The test identifies the modal through its "proceed to Stripe" button.
    const proceedToStripeButton = await screen.findByTestId(
      "proceed-to-stripe-button",
    );
    expect(proceedToStripeButton).toBeInTheDocument();
  });
});
@@ -43,12 +43,10 @@ describe("Settings Billing", () => {
renderSettingsScreen();
// Wait for the settings screen to be rendered
await screen.findByTestId("settings-screen");
// Then check that the navbar is not present
const navbar = screen.queryByTestId("settings-navbar");
expect(navbar).not.toBeInTheDocument();
await waitFor(() => {
const navbar = screen.queryByTestId("settings-navbar");
expect(navbar).not.toBeInTheDocument();
});
});
it("should render the navbar if SaaS mode", async () => {
+138
View File
@@ -25,6 +25,7 @@ const mock_provider_tokens_are_set: Record<Provider, boolean> = {
describe("Settings Screen", () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const resetSettingsSpy = vi.spyOn(OpenHands, "resetSettings");
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
const { handleLogoutMock } = vi.hoisted(() => ({
@@ -66,6 +67,7 @@ describe("Settings Screen", () => {
// Use queryAllByText to handle multiple elements with the same text
expect(screen.queryAllByText("SETTINGS$LLM_SETTINGS")).not.toHaveLength(0);
screen.getByText("ACCOUNT_SETTINGS$ADDITIONAL_SETTINGS");
screen.getByText("BUTTON$RESET_TO_DEFAULTS");
screen.getByText("BUTTON$SAVE");
});
});
@@ -540,6 +542,54 @@ describe("Settings Screen", () => {
});
});
// With advanced settings toggled on and nothing edited, confirming a reset must
// remove every advanced-only control from the screen.
test("resetting settings with no changes but having advanced enabled should hide the advanced items", async () => {
  const user = userEvent.setup();
  getSettingsSpy.mockResolvedValueOnce({
    ...MOCK_DEFAULT_USER_SETTINGS,
  });

  renderSettingsScreen();
  await toggleAdvancedSettings(user);

  await user.click(screen.getByText("BUTTON$RESET_TO_DEFAULTS"));

  // show modal
  const resetModal = await screen.findByTestId("reset-modal");
  expect(resetModal).toBeInTheDocument();

  // Mock the settings that will be returned after reset
  // This should be the default settings with no advanced settings enabled
  getSettingsSpy.mockResolvedValueOnce({
    ...MOCK_DEFAULT_USER_SETTINGS,
    llm_base_url: "",
    confirmation_mode: false,
    security_analyzer: "",
  });

  // confirm reset
  await user.click(within(resetModal).getByText("Reset"));

  // Test ids of the controls the test expects to be gone, checked in this order.
  const advancedOnlyTestIds = [
    "llm-custom-model-input",
    "base-url-input",
    "agent-input",
    "security-analyzer-input",
    "enable-confirmation-mode-switch",
  ];

  await waitFor(() => {
    advancedOnlyTestIds.forEach((testId) => {
      expect(screen.queryByTestId(testId)).not.toBeInTheDocument();
    });
  });
});
it("should save if only confirmation mode is enabled", async () => {
const user = userEvent.setup();
renderSettingsScreen();
@@ -712,6 +762,81 @@ describe("Settings Screen", () => {
);
});
// Changing a setting and then confirming "Reset to defaults" must call
// resetSettings (never saveSettings), close the modal, and leave no
// advanced-only controls on screen.
it("should reset the settings when the 'Reset to defaults' button is clicked", async () => {
  const user = userEvent.setup();
  getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);

  renderSettingsScreen();

  // Make an unsaved change so there is something to reset.
  const languageInput = await screen.findByTestId("language-input");
  await user.click(languageInput);

  const norskOption = await screen.findByText("Norsk");
  await user.click(norskOption);

  expect(languageInput).toHaveValue("Norsk");

  const resetButton = screen.getByText("BUTTON$RESET_TO_DEFAULTS");
  await user.click(resetButton);

  expect(saveSettingsSpy).not.toHaveBeenCalled();

  // show modal
  const modal = await screen.findByTestId("reset-modal");
  expect(modal).toBeInTheDocument();

  // Mock the settings response after reset.
  // This is queued BEFORE confirming: registering it only after the reset call
  // has been observed races with the settings refetch, and an unconsumed
  // once-value would stay queued on the spy for whichever test calls
  // getSettings next.
  getSettingsSpy.mockResolvedValueOnce({
    ...MOCK_DEFAULT_USER_SETTINGS,
    llm_base_url: "",
    confirmation_mode: false,
    security_analyzer: "",
  });

  // confirm reset
  const confirmButton = within(modal).getByText("Reset");
  await user.click(confirmButton);

  await waitFor(() => {
    expect(resetSettingsSpy).toHaveBeenCalled();
  });

  // Wait for the mutation to complete and the modal to be removed
  await waitFor(() => {
    expect(screen.queryByTestId("reset-modal")).not.toBeInTheDocument();
    expect(
      screen.queryByTestId("llm-custom-model-input"),
    ).not.toBeInTheDocument();
    expect(screen.queryByTestId("base-url-input")).not.toBeInTheDocument();
    expect(screen.queryByTestId("agent-input")).not.toBeInTheDocument();
    expect(
      screen.queryByTestId("security-analyzer-input"),
    ).not.toBeInTheDocument();
    expect(
      screen.queryByTestId("enable-confirmation-mode-switch"),
    ).not.toBeInTheDocument();
  });
});
// Dismissing the reset modal via "Cancel" must close it without saving.
it("should cancel the reset when the 'Cancel' button is clicked", async () => {
  getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
  const user = userEvent.setup();
  renderSettingsScreen();

  // Open the confirmation dialog.
  await user.click(await screen.findByText("BUTTON$RESET_TO_DEFAULTS"));
  const resetDialog = await screen.findByTestId("reset-modal");
  expect(resetDialog).toBeInTheDocument();

  // Back out of it.
  await user.click(within(resetDialog).getByText("Cancel"));

  expect(saveSettingsSpy).not.toHaveBeenCalled();
  expect(screen.queryByTestId("reset-modal")).not.toBeInTheDocument();
});
it("should call handleCaptureConsent with true if the save is successful", async () => {
const user = userEvent.setup();
const handleCaptureConsentSpy = vi.spyOn(
@@ -919,5 +1044,18 @@ describe("Settings Screen", () => {
);
});
// Confirming a reset must go through the dedicated reset call and never
// through the regular save call.
it("should not submit the unwanted fields when resetting", async () => {
  const user = userEvent.setup();
  renderSettingsScreen();

  const resetButton = await screen.findByText("BUTTON$RESET_TO_DEFAULTS");
  await user.click(resetButton);

  const modal = await screen.findByTestId("reset-modal");
  const confirmButton = within(modal).getByText("Reset");
  await user.click(confirmButton);

  // The reset call may be issued asynchronously after the click resolves, so
  // poll for it (same pattern as the "should reset the settings" test)
  // instead of asserting synchronously.
  await waitFor(() => {
    expect(resetSettingsSpy).toHaveBeenCalled();
  });
  // Checked after the reset has fired, so the negative assertion covers the
  // whole click handler rather than just its synchronous part.
  expect(saveSettingsSpy).not.toHaveBeenCalled();
});
});
});
@@ -48,4 +48,4 @@ describe("Observations Service", () => {
});
});
});
});
});
@@ -1,101 +0,0 @@
import { expect, test } from "vitest";
import {
SuggestedTask,
SuggestedTaskGroup,
} from "#/components/features/home/tasks/task.types";
import { groupSuggestedTasks } from "#/utils/group-suggested-tasks";
// Input fixture: five flat suggested tasks spread over three repos
// (repo1 x2, repo2 x2, repo3 x1), listed so that tasks of the same repo
// are adjacent.
const rawTasks: SuggestedTask[] = [
{
issue_number: 1,
repo: "repo1",
title: "Task 1",
task_type: "MERGE_CONFLICTS",
git_provider: "github",
},
{
issue_number: 2,
repo: "repo1",
title: "Task 2",
task_type: "FAILING_CHECKS",
git_provider: "github",
},
{
issue_number: 3,
repo: "repo2",
title: "Task 3",
task_type: "UNRESOLVED_COMMENTS",
git_provider: "github",
},
{
issue_number: 4,
repo: "repo2",
title: "Task 4",
task_type: "OPEN_ISSUE",
git_provider: "github",
},
{
issue_number: 5,
repo: "repo3",
title: "Task 5",
task_type: "FAILING_CHECKS",
git_provider: "github",
},
];
// Expected output fixture for `rawTasks`: one group per repo, titled with the
// repo name, groups and tasks in the same order as they appear in the input.
const groupedTasks: SuggestedTaskGroup[] = [
{
title: "repo1",
tasks: [
{
issue_number: 1,
repo: "repo1",
title: "Task 1",
task_type: "MERGE_CONFLICTS",
git_provider: "github",
},
{
issue_number: 2,
repo: "repo1",
title: "Task 2",
task_type: "FAILING_CHECKS",
git_provider: "github",
},
],
},
{
title: "repo2",
tasks: [
{
issue_number: 3,
repo: "repo2",
title: "Task 3",
task_type: "UNRESOLVED_COMMENTS",
git_provider: "github",
},
{
issue_number: 4,
repo: "repo2",
title: "Task 4",
task_type: "OPEN_ISSUE",
git_provider: "github",
},
],
},
{
title: "repo3",
tasks: [
{
issue_number: 5,
repo: "repo3",
title: "Task 5",
task_type: "FAILING_CHECKS",
git_provider: "github",
},
],
},
];
// Grouping the flat fixture must deep-equal the per-repo fixture.
test("groupSuggestedTasks", () => {
  const actualGroups = groupSuggestedTasks(rawTasks);

  expect(actualGroups).toEqual(groupedTasks);
});
+550 -1536
View File
File diff suppressed because it is too large Load Diff
+24 -26
View File
@@ -1,6 +1,6 @@
{
"name": "openhands-frontend",
"version": "0.34.0",
"version": "0.33.0",
"private": true,
"type": "module",
"engines": {
@@ -8,46 +8,44 @@
},
"dependencies": {
"@heroui/react": "2.7.6",
"@microlink/react-json-view": "^1.26.1",
"@monaco-editor/react": "^4.7.0-rc.0",
"@react-router/node": "^7.5.2",
"@react-router/serve": "^7.5.2",
"@react-types/shared": "^3.29.0",
"@reduxjs/toolkit": "^2.7.0",
"@react-router/node": "^7.5.0",
"@react-router/serve": "^7.5.0",
"@react-types/shared": "^3.28.0",
"@reduxjs/toolkit": "^2.6.1",
"@stripe/react-stripe-js": "^3.6.0",
"@stripe/stripe-js": "^7.2.0",
"@tanstack/react-query": "^5.74.4",
"@vitejs/plugin-react": "^4.4.0",
"@stripe/stripe-js": "^7.0.0",
"@tanstack/react-query": "^5.72.1",
"@vitejs/plugin-react": "^4.3.2",
"@xterm/addon-fit": "^0.10.0",
"@xterm/xterm": "^5.4.0",
"axios": "^1.8.4",
"clsx": "^2.1.1",
"eslint-config-airbnb-typescript": "^18.0.0",
"framer-motion": "^12.9.1",
"i18next": "^25.0.1",
"i18next-browser-languagedetector": "^8.0.5",
"framer-motion": "^12.6.3",
"i18next": "^24.2.3",
"i18next-browser-languagedetector": "^8.0.4",
"i18next-http-backend": "^3.0.2",
"isbot": "^5.1.25",
"jose": "^6.0.10",
"lucide-react": "^0.503.0",
"monaco-editor": "^0.52.2",
"posthog-js": "^1.236.6",
"posthog-js": "^1.235.0",
"react": "^19.1.0",
"react-dom": "^19.1.0",
"react-highlight": "^0.15.0",
"react-hot-toast": "^2.5.1",
"react-i18next": "^15.5.1",
"react-i18next": "^15.4.1",
"react-icons": "^5.5.0",
"react-markdown": "^10.1.0",
"react-redux": "^9.2.0",
"react-router": "^7.5.2",
"react-router": "^7.5.0",
"react-syntax-highlighter": "^15.6.1",
"react-textarea-autosize": "^8.5.9",
"remark-gfm": "^4.0.1",
"sirv-cli": "^3.0.1",
"socket.io-client": "^4.8.1",
"tailwind-merge": "^3.2.0",
"vite": "^6.3.3",
"vite": "^6.2.5",
"web-vitals": "^3.5.2",
"ws": "^8.18.1"
},
@@ -81,29 +79,29 @@
"@babel/traverse": "^7.27.0",
"@babel/types": "^7.27.0",
"@mswjs/socket.io-binding": "^0.1.1",
"@playwright/test": "^1.52.0",
"@react-router/dev": "^7.5.2",
"@playwright/test": "^1.51.1",
"@react-router/dev": "^7.5.0",
"@tailwindcss/typography": "^0.5.16",
"@tanstack/eslint-plugin-query": "^5.73.3",
"@tanstack/eslint-plugin-query": "^5.72.1",
"@testing-library/dom": "^10.4.0",
"@testing-library/jest-dom": "^6.6.1",
"@testing-library/react": "^16.3.0",
"@testing-library/user-event": "^14.6.1",
"@types/node": "^22.14.1",
"@types/react": "^19.1.2",
"@types/node": "^22.14.0",
"@types/react": "^19.1.0",
"@types/react-dom": "^19.1.1",
"@types/react-highlight": "^0.12.8",
"@types/react-syntax-highlighter": "^15.5.13",
"@types/ws": "^8.18.1",
"@typescript-eslint/eslint-plugin": "^7.18.0",
"@typescript-eslint/parser": "^7.18.0",
"@vitest/coverage-v8": "^3.1.2",
"@vitest/coverage-v8": "^3.1.1",
"autoprefixer": "^10.4.21",
"cross-env": "^7.0.3",
"eslint": "^8.57.0",
"eslint-config-airbnb": "^19.0.4",
"eslint-config-airbnb-typescript": "^18.0.0",
"eslint-config-prettier": "^10.1.2",
"eslint-config-prettier": "^10.1.1",
"eslint-plugin-import": "^2.29.1",
"eslint-plugin-jsx-a11y": "^6.10.2",
"eslint-plugin-prettier": "^5.2.6",
@@ -111,8 +109,8 @@
"eslint-plugin-react-hooks": "^4.6.2",
"eslint-plugin-unused-imports": "^4.1.4",
"husky": "^9.1.7",
"jsdom": "^26.1.0",
"lint-staged": "^15.5.1",
"jsdom": "^26.0.0",
"lint-staged": "^15.5.0",
"msw": "^2.6.6",
"postcss": "^8.5.2",
"prettier": "^3.5.3",
+1 -1
View File
@@ -8,7 +8,7 @@
* - Please do NOT serve this file on production.
*/
const PACKAGE_VERSION = '2.7.5'
const PACKAGE_VERSION = '2.7.3'
const INTEGRITY_CHECKSUM = '00729d72e3b82faf54ca8b9621dbb96f'
const IS_MOCKED_RESPONSE = Symbol('isMockedResponse')
const activeClientIds = new Set()
@@ -105,7 +105,6 @@ function isRawTranslationKey(str) {
// Specific technical strings that should be excluded from localization
const EXCLUDED_TECHNICAL_STRINGS = [
"openid email profile", // OAuth scope string - not user-facing
"OPEN_ISSUE", // Task type identifier, not a UI string
];
function isExcludedTechnicalString(str) {
@@ -276,8 +275,8 @@ function isCommonDevelopmentString(str) {
// HTML tags and attributes
if (
/^<[a-z0-9]+(?:\s[^>]*)?>.*<\/[a-z0-9]+>$/i.test(str) ||
/^<[a-z0-9]+ [^>]+\/>$/i.test(str)
/^<[a-z0-9]+>.*<\/[a-z0-9]+>$/.test(str) ||
/^<[a-z0-9]+ [^>]+\/>$/.test(str)
) {
return true;
}
-49
View File
@@ -1,49 +0,0 @@
import { openHands } from "./open-hands-axios";
/**
 * One entry of the list returned by `ApiKeysClient.getApiKeys`.
 * Note there is no field carrying the full key here; compare
 * `CreateApiKeyResponse`, which has `key`.
 */
export interface ApiKey {
id: string;
name: string;
// NOTE(review): presumably the leading characters of the key, kept for display — confirm with the backend.
prefix: string;
created_at: string;
// NOTE(review): presumably null until the key has been used at least once — confirm.
last_used_at: string | null;
}
/**
 * Payload returned by `ApiKeysClient.createApiKey`. Unlike `ApiKey`, it
 * includes the full `key` value and has no `last_used_at`.
 */
export interface CreateApiKeyResponse {
id: string;
name: string;
key: string; // Full key, only returned once upon creation
prefix: string;
created_at: string;
}
/**
 * Static client for the `/api/keys` endpoints, built on the shared
 * `openHands` axios instance.
 */
class ApiKeysClient {
  /**
   * Get all API keys for the current user
   * @returns The keys from the response body, or an empty array when the
   *          body is not an array.
   */
  static async getApiKeys(): Promise<ApiKey[]> {
    const { data } = await openHands.get<unknown>("/api/keys");
    // Ensure we always return an array, even if the API returns something else
    return Array.isArray(data) ? (data as ApiKey[]) : [];
  }

  /**
   * Create a new API key
   * @param name - A descriptive name for the API key
   * @returns The response body, which includes the full `key` value.
   */
  static async createApiKey(name: string): Promise<CreateApiKeyResponse> {
    const { data } = await openHands.post<CreateApiKeyResponse>("/api/keys", {
      name,
    });
    return data;
  }

  /**
   * Delete an API key
   * @param id - The ID of the API key to delete
   */
  static async deleteApiKey(id: string): Promise<void> {
    // Escape the id so characters such as "/", "?" or "#" cannot change which
    // path is requested. This is a no-op for plain alphanumeric/UUID ids.
    await openHands.delete(`/api/keys/${encodeURIComponent(id)}`);
  }
}
export default ApiKeysClient;
+8 -22
View File
@@ -8,8 +8,6 @@ import {
Conversation,
ResultSet,
GetTrajectoryResponse,
GitChangeDiff,
GitChange,
} from "./open-hands.types";
import { openHands } from "./open-hands-axios";
import { ApiSettings, PostApiSettings } from "#/types/settings";
@@ -199,6 +197,14 @@ class OpenHands {
return data.status === 200;
}
/**
 * Ask the server to reset the user's settings.
 * @returns `true` only when the POST to `/api/reset-settings` answers with
 *          HTTP status 200.
 */
static async resetSettings(): Promise<boolean> {
  const { status } = await openHands.post("/api/reset-settings");
  return status === 200;
}
static async createCheckoutSession(amount: number): Promise<string> {
const { data } = await openHands.post(
"/api/billing/create-checkout-session",
@@ -271,26 +277,6 @@ class OpenHands {
appMode === "saas" ? "/api/logout" : "/api/unset-settings-tokens";
await openHands.post(endpoint);
}
/**
 * Fetch the list of git changes for a conversation.
 * @param conversationId - Conversation whose changes are requested
 * @returns The response body of `GET /api/conversations/{id}/git/changes`
 */
static async getGitChanges(conversationId: string): Promise<GitChange[]> {
  const url = `/api/conversations/${conversationId}/git/changes`;
  const response = await openHands.get<GitChange[]>(url);
  return response.data;
}
/**
 * Fetch the diff of a single file for a conversation.
 * @param conversationId - Conversation the file belongs to
 * @param path - File path, sent as the `path` query parameter
 * @returns The response body of `GET /api/conversations/{id}/git/diff`
 */
static async getGitChangeDiff(
  conversationId: string,
  path: string,
): Promise<GitChangeDiff> {
  const url = `/api/conversations/${conversationId}/git/diff`;
  const requestConfig = { params: { path } };
  const response = await openHands.get<GitChangeDiff>(url, requestConfig);
  return response.data;
}
}
export default OpenHands;
-15
View File
@@ -70,8 +70,6 @@ export interface AuthenticateResponse {
error?: string;
}
export type ConversationTrigger = "resolver" | "gui";
export interface Conversation {
conversation_id: string;
title: string;
@@ -79,22 +77,9 @@ export interface Conversation {
last_updated_at: string;
created_at: string;
status: ProjectStatus;
trigger?: ConversationTrigger;
}
export interface ResultSet<T> {
results: T[];
next_page_id: string | null;
}
// One-letter status code of a changed file.
// NOTE(review): presumably M = modified, A = added, D = deleted, R = renamed,
// U = untracked (mirrors git's short status letters) — confirm with the backend.
export type GitChangeStatus = "M" | "A" | "D" | "R" | "U";
// A single changed file: its status code and its path.
// NOTE(review): for status "R" the path apparently holds several
// whitespace-separated parts rather than a single path — confirm the exact format.
export interface GitChange {
status: GitChangeStatus;
path: string;
}
// The two text sides of one file's diff: `original` and `modified`.
export interface GitChangeDiff {
modified: string;
original: string;
}
@@ -1,9 +0,0 @@
import { SuggestedTask } from "#/components/features/home/tasks/task.types";
import { openHands } from "../open-hands-axios";
export class SuggestionsService {
static async getSuggestedTasks(): Promise<SuggestedTask[]> {
const { data } = await openHands.get("/api/user/suggested-tasks");
return data;
}
}
@@ -15,13 +15,13 @@
fill="black" />
<path
d="M38.7381 10.5084C38.5759 10.5084 38.4106 10.4788 38.2545 10.4076C37.6821 10.1526 37.4312 9.49736 37.6944 8.94289C38.5453 7.1431 39.791 5.48266 41.2938 4.14245C41.7559 3.73031 42.4782 3.75699 42.9037 4.20768C43.3291 4.65541 43.3016 5.35516 42.8363 5.76731C41.5539 6.91182 40.4919 8.32912 39.7634 9.86502C39.5737 10.2653 39.1666 10.5055 38.7381 10.5084Z"
fill="black" />
fill="white" />
<path
d="M34.898 9.87074C34.3073 9.87667 33.8023 9.43784 33.7533 8.85669C33.536 6.25633 33.5268 3.62039 33.7319 1.02003C33.7808 0.412188 34.3287 -0.0414663 34.9531 0.00300963C35.5805 0.0504507 36.0488 0.578232 36.0029 1.18607C35.807 3.67079 35.8162 6.1911 36.0243 8.67582C36.0763 9.28366 35.6081 9.81737 34.9806 9.86481C34.9531 9.86481 34.9255 9.86778 34.898 9.86778V9.87074Z"
fill="black" />
fill="white" />
<path
d="M30.976 10.5558C30.4649 10.5618 29.9935 10.2267 29.8619 9.7256C29.3783 7.88726 28.4632 6.14084 27.2175 4.67906C26.8165 4.20762 26.8869 3.51379 27.3705 3.12537C27.8572 2.73695 28.5734 2.80514 28.9743 3.27362C30.4312 4.98743 31.5024 7.03036 32.0656 9.18003C32.2217 9.77008 31.8514 10.372 31.2423 10.5232C31.1505 10.5469 31.0617 10.5558 30.9699 10.5588L30.976 10.5558Z"
fill="black" />
fill="white" />
</g>
</g>
<defs>

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

@@ -1,6 +0,0 @@
<svg width="22" height="22" viewBox="0 0 22 22" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M11 21L16.5 8H5.5L11 21Z" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M1 8L3.5 15.5L11 21L18.5 15.5L21 8" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M1 8L5.5 8L8.25 1" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M21 8L16.5 8L13.75 1" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
</svg>

Before

Width:  |  Height:  |  Size: 534 B

@@ -53,7 +53,6 @@ export function ExpandableMessage({
});
useEffect(() => {
// If we have a translation ID, process it
if (id && i18n.exists(id)) {
let processedObservation = observation;
let processedAction = action;
@@ -4,9 +4,6 @@ import { ChatMessage } from "#/components/features/chat/chat-message";
import { ConfirmationButtons } from "#/components/shared/buttons/confirmation-buttons";
import { ImageCarousel } from "../images/image-carousel";
import { ExpandableMessage } from "./expandable-message";
import { useUserConversation } from "#/hooks/query/use-user-conversation";
import { useConversation } from "#/context/conversation-context";
import { I18nKey } from "#/i18n/declaration";
interface MessagesProps {
messages: Message[];
@@ -14,38 +11,13 @@ interface MessagesProps {
}
export const Messages: React.FC<MessagesProps> = React.memo(
({ messages, isAwaitingUserConfirmation }) => {
const { conversationId } = useConversation();
const { data: conversation } = useUserConversation(conversationId || null);
// Check if conversation metadata has trigger=resolver
const isResolverTrigger = conversation?.trigger === "resolver";
return messages.map((message, index) => {
({ messages, isAwaitingUserConfirmation }) =>
messages.map((message, index) => {
const shouldShowConfirmationButtons =
messages.length - 1 === index &&
message.sender === "assistant" &&
isAwaitingUserConfirmation;
const isFirstUserMessageWithResolverTrigger =
index === 0 && message.sender === "user" && isResolverTrigger;
// Special case: First user message with resolver trigger
if (isFirstUserMessageWithResolverTrigger) {
return (
<div key={index}>
<ExpandableMessage
type="action"
message={message.content}
id={I18nKey.CHAT$RESOLVER_INSTRUCTIONS}
/>
{message.imageUrls && message.imageUrls.length > 0 && (
<ImageCarousel size="small" images={message.imageUrls} />
)}
</div>
);
}
if (message.type === "error" || message.type === "action") {
return (
<div key={index}>
@@ -74,8 +46,7 @@ export const Messages: React.FC<MessagesProps> = React.memo(
{shouldShowConfirmationButtons && <ConfirmationButtons />}
</ChatMessage>
);
});
},
}),
);
Messages.displayName = "Messages";
@@ -8,7 +8,6 @@ interface ConversationCardContextMenuProps {
onDelete?: (event: React.MouseEvent<HTMLButtonElement>) => void;
onEdit?: (event: React.MouseEvent<HTMLButtonElement>) => void;
onDisplayCost?: (event: React.MouseEvent<HTMLButtonElement>) => void;
onShowAgentTools?: (event: React.MouseEvent<HTMLButtonElement>) => void;
onDownloadViaVSCode?: (event: React.MouseEvent<HTMLButtonElement>) => void;
position?: "top" | "bottom";
}
@@ -18,7 +17,6 @@ export function ConversationCardContextMenu({
onDelete,
onEdit,
onDisplayCost,
onShowAgentTools,
onDownloadViaVSCode,
position = "bottom",
}: ConversationCardContextMenuProps) {
@@ -60,14 +58,6 @@ export function ConversationCardContextMenu({
Display Cost
</ContextMenuListItem>
)}
{onShowAgentTools && (
<ContextMenuListItem
testId="show-agent-tools-button"
onClick={onShowAgentTools}
>
Show Agent Tools & Metadata
</ContextMenuListItem>
)}
</ContextMenu>
);
}
@@ -10,12 +10,10 @@ import {
} from "./conversation-state-indicator";
import { EllipsisButton } from "./ellipsis-button";
import { ConversationCardContextMenu } from "./conversation-card-context-menu";
import { SystemMessageModal } from "./system-message-modal";
import { cn } from "#/utils/utils";
import { BaseModal } from "../../shared/modals/base-modal/base-modal";
import { RootState } from "#/store";
import { I18nKey } from "#/i18n/declaration";
import { selectSystemMessage } from "#/state/chat-slice";
interface ConversationCardProps {
onClick?: () => void;
@@ -54,12 +52,10 @@ export function ConversationCard({
const [contextMenuVisible, setContextMenuVisible] = React.useState(false);
const [titleMode, setTitleMode] = React.useState<"view" | "edit">("view");
const [metricsModalVisible, setMetricsModalVisible] = React.useState(false);
const [systemModalVisible, setSystemModalVisible] = React.useState(false);
const inputRef = React.useRef<HTMLInputElement>(null);
// Subscribe to metrics data from Redux store
const metrics = useSelector((state: RootState) => state.metrics);
const systemMessage = useSelector(selectSystemMessage);
const handleBlur = () => {
if (inputRef.current?.value) {
@@ -133,11 +129,6 @@ export function ConversationCard({
setMetricsModalVisible(true);
};
const handleShowAgentTools = (event: React.MouseEvent<HTMLButtonElement>) => {
event.stopPropagation();
setSystemModalVisible(true);
};
React.useEffect(() => {
if (titleMode === "edit") {
inputRef.current?.focus();
@@ -216,11 +207,6 @@ export function ConversationCard({
: undefined
}
onDisplayCost={showOptions ? handleDisplayCost : undefined}
onShowAgentTools={
showOptions && systemMessage
? handleShowAgentTools
: undefined
}
position={variant === "compact" ? "top" : "bottom"}
/>
)}
@@ -329,12 +315,6 @@ export function ConversationCard({
)}
</div>
</BaseModal>
<SystemMessageModal
isOpen={systemModalVisible}
onClose={() => setSystemModalVisible(false)}
systemMessage={systemMessage}
/>
</>
);
}
@@ -0,0 +1,20 @@
import { useTranslation } from "react-i18next";
import { I18nKey } from "#/i18n/declaration";
interface NewConversationButtonProps {
onClick: () => void;
}
/**
 * Button labelled "+ " followed by the translated `PROJECT$NEW` string.
 * Clicking it invokes the `onClick` prop; the component holds no state.
 */
export function NewConversationButton(props: NewConversationButtonProps) {
  const { onClick } = props;
  const { t } = useTranslation();
  const label = t(I18nKey.PROJECT$NEW);

  return (
    <button
      type="button"
      data-testid="new-conversation-button"
      className="font-bold bg-[#4465DB] px-2 py-1 rounded"
      onClick={onClick}
    >
      + {label}
    </button>
  );
}
@@ -1,238 +0,0 @@
import React, { useState } from "react";
import { useTranslation } from "react-i18next";
import { ChevronDown, ChevronRight } from "lucide-react";
import ReactJsonView from "@microlink/react-json-view";
import { BaseModalTitle } from "#/components/shared/modals/confirmation-modals/base-modal";
import { ModalBackdrop } from "#/components/shared/modals/modal-backdrop";
import { ModalBody } from "#/components/shared/modals/modal-body";
import { cn } from "#/utils/utils";
// Custom JSON viewer theme that matches our application theme
const jsonViewTheme = {
base00: "transparent", // background
base01: "#2d2d2d", // lighter background
base02: "#4e4e4e", // selection background
base03: "#6c6c6c", // comments, invisibles
base04: "#969896", // dark foreground
base05: "#d9d9d9", // default foreground
base06: "#e8e8e8", // light foreground
base07: "#ffffff", // light background
base08: "#ff5370", // variables, red
base09: "#f78c6c", // integers, orange
base0A: "#ffcb6b", // booleans, yellow
base0B: "#c3e88d", // strings, green
base0C: "#89ddff", // support, cyan
base0D: "#82aaff", // functions, blue
base0E: "#c792ea", // keywords, purple
base0F: "#ff5370", // deprecated, red
};
interface SystemMessageModalProps {
isOpen: boolean;
onClose: () => void;
systemMessage: {
content: string;
tools: Array<Record<string, unknown>> | null;
openhands_version: string | null;
agent_class: string | null;
} | null;
}
interface FunctionData {
name?: string;
description?: string;
parameters?: Record<string, unknown>;
}
interface ToolData {
type?: string;
function?: FunctionData;
name?: string;
description?: string;
parameters?: Record<string, unknown>;
}
/**
 * Extracts the display fields of one raw tool entry.
 *
 * A tool either nests its fields under `function` or carries them at the top
 * level; the nested object wins whenever it is present. The previous inline
 * code had an extra `type === "function" && function?.x` fallback per field,
 * but that branch was only reached when the same value had already been found
 * falsy, so it could never contribute a value and is dropped here.
 */
function describeTool(tool: Record<string, unknown>) {
  const toolData = tool as ToolData;
  const functionData = toolData.function || toolData;

  return {
    name: functionData.name || "",
    description: functionData.description || "",
    parameters: functionData.parameters || null,
  };
}

/** Classes for a tab button; the active tab gets the highlighted border. */
function tabButtonClass(isActive: boolean) {
  return cn(
    "px-4 py-2 font-medium border-b-2 transition-colors",
    isActive
      ? "border-primary text-gray-100"
      : "border-transparent hover:text-gray-700 dark:hover:text-gray-300",
  );
}

/**
 * Modal showing the agent's system message and, on a second tab, its tools.
 *
 * @param isOpen - Whether the modal is rendered at all
 * @param onClose - Passed to the backdrop as its close handler
 * @param systemMessage - Data to display; nothing is rendered when it is null
 *
 * The header shows the agent class and OpenHands version when present. The
 * "tools" tab button only appears when there is at least one tool. Each tool
 * is a collapsible row that reveals its description and a JSON view of its
 * parameters.
 */
export function SystemMessageModal({
  isOpen,
  onClose,
  systemMessage,
}: SystemMessageModalProps) {
  // Hooks run unconditionally, before any early return.
  const { t } = useTranslation();
  const [activeTab, setActiveTab] = useState<"system" | "tools">("system");
  const [expandedTools, setExpandedTools] = useState<Record<number, boolean>>(
    {},
  );

  if (!isOpen || !systemMessage) {
    return null;
  }

  const tools = systemMessage.tools ?? [];
  const hasTools = tools.length > 0;

  // Expansion state is keyed by the tool's position in the list.
  const toggleTool = (index: number) => {
    setExpandedTools((prev) => ({
      ...prev,
      [index]: !prev[index],
    }));
  };

  return (
    <ModalBackdrop onClose={onClose}>
      <ModalBody
        width="medium"
        className="max-h-[80vh] flex flex-col items-start"
      >
        <div className="flex flex-col gap-6 w-full">
          <BaseModalTitle title={t("SYSTEM_MESSAGE_MODAL$TITLE")} />
          <div className="flex flex-col gap-2">
            {systemMessage.agent_class && (
              <div className="text-sm">
                <span className="font-semibold text-gray-300">
                  {t("SYSTEM_MESSAGE_MODAL$AGENT_CLASS")}
                </span>{" "}
                <span className="font-medium text-gray-100">
                  {systemMessage.agent_class}
                </span>
              </div>
            )}
            {systemMessage.openhands_version && (
              <div className="text-sm">
                <span className="font-semibold text-gray-300">
                  {t("SYSTEM_MESSAGE_MODAL$OPENHANDS_VERSION")}
                </span>{" "}
                <span className="text-gray-100">
                  {systemMessage.openhands_version}
                </span>
              </div>
            )}
          </div>
        </div>
        <div className="w-full">
          <div className="flex border-b mb-2">
            <button
              type="button"
              className={tabButtonClass(activeTab === "system")}
              onClick={() => setActiveTab("system")}
            >
              {t("SYSTEM_MESSAGE_MODAL$SYSTEM_MESSAGE_TAB")}
            </button>
            {hasTools && (
              <button
                type="button"
                className={tabButtonClass(activeTab === "tools")}
                onClick={() => setActiveTab("tools")}
              >
                {t("SYSTEM_MESSAGE_MODAL$TOOLS_TAB")}
              </button>
            )}
          </div>
          <div className="h-[60vh] overflow-auto rounded-md">
            {activeTab === "system" && (
              <div className="p-4 whitespace-pre-wrap font-mono text-sm leading-relaxed text-gray-300 shadow-inner">
                {systemMessage.content}
              </div>
            )}
            {activeTab === "tools" && hasTools && (
              <div className="p-2 space-y-3">
                {tools.map((tool, index) => {
                  const { name, description, parameters } = describeTool(tool);
                  const isExpanded = expandedTools[index] || false;

                  return (
                    <div key={index} className="rounded-md overflow-hidden">
                      <button
                        type="button"
                        onClick={() => toggleTool(index)}
                        className="w-full py-3 px-2 text-left flex items-center justify-between hover:bg-gray-700 transition-colors"
                      >
                        <div className="flex items-center">
                          <h3 className="font-bold text-gray-100">
                            {/* String(): the raw entry is untyped at runtime */}
                            {String(name)}
                          </h3>
                        </div>
                        <span className="text-gray-300">
                          {isExpanded ? (
                            <ChevronDown size={18} />
                          ) : (
                            <ChevronRight size={18} />
                          )}
                        </span>
                      </button>
                      {isExpanded && (
                        <div className="px-2 pb-3 pt-1">
                          <div className="mt-2 mb-3">
                            <p className="text-sm whitespace-pre-wrap text-gray-300 leading-relaxed">
                              {String(description)}
                            </p>
                          </div>
                          {/* Parameters section */}
                          {parameters && (
                            <div className="mt-2">
                              <h4 className="text-sm font-semibold text-gray-300">
                                {t("SYSTEM_MESSAGE_MODAL$PARAMETERS")}
                              </h4>
                              <div className="text-sm mt-2 p-3 bg-gray-900 rounded-md overflow-auto text-gray-300 max-h-[400px] shadow-inner">
                                <ReactJsonView
                                  src={parameters}
                                  theme={jsonViewTheme}
                                />
                              </div>
                            </div>
                          )}
                        </div>
                      )}
                    </div>
                  );
                })}
              </div>
            )}
            {activeTab === "tools" && !hasTools && (
              <div className="flex items-center justify-center h-full p-4">
                <p className="text-gray-400">
                  {t("SYSTEM_MESSAGE_MODAL$NO_TOOLS")}
                </p>
              </div>
            )}
          </div>
        </div>
      </ModalBody>
    </ModalBackdrop>
  );
}
@@ -1,172 +0,0 @@
import { DiffEditor } from "@monaco-editor/react";
import React from "react";
import { editor as editor_t } from "monaco-editor";
import { LuFileDiff, LuFileMinus, LuFilePlus } from "react-icons/lu";
import { IconType } from "react-icons/lib";
import { GitChangeStatus } from "#/api/open-hands.types";
import { getLanguageFromPath } from "#/utils/get-language-from-path";
import { cn } from "#/utils/utils";
import ChevronUp from "#/icons/chveron-up.svg?react";
import { useGitDiff } from "#/hooks/query/use-get-diff";
interface LoadingSpinnerProps {
className?: string;
}
// TODO: Move out of this file and replace the current spinner with this one
/**
 * Centered spinning ring with `role="status"` and the label "Loading".
 * `className` is merged onto the ring itself (the caller uses it for sizing).
 */
function LoadingSpinner({ className }: LoadingSpinnerProps) {
  const ringClasses = cn(
    "animate-spin rounded-full border-4 border-gray-200 border-t-blue-500",
    className,
  );

  return (
    <div className="flex items-center justify-center">
      <div className={ringClasses} role="status" aria-label="Loading" />
    </div>
  );
}
// Visual marker for each change status: an icon component for A/D/M, a plain
// text label for R/U. Consumers must handle both value kinds.
const STATUS_MAP: Record<GitChangeStatus, string | IconType> = {
A: LuFilePlus,
D: LuFileMinus,
M: LuFileDiff,
R: "Renamed",
U: "Untracked",
};
export interface FileDiffViewerProps {
path: string;
type: GitChangeStatus;
}
/**
 * Collapsible row for one changed file. The header shows a status marker and
 * the file path; expanding it fetches the diff and renders a read-only Monaco
 * diff editor sized to its content.
 *
 * @param path - Path as reported for the change (see rename handling below)
 * @param type - Status code of the change
 */
export function FileDiffViewer({ path, type }: FileDiffViewerProps) {
  const [isCollapsed, setIsCollapsed] = React.useState(true);
  const [editorHeight, setEditorHeight] = React.useState(400);
  const diffEditorRef = React.useRef<editor_t.IStandaloneDiffEditor>(null);

  // Untracked files are displayed like added ones.
  const isAdded = type === "A" || type === "U";
  const isDeleted = type === "D";

  const filePath = React.useMemo(() => {
    if (type === "R") {
      // Drop the first whitespace-separated token and keep the last remaining
      // one. NOTE(review): presumably that is the new path of the rename —
      // confirm against the backend format.
      const parts = path.split(/\s+/).slice(1);
      // A single-token path leaves nothing after the slice; fall back to the
      // raw path instead of ending up with `undefined`.
      return parts[parts.length - 1] ?? path;
    }
    return path;
  }, [path, type]);

  // The diff is only requested once the row has been expanded.
  const {
    data: diff,
    isLoading,
    isSuccess,
    isRefetching,
  } = useGitDiff({
    filePath,
    type,
    enabled: !isCollapsed,
  });

  // Function to update editor height based on content
  const updateEditorHeight = React.useCallback(() => {
    if (diffEditorRef.current) {
      const originalEditor = diffEditorRef.current.getOriginalEditor();
      const modifiedEditor = diffEditorRef.current.getModifiedEditor();
      if (originalEditor && modifiedEditor) {
        // Get the content height from both editors and use the larger one
        const originalHeight = originalEditor.getContentHeight();
        const modifiedHeight = modifiedEditor.getContentHeight();
        const contentHeight = Math.max(originalHeight, modifiedHeight);
        // Add a small buffer to avoid scrollbar
        setEditorHeight(contentHeight + 20);
      }
    }
  }, []);

  const handleEditorDidMount = (editor: editor_t.IStandaloneDiffEditor) => {
    diffEditorRef.current = editor;
    updateEditorHeight();
    // Keep the container height in sync as either side's content size changes.
    const originalEditor = editor.getOriginalEditor();
    const modifiedEditor = editor.getModifiedEditor();
    originalEditor.onDidContentSizeChange(updateEditorHeight);
    modifiedEditor.onDidContentSizeChange(updateEditorHeight);
  };

  const status = type === "U" ? STATUS_MAP.A : STATUS_MAP[type];
  let statusIcon: React.ReactNode;
  if (typeof status === "string") {
    statusIcon = <span>{status}</span>;
  } else {
    const StatusIcon = status; // now it's recognized as a component
    statusIcon = <StatusIcon className="w-5 h-5" />;
  }

  const isFetchingData = isLoading || isRefetching;
  // Single source of truth for "the diff panel is on screen". The header used
  // to open its bottom edge on `!isLoading`, which also covers failed or
  // paused queries where no panel is rendered, leaving a header with no
  // bottom border.
  const isPanelVisible = isSuccess && !isCollapsed;

  return (
    <div data-testid="file-diff-viewer-outer" className="w-full flex flex-col">
      <div
        className={cn(
          "flex justify-between items-center px-2.5 py-3.5 border border-neutral-600 rounded-xl hover:cursor-pointer",
          isPanelVisible && "border-b-0 rounded-b-none",
        )}
        onClick={() => setIsCollapsed((prev) => !prev)}
      >
        <span className="text-sm w-full text-content flex items-center gap-2">
          {isFetchingData && <LoadingSpinner className="w-5 h-5" />}
          {!isFetchingData && statusIcon}
          <strong className="w-full truncate">{filePath}</strong>
          {/* No handler of its own: the click bubbles to the header above. */}
          <button data-testid="collapse" type="button">
            <ChevronUp
              className={cn(
                "w-4 h-4 transition-transform",
                isCollapsed && "transform rotate-180",
              )}
            />
          </button>
        </span>
      </div>
      {isPanelVisible && (
        <div
          className="w-full border border-neutral-600 overflow-hidden"
          style={{ height: `${editorHeight}px` }}
        >
          <DiffEditor
            data-testid="file-diff-viewer"
            className="w-full h-full"
            language={getLanguageFromPath(filePath)}
            original={isAdded ? "" : diff.original}
            modified={isDeleted ? "" : diff.modified}
            theme="vs-dark"
            onMount={handleEditorDidMount}
            options={{
              renderValidationDecorations: "off",
              readOnly: true,
              // Added/deleted files have only one meaningful side.
              renderSideBySide: !isAdded && !isDeleted,
              scrollBeyondLastLine: false,
              minimap: {
                enabled: false,
              },
              hideUnchangedRegions: {
                enabled: true,
              },
              automaticLayout: true,
              scrollbar: {
                // Make scrollbar less intrusive
                alwaysConsumeMouseWheel: false,
              },
            }}
          />
        </div>
      )}
    </div>
  );
}
@@ -1,21 +0,0 @@
import { Link } from "react-router";
import { useTranslation } from "react-i18next";
import { BrandButton } from "#/components/features/settings/brand-button";
import { useSettings } from "#/hooks/query/use-settings";
/**
 * Prompt to connect a provider: a translated message plus a link to
 * `/settings` wrapping a button. While settings are loading the button is
 * disabled and shows the loading label instead of the settings title.
 */
export function ConnectToProviderMessage() {
  const { isLoading } = useSettings();
  const { t } = useTranslation();

  const buttonLabel = isLoading ? t("HOME$LOADING") : t("SETTINGS$TITLE");

  return (
    <div className="flex flex-col gap-4">
      <p>{t("HOME$CONNECT_PROVIDER_MESSAGE")}</p>
      <Link to="/settings" data-testid="navigate-to-settings-button">
        <BrandButton variant="primary" type="button" isDisabled={isLoading}>
          {buttonLabel}
        </BrandButton>
      </Link>
    </div>
  );
}
@@ -1,57 +0,0 @@
import { useTranslation } from "react-i18next";
import { useCreateConversation } from "#/hooks/mutation/use-create-conversation";
import { useIsCreatingConversation } from "#/hooks/use-is-creating-conversation";
import { BrandButton } from "../settings/brand-button";
import AllHandsLogo from "#/assets/branding/all-hands-logo-spark.svg?react";
/**
 * Header of the home screen: logo, heading, a button that creates an empty
 * conversation, a short description and a link to the getting-started docs.
 */
export function HomeHeader() {
  const conversationMutation = useCreateConversation();
  const isCreatingConversationElsewhere = useIsCreatingConversation();
  const { t } = useTranslation();

  // isSuccess counts as "busy" too: after the conversation is created the app
  // may still need time to render the new conversation screen.
  const isBusy =
    conversationMutation.isPending ||
    conversationMutation.isSuccess ||
    isCreatingConversationElsewhere;

  const launchLabel = isBusy ? t("HOME$LOADING") : "Launch from Scratch";

  const handleLaunch = () => {
    conversationMutation.mutate({});
  };

  return (
    <header className="flex flex-col gap-5">
      <AllHandsLogo />

      <div className="flex items-center justify-between">
        <h1 className="heading">{t("HOME$LETS_START_BUILDING")}</h1>
        <BrandButton
          testId="header-launch-button"
          type="button"
          variant="primary"
          isDisabled={isBusy}
          onClick={handleLaunch}
        >
          {launchLabel}
        </BrandButton>
      </div>

      <div className="flex items-center justify-between">
        <p className="text-sm max-w-[424px]">
          {t("HOME$OPENHANDS_DESCRIPTION")}
        </p>
        <p className="text-sm">
          {t("HOME$NOT_SURE_HOW_TO_START")}{" "}
          <a
            className="underline underline-offset-2"
            href="https://docs.all-hands.dev/modules/usage/getting-started"
            rel="noopener noreferrer"
            target="_blank"
          >
            Read this
          </a>
        </p>
      </div>
    </header>
  );
}
@@ -1,34 +0,0 @@
import { useTranslation } from "react-i18next";
import { ConnectToProviderMessage } from "./connect-to-provider-message";
import { useAuth } from "#/context/auth-context";
import { RepositorySelectionForm } from "./repo-selection-form";
import { useConfig } from "#/hooks/query/use-config";
import { RepoProviderLinks } from "./repo-provider-links";
interface RepoConnectorProps {
  /** Forwarded to the repository selection form. */
  onRepoSelection: (repoTitle: string | null) => void;
}

/**
 * "Connect to repository" section of the home screen.
 *
 * Shows a connect-to-provider message while no git providers are set, and the
 * repository selection form once they are. In SaaS mode with providers set,
 * the provider links are shown as well.
 */
export function RepoConnector({ onRepoSelection }: RepoConnectorProps) {
  const { providersAreSet } = useAuth();
  const { data: config } = useConfig();
  const { t } = useTranslation();

  const isSaaS = config?.APP_MODE === "saas";
  const sectionTitle = t("HOME$CONNECT_TO_REPOSITORY");

  return (
    <section
      data-testid="repo-connector"
      className="w-full flex flex-col gap-6"
    >
      <h2 className="heading">{sectionTitle}</h2>

      {!providersAreSet && <ConnectToProviderMessage />}
      {providersAreSet && (
        <RepositorySelectionForm onRepoSelection={onRepoSelection} />
      )}

      {isSaaS && providersAreSet && <RepoProviderLinks />}
    </section>
  );
}
@@ -1,17 +0,0 @@
import { useConfig } from "#/hooks/query/use-config";
/**
 * Link for installing the GitHub app on additional repositories.
 *
 * The URL is built from the app slug in the loaded config.
 * NOTE(review): while the config is not available the anchor is rendered with
 * an empty href — confirm this is the intended fallback.
 */
export function RepoProviderLinks() {
  const { data: config } = useConfig();

  let githubHref = "";
  if (config) {
    githubHref = `https://github.com/apps/${config.APP_SLUG}/installations/new`;
  }

  return (
    <div className="flex flex-col text-sm underline underline-offset-2 text-content-2 gap-4 w-fit">
      <a href={githubHref} target="_blank" rel="noopener noreferrer">
        Add GitHub repos
      </a>
    </div>
  );
}
@@ -1,138 +0,0 @@
import { render, screen } from "@testing-library/react";
import { describe, test, expect, vi, beforeEach } from "vitest";
import { RepositorySelectionForm } from "./repo-selection-form";
// Stubs standing in for the hooks that are replaced by the vi.mock calls
// below. Individual tests override their return values as needed.
const mockUseUserRepositories = vi.fn();
const mockUseCreateConversation = vi.fn();
const mockUseIsCreatingConversation = vi.fn();
const mockUseTranslation = vi.fn();
const mockUseAuth = vi.fn();
// Default return values.
// Repositories: a single empty page, not loading, no error.
mockUseUserRepositories.mockReturnValue({
data: { pages: [{ data: [] }] },
isLoading: false,
isError: false,
});
// Conversation creation: idle mutation.
mockUseCreateConversation.mockReturnValue({
mutate: vi.fn(),
isPending: false,
isSuccess: false,
});
mockUseIsCreatingConversation.mockReturnValue(false);
// Translation: identity function, so tests can assert on the raw i18n keys.
mockUseTranslation.mockReturnValue({ t: (key: string) => key });
// Auth: an authenticated user with providers already configured.
mockUseAuth.mockReturnValue({
isAuthenticated: true,
isLoading: false,
providersAreSet: true,
user: {
id: 1,
login: "testuser",
avatar_url: "https://example.com/avatar.png",
name: "Test User",
email: "test@example.com",
company: "Test Company",
},
login: vi.fn(),
logout: vi.fn(),
});
// Replace the real modules. Each factory exposes a thin wrapper that forwards
// to the matching stub when the hook is called.
// NOTE(review): the wrappers presumably exist because Vitest hoists vi.mock
// above these declarations, so the stubs must be looked up lazily — confirm.
vi.mock("#/hooks/query/use-user-repositories", () => ({
useUserRepositories: () => mockUseUserRepositories(),
}));
vi.mock("#/hooks/mutation/use-create-conversation", () => ({
useCreateConversation: () => mockUseCreateConversation(),
}));
vi.mock("#/hooks/use-is-creating-conversation", () => ({
useIsCreatingConversation: () => mockUseIsCreatingConversation(),
}));
vi.mock("react-i18next", () => ({
useTranslation: () => mockUseTranslation(),
}));
vi.mock("#/context/auth-context", () => ({
useAuth: () => mockUseAuth(),
}));
// Covers the three repository-fetch states of the form: loading, loaded, error.
describe("RepositorySelectionForm", () => {
  const mockOnRepoSelection = vi.fn();

  // Renders the form under test with the shared selection spy.
  const renderForm = () =>
    render(<RepositorySelectionForm onRepoSelection={mockOnRepoSelection} />);

  beforeEach(() => {
    vi.clearAllMocks();
  });

  test("shows loading indicator when repositories are being fetched", () => {
    // Repositories query still in flight
    mockUseUserRepositories.mockReturnValue({
      data: undefined,
      isLoading: true,
      isError: false,
    });

    renderForm();

    // The loading placeholder and its label are shown
    expect(screen.getByTestId("repo-dropdown-loading")).toBeInTheDocument();
    expect(screen.getByText("HOME$LOADING_REPOSITORIES")).toBeInTheDocument();
  });

  test("shows dropdown when repositories are loaded", () => {
    // One page containing two public GitHub repositories
    const loadedRepositories = [1, 2].map((id) => ({
      id,
      full_name: `user/repo${id}`,
      git_provider: "github",
      is_public: true,
    }));
    mockUseUserRepositories.mockReturnValue({
      data: { pages: [{ data: loadedRepositories }] },
      isLoading: false,
      isError: false,
    });

    renderForm();

    // The dropdown replaces the loading placeholder
    expect(screen.getByTestId("repo-dropdown")).toBeInTheDocument();
  });

  test("shows error message when repository fetch fails", () => {
    // Repositories query failed
    mockUseUserRepositories.mockReturnValue({
      data: undefined,
      isLoading: false,
      isError: true,
      error: new Error("Failed to fetch repositories"),
    });

    renderForm();

    // The error placeholder and its label are shown
    expect(screen.getByTestId("repo-dropdown-error")).toBeInTheDocument();
    expect(
      screen.getByText("HOME$FAILED_TO_LOAD_REPOSITORIES"),
    ).toBeInTheDocument();
  });
});
@@ -1,152 +0,0 @@
import React from "react";
import { useTranslation } from "react-i18next";
import { Spinner } from "@heroui/react";
import { useCreateConversation } from "#/hooks/mutation/use-create-conversation";
import { useUserRepositories } from "#/hooks/query/use-user-repositories";
import { useIsCreatingConversation } from "#/hooks/use-is-creating-conversation";
import { GitRepository } from "#/types/git";
import { BrandButton } from "../settings/brand-button";
import { SettingsDropdownInput } from "../settings/settings-dropdown-input";
/** Props for RepositorySelectionForm. */
interface RepositorySelectionFormProps {
// Called with the selected repository's full name, or with null when the
// dropdown input is cleared.
onRepoSelection: (repoTitle: string | null) => void;
}
/** Placeholder shown in place of the dropdown while repositories are loading. */
function RepositoryLoadingState() {
  const { t } = useTranslation();
  const loadingLabel = t("HOME$LOADING_REPOSITORIES");

  return (
    <div
      data-testid="repo-dropdown-loading"
      className="flex items-center gap-2 max-w-[500px] h-10 px-3 bg-tertiary border border-[#717888] rounded"
    >
      <Spinner size="sm" />
      <span className="text-sm">{loadingLabel}</span>
    </div>
  );
}
/** Placeholder shown in place of the dropdown when loading repositories failed. */
function RepositoryErrorState() {
  const { t } = useTranslation();
  const errorLabel = t("HOME$FAILED_TO_LOAD_REPOSITORIES");

  return (
    <div
      data-testid="repo-dropdown-error"
      className="flex items-center gap-2 max-w-[500px] h-10 px-3 bg-tertiary border border-[#717888] rounded text-red-500"
    >
      <span className="text-sm">{errorLabel}</span>
    </div>
  );
}
/** Props for the repository dropdown. */
interface RepositoryDropdownProps {
  /** Options to list; one entry per repository. */
  items: { key: React.Key; label: string }[];
  /** Forwarded to the underlying dropdown input. */
  onSelectionChange: (key: React.Key | null) => void;
  /** Forwarded to the underlying dropdown input. */
  onInputChange: (value: string) => void;
}

/** Repository picker: a SettingsDropdownInput preconfigured for repositories. */
function RepositoryDropdown(props: RepositoryDropdownProps) {
  const { items, onSelectionChange, onInputChange } = props;

  return (
    <SettingsDropdownInput
      testId="repo-dropdown"
      name="repo-dropdown"
      placeholder="Select a repo"
      wrapperClassName="max-w-[500px]"
      items={items}
      onSelectionChange={onSelectionChange}
      onInputChange={onInputChange}
    />
  );
}
/**
 * Repository picker plus a "Launch" button that starts a conversation for the
 * selected repository.
 *
 * Depending on the state of the repositories query, the picker is rendered as
 * a loading placeholder, an error placeholder, or the dropdown itself. The
 * launch button is disabled until a repository is selected and whenever
 * repositories are loading, failed to load, or a conversation is being created.
 *
 * @param onRepoSelection Called with the selected repository's full name, or
 *   with null when the dropdown input is cleared.
 */
export function RepositorySelectionForm({
  onRepoSelection,
}: RepositorySelectionFormProps) {
  const [selectedRepository, setSelectedRepository] =
    React.useState<GitRepository | null>(null);
  const {
    data: repositories,
    isLoading: isLoadingRepositories,
    isError: isRepositoriesError,
  } = useUserRepositories();
  const {
    mutate: createConversation,
    isPending,
    isSuccess,
  } = useCreateConversation();
  const isCreatingConversationElsewhere = useIsCreatingConversation();
  const { t } = useTranslation();

  // We check for isSuccess because the app might require time to render
  // into the new conversation screen after the conversation is created.
  const isCreatingConversation =
    isPending || isSuccess || isCreatingConversationElsewhere;

  // Flatten the paginated query result into a single list.
  const repositoriesList = repositories?.pages.flatMap((page) => page.data);
  const repositoriesItems = repositoriesList?.map((repo) => ({
    key: repo.id,
    label: repo.full_name,
  }));

  const handleRepoSelection = (key: React.Key | null) => {
    // React.Key may be a string or a number. Normalize it to a string so a
    // numeric key still matches the stringified repository id (a strict
    // comparison between a string and a number is always false).
    const selectedKey = key === null ? null : String(key);
    const selectedRepo =
      selectedKey === null
        ? undefined
        : repositoriesList?.find(
            (repo) => repo.id.toString() === selectedKey,
          );

    if (selectedRepo) onRepoSelection(selectedRepo.full_name);
    setSelectedRepository(selectedRepo || null);
  };

  const handleInputChange = (value: string) => {
    // Clearing the input clears the selection, locally and for the parent.
    if (value === "") {
      setSelectedRepository(null);
      onRepoSelection(null);
    }
  };

  // Render the appropriate UI based on the loading/error state
  const renderRepositorySelector = () => {
    if (isLoadingRepositories) {
      return <RepositoryLoadingState />;
    }

    if (isRepositoriesError) {
      return <RepositoryErrorState />;
    }

    return (
      <RepositoryDropdown
        items={repositoriesItems || []}
        onSelectionChange={handleRepoSelection}
        onInputChange={handleInputChange}
      />
    );
  };

  return (
    <>
      {renderRepositorySelector()}

      <BrandButton
        testId="repo-launch-button"
        variant="primary"
        type="button"
        isDisabled={
          !selectedRepository ||
          isCreatingConversation ||
          isLoadingRepositories ||
          isRepositoriesError
        }
        onClick={() => createConversation({ selectedRepository })}
      >
        {!isCreatingConversation && "Launch"}
        {isCreatingConversation && t("HOME$LOADING")}
      </BrandButton>
    </>
  );
}
@@ -1,95 +0,0 @@
import { Provider } from "#/types/settings";
import { SuggestedTaskType } from "./task.types";
/**
 * Returns the provider-specific vocabulary used when building prompts.
 *
 * GitLab gets merge-request / CI-pipeline wording; every other provider value
 * falls back to the GitHub wording (pull requests, GitHub Actions).
 */
const getProviderTerms = (git_provider: Provider) => {
  const isGitLab = git_provider === "gitlab";

  return isGitLab
    ? {
        requestType: "Merge Request",
        requestTypeShort: "MR",
        apiName: "GitLab API",
        tokenEnvVar: "GITLAB_TOKEN",
        ciSystem: "CI pipelines",
        ciProvider: "GitLab",
        requestVerb: "merge request",
      }
    : {
        requestType: "Pull Request",
        requestTypeShort: "PR",
        apiName: "GitHub API",
        tokenEnvVar: "GITHUB_TOKEN",
        ciSystem: "GitHub Actions",
        ciProvider: "GitHub",
        requestVerb: "pull request",
      };
};
/**
 * Builds the prompt asking the agent to fix the merge conflicts of a
 * pull/merge request.
 *
 * @param git_provider Provider whose terminology (PR vs MR, API name, token
 *   variable) is used in the text.
 * @param issueNumber Number of the pull/merge request.
 * @param repo Repository name inserted into the prompt.
 * @returns The prompt text.
 */
export const getMergeConflictPrompt = (
git_provider: Provider,
issueNumber: number,
repo: string,
) => {
const terms = getProviderTerms(git_provider);
return `You are working on ${terms.requestType} #${issueNumber} in repository ${repo}. You need to fix the merge conflicts.
Use the ${terms.apiName} with the ${terms.tokenEnvVar} environment variable to retrieve the ${terms.requestTypeShort} details. Check out the branch from that ${terms.requestVerb} and look at the diff versus the base branch of the ${terms.requestTypeShort} to understand the ${terms.requestTypeShort}'s intention.
Then resolve the merge conflicts. If you aren't sure what the right solution is, look back through the commit history at the commits that introduced the conflict and resolve them accordingly.`;
};
/**
 * Builds the prompt asking the agent to fix the failing CI checks of a
 * pull/merge request.
 *
 * @param git_provider Provider whose terminology (PR vs MR, API name, token
 *   variable, CI system) is used in the text.
 * @param issueNumber Number of the pull/merge request.
 * @param repo Repository name inserted into the prompt.
 * @returns The prompt text.
 */
export const getFailingChecksPrompt = (
  git_provider: Provider,
  issueNumber: number,
  repo: string,
) => {
  const terms = getProviderTerms(git_provider);

  // The closing sentence names the CI system on its own. Prefixing it with the
  // provider name and lower-casing it produced "GitHub github actions" and
  // "GitLab ci pipelines".
  return `You are working on ${terms.requestType} #${issueNumber} in repository ${repo}. You need to fix the failing CI checks.
Use the ${terms.apiName} with the ${terms.tokenEnvVar} environment variable to retrieve the ${terms.requestTypeShort} details. Check out the branch from that ${terms.requestVerb} and look at the diff versus the base branch of the ${terms.requestTypeShort} to understand the ${terms.requestTypeShort}'s intention.
Then use the ${terms.apiName} to look at the ${terms.ciSystem} that are failing on the most recent commit. Try and reproduce the failure locally.
Get things working locally, then push your changes. Sleep for 30 seconds at a time until the ${terms.ciSystem} have run again. If they are still failing, repeat the process.`;
};
/**
 * Builds the prompt asking the agent to address the remaining reviewer
 * comments on a pull/merge request.
 *
 * @param git_provider Provider whose terminology (PR vs MR, API name, token
 *   variable) is used in the text.
 * @param issueNumber Number of the pull/merge request.
 * @param repo Repository name inserted into the prompt.
 * @returns The prompt text.
 */
export const getUnresolvedCommentsPrompt = (
git_provider: Provider,
issueNumber: number,
repo: string,
) => {
const terms = getProviderTerms(git_provider);
return `You are working on ${terms.requestType} #${issueNumber} in repository ${repo}. You need to resolve the remaining comments from reviewers.
Use the ${terms.apiName} with the ${terms.tokenEnvVar} environment variable to retrieve the ${terms.requestTypeShort} details. Check out the branch from that ${terms.requestVerb} and look at the diff versus the base branch of the ${terms.requestTypeShort} to understand the ${terms.requestTypeShort}'s intention.
Then use the ${terms.apiName} to retrieve all the feedback on the ${terms.requestTypeShort} so far. If anything hasn't been addressed, address it and commit your changes back to the same branch.`;
};
/**
 * Builds the prompt asking the agent to fix an open issue and open a
 * pull/merge request for it.
 *
 * @param git_provider Provider whose terminology (API name, token variable,
 *   PR vs MR) is used in the text.
 * @param issueNumber Number of the issue.
 * @param repo Repository name inserted into the prompt.
 * @returns The prompt text.
 */
export const getOpenIssuePrompt = (
git_provider: Provider,
issueNumber: number,
repo: string,
) => {
const terms = getProviderTerms(git_provider);
return `You are working on Issue #${issueNumber} in repository ${repo}. Your goal is to fix the issue.
Use the ${terms.apiName} with the ${terms.tokenEnvVar} environment variable to retrieve the issue details and any comments on the issue. Then check out a new branch and investigate what changes will need to be made.
Finally, make the required changes and open up a ${terms.requestVerb}. Be sure to reference the issue in the ${terms.requestTypeShort} description.`;
};
/**
 * Picks the prompt builder matching a suggested-task type and returns its
 * prompt.
 *
 * @param git_provider Provider passed through to the prompt builder.
 * @param type Suggested-task type selecting which prompt to build.
 * @param issueNumber Issue or pull/merge request number.
 * @param repo Repository name inserted into the prompt.
 * @returns The prompt text, or an empty string for an unrecognized type.
 */
export const getPromptForQuery = (
  git_provider: Provider,
  type: SuggestedTaskType,
  issueNumber: number,
  repo: string,
) => {
  if (type === "MERGE_CONFLICTS") {
    return getMergeConflictPrompt(git_provider, issueNumber, repo);
  }
  if (type === "FAILING_CHECKS") {
    return getFailingChecksPrompt(git_provider, issueNumber, repo);
  }
  if (type === "UNRESOLVED_COMMENTS") {
    return getUnresolvedCommentsPrompt(git_provider, issueNumber, repo);
  }
  if (type === "OPEN_ISSUE") {
    return getOpenIssuePrompt(git_provider, issueNumber, repo);
  }

  // Unknown task type: no prompt.
  return "";
};
@@ -1,91 +0,0 @@
import { useTranslation } from "react-i18next";
import { SuggestedTask } from "./task.types";
import { useIsCreatingConversation } from "#/hooks/use-is-creating-conversation";
import { useCreateConversation } from "#/hooks/mutation/use-create-conversation";
import { cn } from "#/utils/utils";
import { useUserRepositories } from "#/hooks/query/use-user-repositories";
import { getPromptForQuery } from "./get-prompt-for-query";
import { TaskIssueNumber } from "./task-issue-number";
import { Provider } from "#/types/settings";
/**
 * Builds the lookup from task type to its translated, human-readable label.
 *
 * @param t Translation function used to resolve each label's i18n key.
 * @returns A record with one label per suggested-task type.
 */
const getTaskTypeMap = (
  t: (key: string) => string,
): Record<SuggestedTask["task_type"], string> => {
  const labels: Record<SuggestedTask["task_type"], string> = {
    FAILING_CHECKS: t("HOME$FIX_FAILING_CHECKS"),
    MERGE_CONFLICTS: t("HOME$RESOLVE_MERGE_CONFLICTS"),
    OPEN_ISSUE: t("HOME$OPEN_ISSUE"),
    UNRESOLVED_COMMENTS: t("HOME$RESOLVE_UNRESOLVED_COMMENTS"),
  };

  return labels;
};
interface TaskCardProps {
  /** Suggested task rendered by the card. */
  task: SuggestedTask;
}

/**
 * List item for one suggested task.
 *
 * Shows the issue / request number linked to the provider's web UI, the task
 * type and title, and a button that starts a conversation pre-filled with the
 * prompt for this task.
 */
export function TaskCard({ task }: TaskCardProps) {
  const { data: repositories } = useUserRepositories();
  const { mutate: createConversation, isPending } = useCreateConversation();
  const isCreatingConversation = useIsCreatingConversation();
  const { t } = useTranslation();

  // Finds the loaded repository matching the task's name and provider, if any.
  const findTaskRepository = () =>
    repositories?.pages
      .flatMap((page) => page.data)
      ?.find(
        (repository) =>
          repository.full_name === task.repo &&
          repository.git_provider === task.git_provider,
      );

  const handleLaunchConversation = () => {
    const selectedRepository = findTaskRepository();
    const q = getPromptForQuery(
      task.git_provider,
      task.task_type,
      task.issue_number,
      task.repo,
    );

    return createConversation({ selectedRepository, q });
  };

  // Issues and pull/merge requests live under different paths, and the path
  // layout differs between GitLab and GitHub.
  const isIssue = task.task_type === "OPEN_ISSUE";
  const href: string =
    task.git_provider === "gitlab"
      ? `https://gitlab.com/${task.repo}/-/${
          isIssue ? "issues" : "merge_requests"
        }/${task.issue_number}`
      : `https://github.com/${task.repo}/${isIssue ? "issues" : "pull"}/${
          task.issue_number
        }`;

  const taskTypeLabel = getTaskTypeMap(t)[task.task_type];
  const buttonClassName = cn(
    "underline underline-offset-2 disabled:opacity-80",
    isPending && "no-underline font-bold",
  );

  return (
    <li className="py-3 border-b border-[#717888] flex items-center pr-6">
      <TaskIssueNumber issueNumber={task.issue_number} href={href} />

      <div className="w-full pl-8">
        <p className="font-semibold">{taskTypeLabel}</p>
        <p>{task.title}</p>
      </div>

      <button
        type="button"
        data-testid="task-launch-button"
        className={buttonClassName}
        disabled={isCreatingConversation}
        onClick={handleLaunchConversation}
      >
        {!isPending && t("HOME$LAUNCH")}
        {isPending && t("HOME$LOADING")}
      </button>
    </li>
  );
}

Some files were not shown because too many files have changed in this diff Show More